summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThorsten Behrens <thorsten.behrens@allotropia.de>2023-11-02 18:17:50 +0100
committerThorsten Behrens <thorsten.behrens@allotropia.de>2023-12-13 07:32:13 +0100
commitff071078ee5f13f0e9d430d6783444a631d232a0 (patch)
tree41c795640ead619a1860de413110d530456389df
parentfd527ecbac39c6e15cc27da30bd05cb2ff85fd82 (diff)
reprobuild: don't write timestamps to clucene index files
Our embedded clucene by default writes a random current-time millisecond value into version fields, in an attempt to randomise them. Clearly this is not needed for our static help, and it also prevents builds from being reproducible. Change-Id: I011388b5bc72b5d86bc1900f5439036ede60c020 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158845 Tested-by: Jenkins Reviewed-by: Thorsten Behrens <thorsten.behrens@allotropia.de>
-rw-r--r--RepositoryExternal.mk1
-rw-r--r--external/clucene/UnpackedTarball_clucene.mk1
-rw-r--r--external/clucene/patches/clucene-reprobuild.patch.161
-rw-r--r--helpcompiler/source/HelpIndexer.cxx5
4 files changed, 68 insertions, 0 deletions
diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index 9659ec7c163f..da11a7d153f2 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -2847,6 +2847,7 @@ ifneq ($(SYSTEM_CLUCENE),)
define gb_LinkTarget__use_clucene
$(call gb_LinkTarget_add_defs,$(1),\
$(filter-out -I% -isystem%,$(subst -isystem /,-isystem/,$(CLUCENE_CFLAGS))) \
+ -DSYSTEM_CLUCENE \
)
$(call gb_LinkTarget_set_include,$(1),\
diff --git a/external/clucene/UnpackedTarball_clucene.mk b/external/clucene/UnpackedTarball_clucene.mk
index 0d7b2a5c288e..3acdb35b5757 100644
--- a/external/clucene/UnpackedTarball_clucene.mk
+++ b/external/clucene/UnpackedTarball_clucene.mk
@@ -55,6 +55,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,clucene,\
external/clucene/patches/binary_function.patch \
external/clucene/patches/clucene-pure-virtual.patch \
external/clucene/patches/enumarith.patch \
+ external/clucene/patches/clucene-reprobuild.patch.1 \
))
ifneq ($(OS),WNT)
diff --git a/external/clucene/patches/clucene-reprobuild.patch.1 b/external/clucene/patches/clucene-reprobuild.patch.1
new file mode 100644
index 000000000000..2c5a0b95135f
--- /dev/null
+++ b/external/clucene/patches/clucene-reprobuild.patch.1
@@ -0,0 +1,61 @@
+diff -ur clucene.org/src/core/CLucene/index/IndexWriter.cpp clucene/src/core/CLucene/index/IndexWriter.cpp
+--- clucene.org/src/core/CLucene/index/IndexWriter.cpp 2023-11-02 17:31:00.110168174 +0100
++++ clucene/src/core/CLucene/index/IndexWriter.cpp 2023-11-02 17:33:22.507665912 +0100
+@@ -366,6 +366,10 @@
+ }
+ }
+
++void IndexWriter::setSegmentInfoStartVersion(int64_t startVersion) {
++ this->segmentInfos->setStartVersion(startVersion);
++}
++
+ int32_t IndexWriter::getMaxBufferedDocs() {
+ ensureOpen();
+ return docWriter->getMaxBufferedDocs();
+diff -ur clucene.org/src/core/CLucene/index/IndexWriter.h clucene/src/core/CLucene/index/IndexWriter.h
+--- clucene.org/src/core/CLucene/index/IndexWriter.h 2023-11-02 17:31:00.113501525 +0100
++++ clucene/src/core/CLucene/index/IndexWriter.h 2023-11-02 17:33:43.547787510 +0100
+@@ -336,6 +336,12 @@
+ int64_t getWriteLockTimeout();
+
+ /**
++ * Sets the 0th segmentinfo version. Default is current system time
++ * in milliseconds
++ */
++ void setSegmentInfoStartVersion(int64_t startVersion);
++
++ /**
+ * Sets the maximum time to wait for a commit lock (in milliseconds).
+ */
+ void setCommitLockTimeout(int64_t commitLockTimeout);
+diff -ur clucene.org/src/core/CLucene/index/SegmentInfos.cpp clucene/src/core/CLucene/index/SegmentInfos.cpp
+--- clucene.org/src/core/CLucene/index/SegmentInfos.cpp 2023-11-02 17:31:00.110168174 +0100
++++ clucene/src/core/CLucene/index/SegmentInfos.cpp 2023-11-02 18:04:43.855243418 +0100
+@@ -662,6 +662,10 @@
+ return IndexFileNames::fileNameFromGeneration( IndexFileNames::SEGMENTS, "", nextGeneration );
+ }
+
++ void SegmentInfos::setStartVersion(int64_t version) {
++ this->version = version;
++ }
++
+ void SegmentInfos::clearto(size_t from, size_t end){
+ size_t range = end - from;
+ if ( (infos.size() - from) >= range) { // Make sure we actually need to remove
+diff -ur clucene.org/src/core/CLucene/index/_SegmentInfos.h clucene/src/core/CLucene/index/_SegmentInfos.h
+--- clucene.org/src/core/CLucene/index/_SegmentInfos.h 2023-11-02 17:31:00.106834824 +0100
++++ clucene/src/core/CLucene/index/_SegmentInfos.h 2023-11-02 18:04:51.178598463 +0100
+@@ -347,6 +347,13 @@
+ */
+ std::string getNextSegmentFileName();
+
++ /**
++ * Set version value to start from
++ *
++ * Defaults to current time in milliseconds
++ */
++ void setStartVersion(int64_t version);
++
+ /* public vector-like operations */
+ //delete and clears objects 'from' from to 'to'
+ void clearto(size_t to, size_t end);
diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx
index 44506bddfc31..65e46743b482 100644
--- a/helpcompiler/source/HelpIndexer.cxx
+++ b/helpcompiler/source/HelpIndexer.cxx
@@ -106,6 +106,11 @@ bool HelpIndexer::indexDocuments()
analyzer.get(), true);
#endif
+#ifndef SYSTEM_CLUCENE
+ // avoid random values in index file, making help indices reproducible
+ writer->setSegmentInfoStartVersion(0);
+#endif
+
//Double limit of tokens allowed, otherwise we'll get a too-many-tokens
//exception for ja help. Could alternative ignore the exception and get
//truncated results as per java-Lucene apparently