From a67d8a4d1b51f9a2cd212ea46492e2db9b6bf533 Mon Sep 17 00:00:00 2001
From: Michael Hamann <michael.hamann@xwiki.com>
Date: Mon, 24 Feb 2025 17:54:28 +0100
Subject: [PATCH] XWIKI-22926: Make it possible to configure the batch size in
 the Solr index synchronization on startup (#3928)

* Add a new configuration property for configuring the batch size of the
  initial Solr indexing synchronization.
* Update and add tests.

(cherry picked from commit db2814b642203d1c8c19dc4a9f734dad5a5280c5)
---
 .../internal/DefaultSolrConfiguration.java    | 16 +++++++++++++
 .../solr/internal/api/SolrConfiguration.java  |  8 +++++++
 .../job/AbstractDocumentIterator.java         | 23 +++++++++++++++----
 .../job/DatabaseDocumentIterator.java         |  4 ++--
 .../internal/job/SolrDocumentIterator.java    |  2 +-
 .../DefaultSolrConfigurationTest.java         |  9 ++++++++
 .../job/DatabaseDocumentIteratorTest.java     | 14 ++++++++---
 .../job/SolrDocumentIteratorTest.java         | 12 ++++++++++
 .../src/main/resources/xwiki.properties.vm    | 10 ++++++++
 9 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java
index fb67f5853e0..9fcc7acb5b9 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java
@@ -139,6 +139,16 @@ public class DefaultSolrConfiguration implements SolrConfiguration
      */
     public static final String SOLR_SYNCHRONIZE_AT_STARTUP_MODE = "solr.synchronizeAtStartupMode";
 
+    /**
+     * The default synchronization batch size, i.e. the number of documents loaded in each synchronization step.
+     */
+    public static final int SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT = 1000;
+
+    /**
+     * The name of the configuration property containing the batch size for the synchronization.
+     */
+    public static final String SOLR_SYNCHRONIZE_BATCH_SIZE = "solr.synchronizeBatchSize";
+
     /**
      * Indicate which mode to use for synchronize at startup by default.
      */
@@ -238,4 +248,15 @@ public SynchronizeAtStartupMode synchronizeAtStartupMode()
         }
         return result;
     }
+
+    @Override
+    public int getSynchronizationBatchSize()
+    {
+        // The synchronization iterators use this value both as the query limit and as the offset increment
+        // between two batches: with a zero or negative value the offset would never advance. Fall back to the
+        // default value for any non-positive configuration instead of returning it.
+        int batchSize =
+            this.configuration.getProperty(SOLR_SYNCHRONIZE_BATCH_SIZE, SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT);
+        return batchSize > 0 ? batchSize : SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT;
+    }
 }
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java
index 68ce77bf2d2..40307f466d1 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java
@@ -121,4 +121,12 @@ enum SynchronizeAtStartupMode
      * @since 12.5RC1
      */
     SynchronizeAtStartupMode synchronizeAtStartupMode();
+
+    /**
+     * @return the number of documents per batch in the synchronization job between the database and the Solr index
+     * @since 17.2.0RC1
+     * @since 16.10.5
+     * @since 16.4.7
+     */
+    int getSynchronizationBatchSize();
 }
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java
index 25c1f00be18..1ce0c1a4220 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java
@@ -19,7 +19,10 @@
  */
 package org.xwiki.search.solr.internal.job;
 
+import javax.inject.Inject;
+
 import org.xwiki.model.reference.EntityReference;
+import org.xwiki.search.solr.internal.api.SolrConfiguration;
 
 /**
  * Base class for {@link DocumentIterator}s.
@@ -30,16 +33,26 @@
  */
 public abstract class AbstractDocumentIterator<T> implements DocumentIterator<T>
 {
-    /**
-     * The maximum number of documents to query at once.
-     */
-    protected static final int LIMIT = 100;
-
     /**
      * Specifies the root entity whose documents are iterated. If {@code null} then all the documents are iterated.
      */
     protected EntityReference rootReference;
 
+    @Inject
+    protected SolrConfiguration solrConfiguration;
+
+    private int limit;
+
+    protected int getLimit()
+    {
+        // Read the configured batch size only once (0 = not read yet) so that it stays stable during the iteration.
+        if (this.limit == 0) {
+            this.limit = this.solrConfiguration.getSynchronizationBatchSize();
+        }
+
+        return this.limit;
+    }
+
     @Override
     public void remove()
     {
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java
index 8c7db8fad93..85431be1db7 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java
@@ -199,7 +199,7 @@ private void fetchNextResults()
             // the synchronization takes place. Also, the database is used as the reference store, meaning that we
             // update the Solr index to match the database, not the other way around.
             results = getQuery().setWiki(wiki).setOffset(offset).execute();
-            offset += LIMIT;
+            offset += getLimit();
         } catch (QueryException e) {
             throw new IllegalStateException("Failed to query the database.", e);
         }
@@ -231,7 +231,7 @@ private Query getQuery() throws QueryException
                 }
             }
 
-            query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(LIMIT);
+            query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(getLimit());
             countQuery = queryManager.createQuery(whereClause, Query.HQL).addFilter(countFilter);
 
             if (spaceReference != null) {
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java
index 156915a01eb..5efc5fc1f5d 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java
@@ -165,7 +165,7 @@ private SolrQuery getQuery() throws SolrIndexerException
             // plan to update the index to match the database during the synchronization process).
             // See https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results
             query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
-            query.setRows(LIMIT);
+            query.setRows(getLimit());
         }
         return query;
     }
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java
index f1bacb2330c..9a67d2507e3 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java
@@ -126,4 +126,13 @@ public void synchronizeAtStartupMode()
             DefaultSolrConfiguration.SOLR_SYNCHRONIZE_AT_STARTUP_MODE_DEFAULT.name())).thenReturn("");
         assertEquals(SolrConfiguration.SynchronizeAtStartupMode.FARM, this.configuration.synchronizeAtStartupMode());
     }
+
+    @Test
+    void getSynchronizationBatchSize()
+    {
+        int expectedBatchSize = 42;
+        when(this.source.getProperty(DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE,
+            DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT)).thenReturn(expectedBatchSize);
+        assertEquals(expectedBatchSize, this.configuration.getSynchronizationBatchSize());
+    }
 }
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java
index 7f9d52abb38..fad480bd9b5 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java
@@ -42,6 +42,7 @@
 import org.xwiki.query.QueryException;
 import org.xwiki.query.QueryFilter;
 import org.xwiki.query.QueryManager;
+import org.xwiki.search.solr.internal.api.SolrConfiguration;
 import org.xwiki.test.junit5.mockito.ComponentTest;
 import org.xwiki.test.junit5.mockito.InjectMockComponents;
 import org.xwiki.test.junit5.mockito.MockComponent;
@@ -85,6 +86,9 @@ class DatabaseDocumentIteratorTest
     @Named("count")
     private QueryFilter countQueryFilter;
 
+    @MockComponent
+    private SolrConfiguration configuration;
+
     @InjectMockComponents
     private DatabaseDocumentIterator databaseIterator;
 
@@ -107,12 +111,14 @@ void sizeWithException() throws Exception
     @Test
     void iterateAllWikis() throws Exception
     {
+        int batchSize = 83;
+        when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
         Query emptyQuery = mock(Query.class);
         when(emptyQuery.execute()).thenReturn(Collections.emptyList());
 
         Query chessQuery = mock(Query.class);
         when(chessQuery.setOffset(0)).thenReturn(chessQuery);
-        when(chessQuery.setOffset(100)).thenReturn(emptyQuery);
+        when(chessQuery.setOffset(batchSize)).thenReturn(emptyQuery);
         when(chessQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Blog.Code", "WebHome", "", "3.2" },
             new Object[] { "Main", "Welcome", "en", "1.1" }, new Object[] { "XWiki.Syntax", "Links", "fr", "2.5" }));
 
@@ -125,7 +131,7 @@ void iterateAllWikis() throws Exception
 
         Query tennisQuery = mock(Query.class);
         when(tennisQuery.setOffset(0)).thenReturn(tennisQuery);
-        when(tennisQuery.setOffset(100)).thenReturn(emptyQuery);
+        when(tennisQuery.setOffset(batchSize)).thenReturn(emptyQuery);
         when(tennisQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Main", "Welcome", "en", "2.1" },
             new Object[] { "XWiki.Syntax", "Links", "fr", "1.3" }));
 
@@ -178,6 +184,8 @@ void iterateAllWikis() throws Exception
     @Test
     void iterateOneWiki() throws Exception
     {
+        int batchSize = 23;
+        when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
         DocumentReference rootReference = createDocumentReference("gang", Arrays.asList("A", "B"), "C", null);
 
         Query emptyQuery = mock(Query.class);
@@ -187,7 +195,7 @@ void iterateOneWiki() throws Exception
         when(query.setLimit(anyInt())).thenReturn(query);
         when(query.setWiki(rootReference.getWikiReference().getName())).thenReturn(query);
         when(query.setOffset(0)).thenReturn(query);
-        when(query.setOffset(100)).thenReturn(emptyQuery);
+        when(query.setOffset(batchSize)).thenReturn(emptyQuery);
         when(query.execute()).thenReturn(Collections.singletonList(new Object[] { "A.B", "C", "de", "3.1" }));
 
         Map<String, Object> namedParameters = new HashMap();
diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java
index b0a1e60cde2..74d44d76e74 100644
--- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java
+++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java
@@ -37,6 +37,7 @@
 import org.xwiki.model.reference.DocumentReferenceResolver;
 import org.xwiki.model.reference.WikiReference;
 import org.xwiki.search.solr.internal.api.FieldUtils;
+import org.xwiki.search.solr.internal.api.SolrConfiguration;
 import org.xwiki.search.solr.internal.api.SolrInstance;
 import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
 import org.xwiki.test.junit5.mockito.ComponentTest;
@@ -46,7 +47,9 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
@@ -68,6 +71,9 @@ class SolrDocumentIteratorTest
     @MockComponent
     private DocumentReferenceResolver<SolrDocument> solrDocumentReferenceResolver;
 
+    @MockComponent
+    private SolrConfiguration configuration;
+
     @InjectMockComponents
     private SolrDocumentIterator solrIterator;
 
@@ -101,6 +107,9 @@ void sizeWithException()
     @Test
     void iterate() throws Exception
     {
+        int limit = 42;
+        when(this.configuration.getSynchronizationBatchSize()).thenReturn(limit);
+
         SolrDocumentList firstResults = new SolrDocumentList();
         firstResults.add(createSolrDocument("chess", Arrays.asList("A", "B"), "C", "", "1.3"));
         firstResults.add(createSolrDocument("chess", Arrays.asList("M"), "N", "en", "2.4"));
@@ -139,6 +148,9 @@ void iterate() throws Exception
         expectedResult.add(new ImmutablePair<>(documentReference, "1.1"));
 
         assertEquals(expectedResult, actualResult);
+
+        verify(this.solrInstance, times(3)).query(argThat(query ->
+            query instanceof SolrQuery solrQuery && solrQuery.getRows() == limit));
     }
 
     private SolrDocument createSolrDocument(String wiki, List<String> spaces, String name, String locale,
diff --git a/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm b/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm
index 7b9fb6ae1f3..917d05b6df9 100644
--- a/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm
+++ b/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm
@@ -797,6 +797,16 @@ distribution.automaticStartOnWiki=$xwikiPropertiesAutomaticStartOnWiki
 #-# The default is:
 # solr.synchronizeAtStartupMode=FARM
 
+#-# [Since 17.2.0RC1]
+#-# [Since 16.10.5]
+#-# [Since 16.4.7]
+#-# Indicates the batch size for the synchronization between the Solr index and the XWiki database. This defines
+#-# how many documents are loaded from the database and from Solr in each step. Higher values lead to fewer queries
+#-# and thus better performance, but increase the memory usage. The expected memory usage is around 1KB per
+#-# document, but it depends highly on the length of the document names.
+#-# The default is 1000.
+# solr.synchronizeBatchSize=1000
+
 #-------------------------------------------------------------------------------------
 # Security
 #-------------------------------------------------------------------------------------
-- 
GitLab