From a67d8a4d1b51f9a2cd212ea46492e2db9b6bf533 Mon Sep 17 00:00:00 2001 From: Michael Hamann <michael.hamann@xwiki.com> Date: Mon, 24 Feb 2025 17:54:28 +0100 Subject: [PATCH] XWIKI-22926: Make it possible to configure the batch size in the Solr index synchronization on startup (#3928) * Add a new configuration property for configuring the batch size of the initial Solr indexing synchronization. * Update and add tests. (cherry picked from commit db2814b642203d1c8c19dc4a9f734dad5a5280c5) --- .../internal/DefaultSolrConfiguration.java | 16 +++++++++++++ .../solr/internal/api/SolrConfiguration.java | 8 +++++++ .../job/AbstractDocumentIterator.java | 23 +++++++++++++++---- .../job/DatabaseDocumentIterator.java | 4 ++-- .../internal/job/SolrDocumentIterator.java | 2 +- .../DefaultSolrConfigurationTest.java | 9 ++++++++ .../job/DatabaseDocumentIteratorTest.java | 14 ++++++++--- .../job/SolrDocumentIteratorTest.java | 12 ++++++++++ .../src/main/resources/xwiki.properties.vm | 10 ++++++++ 9 files changed, 87 insertions(+), 11 deletions(-) diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java index fb67f5853e0..9fcc7acb5b9 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/DefaultSolrConfiguration.java @@ -139,6 +139,16 @@ public class DefaultSolrConfiguration implements SolrConfiguration */ public static final String SOLR_SYNCHRONIZE_AT_STARTUP_MODE = "solr.synchronizeAtStartupMode"; + 
/** + * The default synchronization batch size. + */ + public static final int SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT = 1000; + + /** + * The name of the configuration property containing the batch size for the synchronization. + */ + public static final String SOLR_SYNCHRONIZE_BATCH_SIZE = "solr.synchronizeBatchSize"; + /** * Indicate which mode to use for synchronize at startup by default. */ @@ -238,4 +248,10 @@ public SynchronizeAtStartupMode synchronizeAtStartupMode() } return result; } + + @Override + public int getSynchronizationBatchSize() + { + return this.configuration.getProperty(SOLR_SYNCHRONIZE_BATCH_SIZE, SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT); + } } diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java index 68ce77bf2d2..40307f466d1 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/api/SolrConfiguration.java @@ -121,4 +121,12 @@ enum SynchronizeAtStartupMode * @since 12.5RC1 */ SynchronizeAtStartupMode synchronizeAtStartupMode(); + + /** + * @return the size of the batch for the synchronization job between the database and SOLR index + * @since 17.2.0RC1 + * @since 16.10.5 + * @since 16.4.7 + */ + int getSynchronizationBatchSize(); } diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java 
b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java index 25c1f00be18..1ce0c1a4220 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/AbstractDocumentIterator.java @@ -19,7 +19,10 @@ */ package org.xwiki.search.solr.internal.job; +import javax.inject.Inject; + import org.xwiki.model.reference.EntityReference; +import org.xwiki.search.solr.internal.api.SolrConfiguration; /** * Base class for {@link DocumentIterator}s. @@ -30,16 +33,26 @@ */ public abstract class AbstractDocumentIterator<T> implements DocumentIterator<T> { - /** - * The maximum number of documents to query at once. - */ - protected static final int LIMIT = 100; - /** * Specifies the root entity whose documents are iterated. If {@code null} then all the documents are iterated. */ protected EntityReference rootReference; + @Inject + protected SolrConfiguration solrConfiguration; + + private int limit; + + protected int getLimit() + { + // Cache the limit value to avoid possibly changing values during iteration. 
+ if (this.limit == 0) { + this.limit = this.solrConfiguration.getSynchronizationBatchSize(); + } + + return this.limit; + } + @Override public void remove() { diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java index 8c7db8fad93..85431be1db7 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIterator.java @@ -199,7 +199,7 @@ private void fetchNextResults() // the synchronization takes place. Also, the database is used as the reference store, meaning that we // update the Solr index to match the database, not the other way around. 
results = getQuery().setWiki(wiki).setOffset(offset).execute(); - offset += LIMIT; + offset += getLimit(); } catch (QueryException e) { throw new IllegalStateException("Failed to query the database.", e); } @@ -231,7 +231,7 @@ private Query getQuery() throws QueryException } } - query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(LIMIT); + query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(getLimit()); countQuery = queryManager.createQuery(whereClause, Query.HQL).addFilter(countFilter); if (spaceReference != null) { diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java index 156915a01eb..5efc5fc1f5d 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/main/java/org/xwiki/search/solr/internal/job/SolrDocumentIterator.java @@ -165,7 +165,7 @@ private SolrQuery getQuery() throws SolrIndexerException // plan to update the index to match the database during the synchronization process). 
// See https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START); - query.setRows(LIMIT); + query.setRows(getLimit()); } return query; } diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java index f1bacb2330c..9a67d2507e3 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/DefaultSolrConfigurationTest.java @@ -126,4 +126,13 @@ public void synchronizeAtStartupMode() DefaultSolrConfiguration.SOLR_SYNCHRONIZE_AT_STARTUP_MODE_DEFAULT.name())).thenReturn(""); assertEquals(SolrConfiguration.SynchronizeAtStartupMode.FARM, this.configuration.synchronizeAtStartupMode()); } + + @Test + void getSynchronizationBatchSize() + { + when(this.source.getProperty(DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE, + DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT)).thenReturn(42); + + assertEquals(42, this.configuration.getSynchronizationBatchSize()); + } } diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java index 7f9d52abb38..fad480bd9b5 100644 --- 
a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/DatabaseDocumentIteratorTest.java @@ -42,6 +42,7 @@ import org.xwiki.query.QueryException; import org.xwiki.query.QueryFilter; import org.xwiki.query.QueryManager; +import org.xwiki.search.solr.internal.api.SolrConfiguration; import org.xwiki.test.junit5.mockito.ComponentTest; import org.xwiki.test.junit5.mockito.InjectMockComponents; import org.xwiki.test.junit5.mockito.MockComponent; @@ -85,6 +86,9 @@ class DatabaseDocumentIteratorTest @Named("count") private QueryFilter countQueryFilter; + @MockComponent + private SolrConfiguration configuration; + @InjectMockComponents private DatabaseDocumentIterator databaseIterator; @@ -107,12 +111,14 @@ void sizeWithException() throws Exception @Test void iterateAllWikis() throws Exception { + int batchSize = 83; + when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize); Query emptyQuery = mock(Query.class); when(emptyQuery.execute()).thenReturn(Collections.emptyList()); Query chessQuery = mock(Query.class); when(chessQuery.setOffset(0)).thenReturn(chessQuery); - when(chessQuery.setOffset(100)).thenReturn(emptyQuery); + when(chessQuery.setOffset(batchSize)).thenReturn(emptyQuery); when(chessQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Blog.Code", "WebHome", "", "3.2" }, new Object[] { "Main", "Welcome", "en", "1.1" }, new Object[] { "XWiki.Syntax", "Links", "fr", "2.5" })); @@ -125,7 +131,7 @@ void iterateAllWikis() throws Exception Query tennisQuery = mock(Query.class); when(tennisQuery.setOffset(0)).thenReturn(tennisQuery); - when(tennisQuery.setOffset(100)).thenReturn(emptyQuery); + when(tennisQuery.setOffset(batchSize)).thenReturn(emptyQuery); 
when(tennisQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Main", "Welcome", "en", "2.1" }, new Object[] { "XWiki.Syntax", "Links", "fr", "1.3" })); @@ -178,6 +184,8 @@ void iterateAllWikis() throws Exception @Test void iterateOneWiki() throws Exception { + int batchSize = 23; + when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize); DocumentReference rootReference = createDocumentReference("gang", Arrays.asList("A", "B"), "C", null); Query emptyQuery = mock(Query.class); @@ -187,7 +195,7 @@ void iterateOneWiki() throws Exception when(query.setLimit(anyInt())).thenReturn(query); when(query.setWiki(rootReference.getWikiReference().getName())).thenReturn(query); when(query.setOffset(0)).thenReturn(query); - when(query.setOffset(100)).thenReturn(emptyQuery); + when(query.setOffset(batchSize)).thenReturn(emptyQuery); when(query.execute()).thenReturn(Collections.singletonList(new Object[] { "A.B", "C", "de", "3.1" })); Map<String, Object> namedParameters = new HashMap(); diff --git a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java index b0a1e60cde2..74d44d76e74 100644 --- a/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java +++ b/xwiki-platform-core/xwiki-platform-search/xwiki-platform-search-solr/xwiki-platform-search-solr-api/src/test/java/org/xwiki/search/solr/internal/job/SolrDocumentIteratorTest.java @@ -37,6 +37,7 @@ import org.xwiki.model.reference.DocumentReferenceResolver; import org.xwiki.model.reference.WikiReference; import org.xwiki.search.solr.internal.api.FieldUtils; +import 
org.xwiki.search.solr.internal.api.SolrConfiguration; import org.xwiki.search.solr.internal.api.SolrInstance; import org.xwiki.search.solr.internal.reference.SolrReferenceResolver; import org.xwiki.test.junit5.mockito.ComponentTest; @@ -46,7 +47,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -68,6 +71,9 @@ class SolrDocumentIteratorTest @MockComponent private DocumentReferenceResolver<SolrDocument> solrDocumentReferenceResolver; + @MockComponent + private SolrConfiguration configuration; + @InjectMockComponents private SolrDocumentIterator solrIterator; @@ -101,6 +107,9 @@ void sizeWithException() @Test void iterate() throws Exception { + int limit = 42; + when(this.configuration.getSynchronizationBatchSize()).thenReturn(limit); + SolrDocumentList firstResults = new SolrDocumentList(); firstResults.add(createSolrDocument("chess", Arrays.asList("A", "B"), "C", "", "1.3")); firstResults.add(createSolrDocument("chess", Arrays.asList("M"), "N", "en", "2.4")); @@ -139,6 +148,9 @@ void iterate() throws Exception expectedResult.add(new ImmutablePair<>(documentReference, "1.1")); assertEquals(expectedResult, actualResult); + + verify(this.solrInstance, times(3)).query(argThat(query -> + query instanceof SolrQuery solrQuery && solrQuery.getRows() == limit)); } private SolrDocument createSolrDocument(String wiki, List<String> spaces, String name, String locale, diff --git a/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm b/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm index 7b9fb6ae1f3..917d05b6df9 100644 --- 
a/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm +++ b/xwiki-platform-tools/xwiki-platform-tool-configuration-resources/src/main/resources/xwiki.properties.vm @@ -797,6 +797,16 @@ distribution.automaticStartOnWiki=$xwikiPropertiesAutomaticStartOnWiki #-# The default is: # solr.synchronizeAtStartupMode=FARM +#-# [Since 17.2.0RC1] +#-# [Since 16.10.5] +#-# [Since 16.4.7] +#-# Indicates the batch size for the synchronization between the Solr index and the XWiki database. This defines how many +#-# documents will be loaded from the database and Solr in each step. Higher values lead to fewer queries and thus +#-# better performance but increase the memory usage. The expected memory usage is around 1KB per document, but +#-# depends highly on the length of the document names. +#-# The default is 1000. +# solr.synchronizeBatchSize=1000 + #------------------------------------------------------------------------------------- # Security #------------------------------------------------------------------------------------- -- GitLab