Skip to content
Snippets Groups Projects
Commit a67d8a4d authored by Michael Hamann's avatar Michael Hamann Committed by Michael Hamann
Browse files

XWIKI-22926: Make it possible to configure the batch size in the Solr index...

XWIKI-22926: Make it possible to configure the batch size in the Solr index synchronization on startup (#3928)

* Add a new configuration property for configuring the batch size of the
  initial Solr indexing synchronization.
* Update and add tests.

(cherry picked from commit db2814b6)
parent e2750dbd
No related merge requests found
Showing
with 87 additions and 11 deletions
......@@ -139,6 +139,16 @@ public class DefaultSolrConfiguration implements SolrConfiguration
*/
public static final String SOLR_SYNCHRONIZE_AT_STARTUP_MODE = "solr.synchronizeAtStartupMode";
/**
* The default synchronization batch size.
*/
public static final int SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT = 1000;
/**
* The name of the configuration property containing the batch size for the synchronization.
*/
public static final String SOLR_SYNCHRONIZE_BATCH_SIZE = "solr.synchronizeBatchSize";
/**
* Indicate which mode to use for synchronize at startup by default.
*/
......@@ -238,4 +248,10 @@ public SynchronizeAtStartupMode synchronizeAtStartupMode()
}
return result;
}
@Override
public int getSynchronizationBatchSize()
{
    // Read the configured batch size, falling back to the compile-time default when the property is absent.
    int batchSize =
        this.configuration.getProperty(SOLR_SYNCHRONIZE_BATCH_SIZE, SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT);
    return batchSize;
}
}
......@@ -121,4 +121,12 @@ enum SynchronizeAtStartupMode
* @since 12.5RC1
*/
SynchronizeAtStartupMode synchronizeAtStartupMode();
/**
 * @return the number of documents loaded per query (from both the database and the Solr index) by the
 *         synchronization job that reconciles the database with the SOLR index; higher values mean fewer
 *         queries at the cost of more memory
 * @since 17.2.0RC1
 * @since 16.10.5
 * @since 16.4.7
 */
int getSynchronizationBatchSize();
}
......@@ -19,7 +19,10 @@
*/
package org.xwiki.search.solr.internal.job;
import javax.inject.Inject;
import org.xwiki.model.reference.EntityReference;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
/**
* Base class for {@link DocumentIterator}s.
......@@ -30,16 +33,26 @@
*/
public abstract class AbstractDocumentIterator<T> implements DocumentIterator<T>
{
/**
* The maximum number of documents to query at once.
*/
protected static final int LIMIT = 100;
/**
* Specifies the root entity whose documents are iterated. If {@code null} then all the documents are iterated.
*/
protected EntityReference rootReference;
@Inject
protected SolrConfiguration solrConfiguration;
private int limit;
/**
 * @return the maximum number of documents to query at once, read once from the configuration and then cached
 *         so the value cannot change in the middle of an iteration
 */
protected int getLimit()
{
    int cached = this.limit;
    if (cached == 0) {
        // First call: fetch the configured batch size and remember it for subsequent calls.
        cached = this.solrConfiguration.getSynchronizationBatchSize();
        this.limit = cached;
    }
    return cached;
}
@Override
public void remove()
{
......
......@@ -199,7 +199,7 @@ private void fetchNextResults()
// the synchronization takes place. Also, the database is used as the reference store, meaning that we
// update the Solr index to match the database, not the other way around.
results = getQuery().setWiki(wiki).setOffset(offset).execute();
offset += LIMIT;
offset += getLimit();
} catch (QueryException e) {
throw new IllegalStateException("Failed to query the database.", e);
}
......@@ -231,7 +231,7 @@ private Query getQuery() throws QueryException
}
}
query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(LIMIT);
query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(getLimit());
countQuery = queryManager.createQuery(whereClause, Query.HQL).addFilter(countFilter);
if (spaceReference != null) {
......
......@@ -165,7 +165,7 @@ private SolrQuery getQuery() throws SolrIndexerException
// plan to update the index to match the database during the synchronization process).
// See https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results
query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
query.setRows(LIMIT);
query.setRows(getLimit());
}
return query;
}
......
......@@ -126,4 +126,13 @@ public void synchronizeAtStartupMode()
DefaultSolrConfiguration.SOLR_SYNCHRONIZE_AT_STARTUP_MODE_DEFAULT.name())).thenReturn("");
assertEquals(SolrConfiguration.SynchronizeAtStartupMode.FARM, this.configuration.synchronizeAtStartupMode());
}
@Test
void getSynchronizationBatchSize()
{
    // Stub the configuration source to return a non-default value and verify it is passed through unchanged.
    int expected = 42;
    when(this.source.getProperty(DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE,
        DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT)).thenReturn(expected);
    assertEquals(expected, this.configuration.getSynchronizationBatchSize());
}
}
......@@ -42,6 +42,7 @@
import org.xwiki.query.QueryException;
import org.xwiki.query.QueryFilter;
import org.xwiki.query.QueryManager;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
import org.xwiki.test.junit5.mockito.ComponentTest;
import org.xwiki.test.junit5.mockito.InjectMockComponents;
import org.xwiki.test.junit5.mockito.MockComponent;
......@@ -85,6 +86,9 @@ class DatabaseDocumentIteratorTest
@Named("count")
private QueryFilter countQueryFilter;
@MockComponent
private SolrConfiguration configuration;
@InjectMockComponents
private DatabaseDocumentIterator databaseIterator;
......@@ -107,12 +111,14 @@ void sizeWithException() throws Exception
@Test
void iterateAllWikis() throws Exception
{
int batchSize = 83;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
Query emptyQuery = mock(Query.class);
when(emptyQuery.execute()).thenReturn(Collections.emptyList());
Query chessQuery = mock(Query.class);
when(chessQuery.setOffset(0)).thenReturn(chessQuery);
when(chessQuery.setOffset(100)).thenReturn(emptyQuery);
when(chessQuery.setOffset(batchSize)).thenReturn(emptyQuery);
when(chessQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Blog.Code", "WebHome", "", "3.2" },
new Object[] { "Main", "Welcome", "en", "1.1" }, new Object[] { "XWiki.Syntax", "Links", "fr", "2.5" }));
......@@ -125,7 +131,7 @@ void iterateAllWikis() throws Exception
Query tennisQuery = mock(Query.class);
when(tennisQuery.setOffset(0)).thenReturn(tennisQuery);
when(tennisQuery.setOffset(100)).thenReturn(emptyQuery);
when(tennisQuery.setOffset(batchSize)).thenReturn(emptyQuery);
when(tennisQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Main", "Welcome", "en", "2.1" },
new Object[] { "XWiki.Syntax", "Links", "fr", "1.3" }));
......@@ -178,6 +184,8 @@ void iterateAllWikis() throws Exception
@Test
void iterateOneWiki() throws Exception
{
int batchSize = 23;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
DocumentReference rootReference = createDocumentReference("gang", Arrays.asList("A", "B"), "C", null);
Query emptyQuery = mock(Query.class);
......@@ -187,7 +195,7 @@ void iterateOneWiki() throws Exception
when(query.setLimit(anyInt())).thenReturn(query);
when(query.setWiki(rootReference.getWikiReference().getName())).thenReturn(query);
when(query.setOffset(0)).thenReturn(query);
when(query.setOffset(100)).thenReturn(emptyQuery);
when(query.setOffset(batchSize)).thenReturn(emptyQuery);
when(query.execute()).thenReturn(Collections.singletonList(new Object[] { "A.B", "C", "de", "3.1" }));
Map<String, Object> namedParameters = new HashMap();
......
......@@ -37,6 +37,7 @@
import org.xwiki.model.reference.DocumentReferenceResolver;
import org.xwiki.model.reference.WikiReference;
import org.xwiki.search.solr.internal.api.FieldUtils;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
import org.xwiki.search.solr.internal.api.SolrInstance;
import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
import org.xwiki.test.junit5.mockito.ComponentTest;
......@@ -46,7 +47,9 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
......@@ -68,6 +71,9 @@ class SolrDocumentIteratorTest
@MockComponent
private DocumentReferenceResolver<SolrDocument> solrDocumentReferenceResolver;
@MockComponent
private SolrConfiguration configuration;
@InjectMockComponents
private SolrDocumentIterator solrIterator;
......@@ -101,6 +107,9 @@ void sizeWithException()
@Test
void iterate() throws Exception
{
int limit = 42;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(limit);
SolrDocumentList firstResults = new SolrDocumentList();
firstResults.add(createSolrDocument("chess", Arrays.asList("A", "B"), "C", "", "1.3"));
firstResults.add(createSolrDocument("chess", Arrays.asList("M"), "N", "en", "2.4"));
......@@ -139,6 +148,9 @@ void iterate() throws Exception
expectedResult.add(new ImmutablePair<>(documentReference, "1.1"));
assertEquals(expectedResult, actualResult);
verify(this.solrInstance, times(3)).query(argThat(query ->
query instanceof SolrQuery solrQuery && solrQuery.getRows() == limit));
}
private SolrDocument createSolrDocument(String wiki, List<String> spaces, String name, String locale,
......
......@@ -797,6 +797,16 @@ distribution.automaticStartOnWiki=$xwikiPropertiesAutomaticStartOnWiki
#-# The default is:
# solr.synchronizeAtStartupMode=FARM
#-# [Since 17.2.0RC1]
#-# [Since 16.10.5]
#-# [Since 16.4.7]
#-# Indicates the batch size for the synchronization between the SOLR index and the XWiki database. This defines how
#-# many documents will be loaded from the database and Solr in each step. Higher values lead to fewer queries and
#-# thus better performance, but increase memory usage. The expected memory usage is around 1KB per document, but
#-# depends heavily on the length of the document names.
#-# The default is 1000.
# solr.synchronizeBatchSize=1000
#-------------------------------------------------------------------------------------
# Security
#-------------------------------------------------------------------------------------
......
Loading...
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment