Commit a67d8a4d authored by Michael Hamann, committed by Michael Hamann

XWIKI-22926: Make it possible to configure the batch size in the Solr index synchronization on startup (#3928)

* Add a new configuration property for configuring the batch size of the
  initial Solr indexing synchronization.
* Update and add tests.

(cherry picked from commit db2814b6)
parent e2750dbd
Showing 87 additions and 11 deletions
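
Before diving into the diff, here is a minimal sketch (not part of the commit) of how a component would consume the new setting: it injects the SolrConfiguration role and calls the getter introduced below. The ExampleSynchronizer class and its synchronize() method are hypothetical; the property name, default value, and API are taken from the changes shown in the diff.

import javax.inject.Inject;

import org.xwiki.search.solr.internal.api.SolrConfiguration;

public class ExampleSynchronizer
{
    @Inject
    private SolrConfiguration solrConfiguration;

    public void synchronize()
    {
        // Reads solr.synchronizeBatchSize from xwiki.properties, falling back to the default of 1000.
        int batchSize = this.solrConfiguration.getSynchronizationBatchSize();
        // ... iterate over the database and the Solr index in pages of batchSize documents ...
    }
}
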
@@ -139,6 +139,16 @@ public class DefaultSolrConfiguration implements SolrConfiguration
*/
public static final String SOLR_SYNCHRONIZE_AT_STARTUP_MODE = "solr.synchronizeAtStartupMode";
/**
* The default synchronization batch size.
*/
public static final int SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT = 1000;
/**
* The name of the configuration property containing the batch size for the synchronization.
*/
public static final String SOLR_SYNCHRONIZE_BATCH_SIZE = "solr.synchronizeBatchSize";
/**
* Indicate which mode to use for synchronize at startup by default.
*/
@@ -238,4 +248,10 @@ public SynchronizeAtStartupMode synchronizeAtStartupMode()
}
return result;
}
@Override
public int getSynchronizationBatchSize()
{
return this.configuration.getProperty(SOLR_SYNCHRONIZE_BATCH_SIZE, SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT);
}
}
@@ -121,4 +121,12 @@ enum SynchronizeAtStartupMode
* @since 12.5RC1
*/
SynchronizeAtStartupMode synchronizeAtStartupMode();
/**
* @return the size of the batch for the synchronization job between the database and the Solr index
* @since 17.2.0RC1
* @since 16.10.5
* @since 16.4.7
*/
int getSynchronizationBatchSize();
}
@@ -19,7 +19,10 @@
*/
package org.xwiki.search.solr.internal.job;
import javax.inject.Inject;
import org.xwiki.model.reference.EntityReference;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
/**
* Base class for {@link DocumentIterator}s.
@@ -30,16 +33,26 @@
*/
public abstract class AbstractDocumentIterator<T> implements DocumentIterator<T>
{
/**
* The maximum number of documents to query at once.
*/
protected static final int LIMIT = 100;
/**
* Specifies the root entity whose documents are iterated. If {@code null} then all the documents are iterated.
*/
protected EntityReference rootReference;
@Inject
protected SolrConfiguration solrConfiguration;
private int limit;
protected int getLimit()
{
// Cache the limit value to avoid possibly changing values during iteration.
if (this.limit == 0) {
this.limit = this.solrConfiguration.getSynchronizationBatchSize();
}
return this.limit;
}
@Override
public void remove()
{
@@ -199,7 +199,7 @@ private void fetchNextResults()
// the synchronization takes place. Also, the database is used as the reference store, meaning that we
// update the Solr index to match the database, not the other way around.
results = getQuery().setWiki(wiki).setOffset(offset).execute();
offset += LIMIT;
offset += getLimit();
} catch (QueryException e) {
throw new IllegalStateException("Failed to query the database.", e);
}
@@ -231,7 +231,7 @@ private Query getQuery() throws QueryException
}
}
query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(LIMIT);
query = queryManager.createQuery(select + whereClause + orderBy, Query.HQL).setLimit(getLimit());
countQuery = queryManager.createQuery(whereClause, Query.HQL).addFilter(countFilter);
if (spaceReference != null) {
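
The two database-side hunks above route the configured batch size into both the HQL query limit and the offset increment, so each page of results matches the step size used to advance through them. A simplified, hypothetical sketch of that offset/limit loop, assuming the same org.xwiki.query API that appears in the diff:

import java.util.List;

import org.xwiki.query.Query;
import org.xwiki.query.QueryException;
import org.xwiki.query.QueryManager;

// Hypothetical helper, not part of the commit: pages through an HQL statement with the configured batch size.
private void iterateInBatches(QueryManager queryManager, String statement, String wiki, int limit)
    throws QueryException
{
    int offset = 0;
    List<Object[]> results;
    do {
        results = queryManager.createQuery(statement, Query.HQL)
            .setLimit(limit)
            .setWiki(wiki)
            .setOffset(offset)
            .execute();
        // ... compare each returned (fullName, name, locale, version) row against the Solr index ...
        offset += limit;
    } while (!results.isEmpty());
}
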
@@ -165,7 +165,7 @@ private SolrQuery getQuery() throws SolrIndexerException
// plan to update the index to match the database during the synchronization process).
// See https://cwiki.apache.org/confluence/display/solr/Pagination+of+Results
query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START);
query.setRows(LIMIT);
query.setRows(getLimit());
}
return query;
}
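
On the Solr side, the same batch size becomes the page size of the cursor-based pagination referenced in the comment above (see the linked Solr documentation). A generic SolrJ sketch of that pattern follows, with hypothetical names; only the setRows() value is what this commit actually changes.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CursorMarkParams;

// Hypothetical sketch of SolrJ cursor paging; not part of the commit.
void iterateSolr(SolrClient solrClient, int limit) throws Exception
{
    SolrQuery query = new SolrQuery("*:*");
    query.setRows(limit);                          // page size = solr.synchronizeBatchSize
    query.setSort(SolrQuery.SortClause.asc("id")); // cursor paging requires a stable sort on the unique key
    String cursorMark = CursorMarkParams.CURSOR_MARK_START;
    boolean done = false;
    while (!done) {
        query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
        QueryResponse response = solrClient.query(query);
        // ... process response.getResults() ...
        String nextCursorMark = response.getNextCursorMark();
        done = cursorMark.equals(nextCursorMark);
        cursorMark = nextCursorMark;
    }
}
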
@@ -126,4 +126,13 @@ public void synchronizeAtStartupMode()
DefaultSolrConfiguration.SOLR_SYNCHRONIZE_AT_STARTUP_MODE_DEFAULT.name())).thenReturn("");
assertEquals(SolrConfiguration.SynchronizeAtStartupMode.FARM, this.configuration.synchronizeAtStartupMode());
}
@Test
void getSynchronizationBatchSize()
{
when(this.source.getProperty(DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE,
DefaultSolrConfiguration.SOLR_SYNCHRONIZE_BATCH_SIZE_DEFAULT)).thenReturn(42);
assertEquals(42, this.configuration.getSynchronizationBatchSize());
}
}
@@ -42,6 +42,7 @@
import org.xwiki.query.QueryException;
import org.xwiki.query.QueryFilter;
import org.xwiki.query.QueryManager;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
import org.xwiki.test.junit5.mockito.ComponentTest;
import org.xwiki.test.junit5.mockito.InjectMockComponents;
import org.xwiki.test.junit5.mockito.MockComponent;
@@ -85,6 +86,9 @@ class DatabaseDocumentIteratorTest
@Named("count")
private QueryFilter countQueryFilter;
@MockComponent
private SolrConfiguration configuration;
@InjectMockComponents
private DatabaseDocumentIterator databaseIterator;
@@ -107,12 +111,14 @@ void sizeWithException() throws Exception
@Test
void iterateAllWikis() throws Exception
{
int batchSize = 83;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
Query emptyQuery = mock(Query.class);
when(emptyQuery.execute()).thenReturn(Collections.emptyList());
Query chessQuery = mock(Query.class);
when(chessQuery.setOffset(0)).thenReturn(chessQuery);
when(chessQuery.setOffset(100)).thenReturn(emptyQuery);
when(chessQuery.setOffset(batchSize)).thenReturn(emptyQuery);
when(chessQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Blog.Code", "WebHome", "", "3.2" },
new Object[] { "Main", "Welcome", "en", "1.1" }, new Object[] { "XWiki.Syntax", "Links", "fr", "2.5" }));
@@ -125,7 +131,7 @@ void iterateAllWikis() throws Exception
Query tennisQuery = mock(Query.class);
when(tennisQuery.setOffset(0)).thenReturn(tennisQuery);
when(tennisQuery.setOffset(100)).thenReturn(emptyQuery);
when(tennisQuery.setOffset(batchSize)).thenReturn(emptyQuery);
when(tennisQuery.execute()).thenReturn(Arrays.asList(new Object[] { "Main", "Welcome", "en", "2.1" },
new Object[] { "XWiki.Syntax", "Links", "fr", "1.3" }));
@@ -178,6 +184,8 @@ void iterateAllWikis() throws Exception
@Test
void iterateOneWiki() throws Exception
{
int batchSize = 23;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(batchSize);
DocumentReference rootReference = createDocumentReference("gang", Arrays.asList("A", "B"), "C", null);
Query emptyQuery = mock(Query.class);
@@ -187,7 +195,7 @@ void iterateOneWiki() throws Exception
when(query.setLimit(anyInt())).thenReturn(query);
when(query.setWiki(rootReference.getWikiReference().getName())).thenReturn(query);
when(query.setOffset(0)).thenReturn(query);
when(query.setOffset(100)).thenReturn(emptyQuery);
when(query.setOffset(batchSize)).thenReturn(emptyQuery);
when(query.execute()).thenReturn(Collections.singletonList(new Object[] { "A.B", "C", "de", "3.1" }));
Map<String, Object> namedParameters = new HashMap();
@@ -37,6 +37,7 @@
import org.xwiki.model.reference.DocumentReferenceResolver;
import org.xwiki.model.reference.WikiReference;
import org.xwiki.search.solr.internal.api.FieldUtils;
import org.xwiki.search.solr.internal.api.SolrConfiguration;
import org.xwiki.search.solr.internal.api.SolrInstance;
import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
import org.xwiki.test.junit5.mockito.ComponentTest;
@@ -46,7 +47,9 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -68,6 +71,9 @@ class SolrDocumentIteratorTest
@MockComponent
private DocumentReferenceResolver<SolrDocument> solrDocumentReferenceResolver;
@MockComponent
private SolrConfiguration configuration;
@InjectMockComponents
private SolrDocumentIterator solrIterator;
@@ -101,6 +107,9 @@ void sizeWithException()
@Test
void iterate() throws Exception
{
int limit = 42;
when(this.configuration.getSynchronizationBatchSize()).thenReturn(limit);
SolrDocumentList firstResults = new SolrDocumentList();
firstResults.add(createSolrDocument("chess", Arrays.asList("A", "B"), "C", "", "1.3"));
firstResults.add(createSolrDocument("chess", Arrays.asList("M"), "N", "en", "2.4"));
@@ -139,6 +148,9 @@ void iterate() throws Exception
expectedResult.add(new ImmutablePair<>(documentReference, "1.1"));
assertEquals(expectedResult, actualResult);
verify(this.solrInstance, times(3)).query(argThat(query ->
query instanceof SolrQuery solrQuery && solrQuery.getRows() == limit));
}
private SolrDocument createSolrDocument(String wiki, List<String> spaces, String name, String locale,
@@ -797,6 +797,16 @@ distribution.automaticStartOnWiki=$xwikiPropertiesAutomaticStartOnWiki
#-# The default is:
# solr.synchronizeAtStartupMode=FARM
#-# [Since 17.2.0RC1]
#-# [Since 16.10.5]
#-# [Since 16.4.7]
#-# Indicates the batch size for the synchronization between the Solr index and the XWiki database. This defines how
#-# many documents are loaded from the database and from Solr in each step. Higher values lead to fewer queries and
#-# thus better performance, but they increase memory usage. The expected memory usage is around 1 KB per document,
#-# but it depends heavily on the length of the document names.
#-# The default is 1000.
# solr.synchronizeBatchSize=1000
#-------------------------------------------------------------------------------------
# Security
#-------------------------------------------------------------------------------------
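
As a rough illustration of the memory note in the hunk above: with the default batch size of 1000 and the estimated 1 KB per document, one batch holds about 1000 × 1 KB ≈ 1 MB of document metadata; a batch size of 10000 would need roughly 10 MB per batch, more if document names are long.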