Skip to content
Snippets Groups Projects
Commit efdc9c78 authored by Michael Hamann's avatar Michael Hamann Committed by Michael Hamann
Browse files

XWIKI-22917: Solr indexing takes very long when there are many objects with...

XWIKI-22917: Solr indexing takes very long when there are many objects with different values (#3923)

* Use a HashSet to compute unique values instead of a linear search.
* Rename LengthSolrInputDocument to XWikiSolrInputDocument.
* Use XWikiSolrInputDocument in more places to use the new method.
* Add a unit test for XWikiSolrInputDocument.

(cherry picked from commit 39e2c7ff)
parent 4707c64f
No related branches found
No related tags found
No related merge requests found
Showing
with 249 additions and 29 deletions
......@@ -57,8 +57,8 @@
import org.xwiki.search.solr.internal.api.SolrInstance;
import org.xwiki.search.solr.internal.job.IndexerJob;
import org.xwiki.search.solr.internal.job.IndexerRequest;
import org.xwiki.search.solr.internal.metadata.LengthSolrInputDocument;
import org.xwiki.search.solr.internal.metadata.SolrMetadataExtractor;
import org.xwiki.search.solr.internal.metadata.XWikiSolrInputDocument;
import org.xwiki.search.solr.internal.reference.SolrReferenceResolver;
import org.xwiki.store.ReadyIndicator;
......@@ -501,7 +501,7 @@ private boolean processBatch(IndexQueueEntry queueEntry)
switch (operation) {
case INDEX:
LengthSolrInputDocument solrDocument = getSolrDocument(batchEntry.reference);
XWikiSolrInputDocument solrDocument = getSolrDocument(batchEntry.reference);
if (solrDocument != null) {
solrInstance.add(solrDocument);
length += solrDocument.getLength();
......@@ -599,7 +599,7 @@ private boolean shouldCommit(int length, int size)
* @throws IllegalArgumentException if there is an incompatibility between a reference and the assigned extractor.
* @throws ExecutionContextException
*/
private LengthSolrInputDocument getSolrDocument(EntityReference reference)
private XWikiSolrInputDocument getSolrDocument(EntityReference reference)
throws SolrIndexerException, IllegalArgumentException, ExecutionContextException
{
SolrMetadataExtractor metadataExtractor = getMetadataExtractor(reference.getType());
......
......@@ -151,11 +151,11 @@ protected int getShortTextLimit()
}
@Override
public LengthSolrInputDocument getSolrDocument(EntityReference entityReference)
public XWikiSolrInputDocument getSolrDocument(EntityReference entityReference)
throws SolrIndexerException, IllegalArgumentException
{
try {
LengthSolrInputDocument solrDocument = new LengthSolrInputDocument();
XWikiSolrInputDocument solrDocument = new XWikiSolrInputDocument();
solrDocument.setField(FieldUtils.ID, this.seachUtils.getId(entityReference));
solrDocument.setField(FieldUtils.REFERENCE,
......@@ -180,12 +180,12 @@ public LengthSolrInputDocument getSolrDocument(EntityReference entityReference)
}
/**
* @param solrDocument the {@link LengthSolrInputDocument} to modify
* @param solrDocument the {@link XWikiSolrInputDocument} to modify
* @param entityReference the reference of the entity
* @return false if the entity should not be indexed (generally mean it does not exist), true otherwise
* @throws Exception in case of errors
*/
protected abstract boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
protected abstract boolean setFieldsInternal(XWikiSolrInputDocument solrDocument, EntityReference entityReference)
throws Exception;
/**
......@@ -365,7 +365,7 @@ protected Locale getLocale(DocumentReference documentReference) throws SolrIndex
* @param locale the locale of the indexed document; in case of translations, this will obviously be different than
* the original document's locale
*/
protected void setObjectContent(SolrInputDocument solrDocument, BaseObject object, Locale locale)
protected void setObjectContent(XWikiSolrInputDocument solrDocument, BaseObject object, Locale locale)
{
if (object == null) {
// Yes, the platform can return null objects.
......@@ -392,7 +392,7 @@ protected void setObjectContent(SolrInputDocument solrDocument, BaseObject objec
* @param propertyClass the class that describes the given property
* @param locale the locale of the indexed document
*/
protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?> property,
protected void setPropertyValue(XWikiSolrInputDocument solrDocument, BaseProperty<?> property,
PropertyClass propertyClass, Locale locale)
{
Object propertyValue = property.getValue();
......@@ -454,7 +454,7 @@ protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?>
* @param locale the locale of the indexed document
* @see "XWIKI-9417: Search does not return any results for Static List values"
*/
private void setStaticListPropertyValue(SolrInputDocument solrDocument, BaseProperty<?> property,
private void setStaticListPropertyValue(XWikiSolrInputDocument solrDocument, BaseProperty<?> property,
StaticListClass propertyClass, Locale locale)
{
// The list of known values specified in the XClass.
......@@ -489,7 +489,7 @@ private void setStaticListPropertyValue(SolrInputDocument solrDocument, BaseProp
* @param typedValue the value to add
* @param locale the locale of the indexed document
*/
protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?> property,
protected void setPropertyValue(XWikiSolrInputDocument solrDocument, BaseProperty<?> property,
TypedValue typedValue, Locale locale)
{
// Collect all the property values from all the objects of a document in a single (localized) field.
......@@ -508,12 +508,9 @@ protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?>
* @param fieldName the field name
* @param fieldValue the field value to add
*/
protected void addFieldValueOnce(SolrInputDocument solrDocument, String fieldName, Object fieldValue)
protected void addFieldValueOnce(XWikiSolrInputDocument solrDocument, String fieldName, Object fieldValue)
{
Collection<Object> fieldValues = solrDocument.getFieldValues(fieldName);
if (fieldValues == null || !fieldValues.contains(fieldValue)) {
solrDocument.addField(fieldName, fieldValue);
}
solrDocument.addFieldOnce(fieldName, fieldValue);
}
/**
......
......@@ -51,7 +51,7 @@ public class AttachmentSolrMetadataExtractor extends AbstractSolrMetadataExtract
private EntityReferenceSerializer<String> entityReferenceSerializer;
@Override
public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
public boolean setFieldsInternal(XWikiSolrInputDocument solrDocument, EntityReference entityReference)
throws Exception
{
AttachmentReference attachmentReference = new AttachmentReference(entityReference);
......
......@@ -97,7 +97,7 @@ public class DocumentSolrMetadataExtractor extends AbstractSolrMetadataExtractor
private SolrFieldNameEncoder fieldNameEncoder;
@Override
public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
public boolean setFieldsInternal(XWikiSolrInputDocument solrDocument, EntityReference entityReference)
throws Exception
{
DocumentReference documentReference = new DocumentReference(entityReference);
......@@ -218,7 +218,7 @@ private void setLinks(SolrInputDocument solrDocument, XWikiDocument translatedDo
* @param locale the locale of which to index the extra data.
* @throws XWikiException if problems occur.
*/
protected void setExtras(DocumentReference documentReference, SolrInputDocument solrDocument, Locale locale)
protected void setExtras(DocumentReference documentReference, XWikiSolrInputDocument solrDocument, Locale locale)
throws XWikiException
{
// We need to support the following types of queries:
......@@ -244,7 +244,7 @@ protected void setExtras(DocumentReference documentReference, SolrInputDocument
* @param locale the locale for which to index the objects.
* @param originalDocument the original document where the objects come from.
*/
protected void setObjects(SolrInputDocument solrDocument, Locale locale, XWikiDocument originalDocument)
protected void setObjects(XWikiSolrInputDocument solrDocument, Locale locale, XWikiDocument originalDocument)
{
for (Map.Entry<DocumentReference, List<BaseObject>> objects : originalDocument.getXObjects().entrySet()) {
boolean hasObjectsOfThisType = false;
......@@ -260,7 +260,7 @@ protected void setObjects(SolrInputDocument solrDocument, Locale locale, XWikiDo
}
@Override
protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?> property,
protected void setPropertyValue(XWikiSolrInputDocument solrDocument, BaseProperty<?> property,
TypedValue typedValue, Locale locale)
{
Object value = typedValue.getValue();
......
......@@ -25,7 +25,6 @@
import javax.inject.Named;
import javax.inject.Singleton;
import org.apache.solr.common.SolrInputDocument;
import org.xwiki.component.annotation.Component;
import org.xwiki.model.reference.DocumentReference;
import org.xwiki.model.reference.EntityReference;
......@@ -57,7 +56,7 @@ public class ObjectPropertySolrMetadataExtractor extends AbstractSolrMetadataExt
private SolrReferenceResolver resolver;
@Override
public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
public boolean setFieldsInternal(XWikiSolrInputDocument solrDocument, EntityReference entityReference)
throws Exception
{
ObjectPropertyReference objectPropertyReference = new ObjectPropertyReference(entityReference);
......@@ -91,7 +90,7 @@ public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityRef
}
@Override
protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?> property,
protected void setPropertyValue(XWikiSolrInputDocument solrDocument, BaseProperty<?> property,
TypedValue typedValue, Locale locale)
{
String fieldName = FieldUtils.getFieldName(FieldUtils.PROPERTY_VALUE, locale);
......@@ -112,7 +111,7 @@ protected void setPropertyValue(SolrInputDocument solrDocument, BaseProperty<?>
* @param objectProperty the object property.
* @throws Exception if problems occur.
*/
protected void setLocaleAndContentFields(DocumentReference documentReference, SolrInputDocument solrDocument,
protected void setLocaleAndContentFields(DocumentReference documentReference, XWikiSolrInputDocument solrDocument,
BaseProperty<ObjectPropertyReference> objectProperty) throws Exception
{
PropertyClass propertyClass = objectProperty.getPropertyClass(this.xcontextProvider.get());
......
......@@ -25,7 +25,6 @@
import javax.inject.Named;
import javax.inject.Singleton;
import org.apache.solr.common.SolrInputDocument;
import org.xwiki.component.annotation.Component;
import org.xwiki.model.reference.DocumentReference;
import org.xwiki.model.reference.EntityReference;
......@@ -55,7 +54,7 @@ public class ObjectSolrMetadataExtractor extends AbstractSolrMetadataExtractor
private SolrReferenceResolver resolver;
@Override
public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
public boolean setFieldsInternal(XWikiSolrInputDocument solrDocument, EntityReference entityReference)
throws Exception
{
BaseObjectReference objectReference = new BaseObjectReference(entityReference);
......@@ -96,7 +95,7 @@ public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityRef
* @param object the object.
* @throws Exception if problems occur.
*/
protected void setLocaleAndContentFields(DocumentReference documentReference, SolrInputDocument solrDocument,
protected void setLocaleAndContentFields(DocumentReference documentReference, XWikiSolrInputDocument solrDocument,
BaseObject object) throws Exception
{
// Do the work for each locale.
......
......@@ -19,6 +19,7 @@
*/
package org.xwiki.search.solr.internal.metadata;
import org.apache.solr.common.SolrInputDocument;
import org.xwiki.component.annotation.Role;
import org.xwiki.model.reference.EntityReference;
import org.xwiki.search.solr.internal.api.SolrIndexerException;
......@@ -45,6 +46,6 @@ public interface SolrMetadataExtractor
* @throws SolrIndexerException if problems occur.
* @throws IllegalArgumentException if the passed reference is not supported by the current implementation.
*/
LengthSolrInputDocument getSolrDocument(EntityReference entityReference) throws SolrIndexerException,
XWikiSolrInputDocument getSolrDocument(EntityReference entityReference) throws SolrIndexerException,
IllegalArgumentException;
}
......@@ -19,19 +19,30 @@
*/
package org.xwiki.search.solr.internal.metadata;
import java.io.Serial;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
/**
* Extended SolrInputDocument with calculated size.
* Extended SolrInputDocument with calculated size and support for adding fields once.
*
* @version $Id$
* @since 5.1M2
* @since 16.4.7
* @since 16.10.5
* @since 17.2.0RC1
*/
public class LengthSolrInputDocument extends SolrInputDocument
public class XWikiSolrInputDocument extends SolrInputDocument
{
/**
* Serialization identifier.
*/
@Serial
private static final long serialVersionUID = 1L;
/**
......@@ -39,6 +50,8 @@ public class LengthSolrInputDocument extends SolrInputDocument
*/
private int length;
private final Map<String, Set<Object>> uniqueFields = new HashMap<>();
/**
* @return the length (generally the number of characters). It's not the exact byte length, it's more a scale value.
*/
......@@ -52,12 +65,63 @@ public void setField(String name, Object value)
{
super.setField(name, value);
if (value instanceof String) {
this.length += ((String) value).length();
} else if (value instanceof byte[]) {
this.length += ((byte[]) value).length;
if (value instanceof String stringValue) {
this.length += stringValue.length();
} else if (value instanceof byte[] bytesValue) {
this.length += bytesValue.length;
}
// Remove the field as the values have been reset.
this.uniqueFields.remove(name);
// TODO: support more type ?
}
@Override
public SolrInputField removeField(String name)
{
this.uniqueFields.remove(name);
return super.removeField(name);
}
@Override
public void clear()
{
this.uniqueFields.clear();
super.clear();
}
/**
* Add a value to a field if it's not already present.
*
* @param name the field name
* @param value the value to add
*/
protected void addFieldOnce(String name, Object value)
{
Set<Object> existingValues = this.uniqueFields.computeIfAbsent(name, k -> {
Collection<Object> fieldValues = getFieldValues(name);
if (fieldValues != null) {
return new HashSet<>(fieldValues);
} else {
return new HashSet<>();
}
});
if (existingValues.add(value)) {
addField(name, value);
}
}
@Override
public void addField(String name, Object value)
{
// Add the value, but only if the field was already used with the "once" method.
// This adds the value again when called from the "once" method.
this.uniqueFields.computeIfPresent(name, (k, existingValues) -> {
existingValues.add(value);
return existingValues;
});
super.addField(name, value);
}
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.search.solr.internal.metadata;
import java.util.List;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
/**
* Unit tests for {@link XWikiSolrInputDocument}.
*
* @version $Id$
*/
class XWikiSolrInputDocumentTest
{
protected static final String FIELD_1 = "field1";
protected static final String VALUE_1 = "value1";
@Test
void testAddField()
{
XWikiSolrInputDocument doc = new XWikiSolrInputDocument();
doc.addField(FIELD_1, VALUE_1);
doc.addField(FIELD_1, VALUE_1);
assertEquals(List.of(VALUE_1, VALUE_1), doc.getFieldValues(FIELD_1));
}
@Test
void testAddFieldOnce()
{
XWikiSolrInputDocument doc = new XWikiSolrInputDocument();
// Basic test: adding a value twice should only add it once.
doc.addFieldOnce(FIELD_1, VALUE_1);
doc.addFieldOnce(FIELD_1, VALUE_1);
assertEquals(List.of(VALUE_1), doc.getFieldValues(FIELD_1));
// Adding a different value should add it, and it's unique even when added without the "once" method first.
String value2 = "value2";
doc.addField(FIELD_1, value2);
doc.addFieldOnce(FIELD_1, value2);
assertEquals(List.of(VALUE_1, value2), doc.getFieldValues(FIELD_1));
// Setting the field removes the previous values.
doc.setField(FIELD_1, value2);
assertEquals(value2, doc.getFieldValue(FIELD_1));
// Adding a value after setting the field should add it.
doc.addFieldOnce(FIELD_1, VALUE_1);
assertEquals(List.of(value2, VALUE_1), doc.getFieldValues(FIELD_1));
// Removing the field should allow adding the value again.
doc.removeField(FIELD_1);
doc.addFieldOnce(FIELD_1, VALUE_1);
assertEquals(List.of(VALUE_1), doc.getFieldValues(FIELD_1));
// Adding the value to another field should not affect the first field.
String field2 = "field2";
doc.addFieldOnce(field2, VALUE_1);
assertEquals(List.of(VALUE_1), doc.getFieldValues(FIELD_1));
assertEquals(List.of(VALUE_1), doc.getFieldValues(field2));
// Clearing the document should remove all fields.
doc.clear();
assertNull(doc.getFieldValue(FIELD_1));
assertNull(doc.getFieldValue(field2));
// Adding a value after clearing should add it.
doc.addFieldOnce(FIELD_1, VALUE_1);
assertEquals(List.of(VALUE_1), doc.getFieldValues(FIELD_1));
}
@Test
void setField()
{
XWikiSolrInputDocument doc = new XWikiSolrInputDocument();
doc.setField(FIELD_1, VALUE_1);
assertEquals(VALUE_1.length(), doc.getLength());
doc.setField(FIELD_1, VALUE_1);
assertEquals(2 * VALUE_1.length(), doc.getLength());
doc.setField(FIELD_1, VALUE_1.getBytes());
assertEquals(3 * VALUE_1.length(), doc.getLength());
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment