Commit 4a185e05 authored by Michael Hamann's avatar Michael Hamann Committed by Michael Hamann
Browse files

XCOMMONS-2426: Provide a component for filtering safe HTML elements and attributes

* Add an interface and two implementations for an HTML element sanitizer.
* Let the default implementation dispatch to the different
  implementations depending on a configuration.
* Allow overriding the hint from the execution context to allow a
  context to be more permissive than another.
* Add configuration options for allowed elements/attributes
* Add tests.
parent 67aeae54
......@@ -32,7 +32,7 @@
<packaging>jar</packaging>
<description>XWiki Commons - XML</description>
<properties>
<xwiki.jacoco.instructionRatio>0.72</xwiki.jacoco.instructionRatio>
<xwiki.jacoco.instructionRatio>0.82</xwiki.jacoco.instructionRatio>
<!-- There's a utility class with lots of features, allow it to have many dependencies;
There's a SAX event listener, which requires complex code -->
<checkstyle.suppressions.location>${basedir}/src/main/checkstyle/checkstyle-suppressions.xml
......@@ -53,6 +53,11 @@
<artifactId>xwiki-commons-context</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.xwiki.commons</groupId>
<artifactId>xwiki-commons-configuration-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
......
......@@ -36,4 +36,9 @@
<!-- XWikiDomSerializer copied from DomSerializer -->
<suppress checks="CyclomaticComplexity" files="XWikiDOMSerializer" />
<suppress checks="NPathComplexity" files="XWikiDOMSerializer" />
<!-- These files contain lists of strings copied from an upstream source; turning them into constants would
complicate updating from upstream. -->
<suppress checks="MultipleStringLiterals"
files="SecureHTMLElementSanitizer.java|HTMLDefinitions.java|MathMLDefinitions.java|SVGDefinitions.java"/>
</suppressions>
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.html;
import org.xwiki.component.annotation.Role;
import org.xwiki.stability.Unstable;
/**
* Provides methods to check if HTML elements and attributes/attribute values are considered safe.
* <p>
* This also includes SVG and MathML elements and attributes.
*
* @version $Id$
* @since 14.6RC1
*/
@Role
@Unstable
public interface HTMLElementSanitizer
{
/**
* The execution context property key under which a component hint can be stored. When this property is set, the
* default implementation looks up the sanitizer registered with that hint and delegates to it; if that lookup
* fails, it keeps using the implementation chosen by the configuration.
*/
String EXECUTION_CONTEXT_HINT_KEY = "xml.html.htmlElementSanitizerHint";
/**
* Checks whether an element may be used at all, independently of the attributes it carries.
*
* @param elementName the name of the HTML element
* @return {@code true} if the given element is allowed in principle (given appropriate attributes)
*/
boolean isElementAllowed(String elementName);
/**
* Checks whether a single attribute, with the given value, is considered safe on the given element.
*
* @param elementName the name of the element on which the attribute shall be checked
* @param attributeName the name of the attribute to check
* @param value the value of the attribute
* @return {@code true} if the attribute with this value is considered safe
*/
boolean isAttributeAllowed(String elementName, String attributeName, String value);
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.internal.html;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Provider;
import javax.inject.Singleton;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.xwiki.component.annotation.Component;
import org.xwiki.component.manager.ComponentLookupException;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.component.phase.Initializable;
import org.xwiki.component.phase.InitializationException;
import org.xwiki.configuration.ConfigurationSource;
import org.xwiki.context.Execution;
import org.xwiki.context.ExecutionContext;
import org.xwiki.stability.Unstable;
import org.xwiki.xml.html.HTMLElementSanitizer;
/**
 * Default {@link HTMLElementSanitizer} that loads the implementation chosen by the configuration.
 * <p>
 * The delegate is resolved once in {@link #initialize()} from the {@code xml.htmlElementSanitizer} configuration
 * property, falling back to the secure implementation. It can be overridden for the current execution context by
 * storing a component hint under {@link HTMLElementSanitizer#EXECUTION_CONTEXT_HINT_KEY}.
 * <p>
 * Hints that designate this component itself are rejected: delegating to itself would recurse endlessly.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component
@Singleton
@Unstable
public class DefaultHTMLElementSanitizer implements HTMLElementSanitizer, Initializable
{
    private static final String CONFIGURATION_KEY = "xml.htmlElementSanitizer";

    /**
     * The hint under which this component is registered: it declares no {@code @Named} annotation, and XWiki
     * registers such components under the default hint.
     */
    private static final String OWN_HINT = "default";

    /**
     * The implementation selected by the configuration, set by {@link #initialize()}.
     */
    private HTMLElementSanitizer implementation;

    @Inject
    @Named("restricted")
    private Provider<ConfigurationSource> configurationSourceProvider;

    @Inject
    private Execution execution;

    @Inject
    private Provider<ComponentManager> componentManagerProvider;

    @Inject
    private Logger logger;

    /**
     * Resolves the configured implementation, using the secure implementation when no configuration source is
     * available or when the property is not set.
     *
     * @throws InitializationException if not even the secure fallback implementation can be looked up
     */
    @Override
    public void initialize() throws InitializationException
    {
        String hint = SecureHTMLElementSanitizer.HINT;
        ConfigurationSource configurationSource = this.configurationSourceProvider.get();
        if (configurationSource != null) {
            hint = configurationSource.getProperty(CONFIGURATION_KEY, SecureHTMLElementSanitizer.HINT);
        }

        try {
            this.implementation = loadImplementationWithSecureFallback(hint);
        } catch (ComponentLookupException ex) {
            throw new InitializationException("Couldn't initialize the default secure HTMLElementSanitizer", ex);
        }
    }

    /**
     * @param hint the component hint to inspect
     * @return {@code true} if looking up the hint would yield this component again; a {@code null} hint is treated
     *     the same way because the component manager resolves it to the default hint
     */
    private static boolean designatesThisComponent(String hint)
    {
        return hint == null || OWN_HINT.equals(hint);
    }

    /**
     * Looks up the implementation with the given hint and uses the secure implementation when that is not possible.
     *
     * @param hint the configured component hint
     * @return the implementation to delegate to, never this component
     * @throws ComponentLookupException if the secure fallback implementation cannot be looked up
     */
    private HTMLElementSanitizer loadImplementationWithSecureFallback(String hint) throws ComponentLookupException
    {
        ComponentManager componentManager = this.componentManagerProvider.get();

        HTMLElementSanitizer result;
        if (designatesThisComponent(hint)) {
            // Looking up our own hint while we are being initialized would re-enter the creation of this very
            // component, and delegating to ourselves would never terminate.
            this.logger.error("The configured HTMLElementSanitizer hint [{}] designates the default implementation "
                + "itself, falling back to the default secure implementation.", hint);
            result = componentManager.getInstance(HTMLElementSanitizer.class, SecureHTMLElementSanitizer.HINT);
        } else {
            try {
                result = componentManager.getInstance(HTMLElementSanitizer.class, hint);
            } catch (ComponentLookupException e) {
                this.logger.error("Couldn't load the configured HTMLElementSanitizer with hint [{}], falling back to the "
                    + "default secure implementation: {}", hint, ExceptionUtils.getRootCauseMessage(e));
                result = componentManager.getInstance(HTMLElementSanitizer.class, SecureHTMLElementSanitizer.HINT);
            }
        }

        return result;
    }

    /**
     * @return the implementation requested by the current execution context if there is a usable one, the
     *     configured implementation otherwise
     */
    private HTMLElementSanitizer getImplementation()
    {
        HTMLElementSanitizer result = this.implementation;

        ExecutionContext context = this.execution.getContext();
        if (context != null && context.hasProperty(HTMLElementSanitizer.EXECUTION_CONTEXT_HINT_KEY)) {
            String hint = (String) context.getProperty(HTMLElementSanitizer.EXECUTION_CONTEXT_HINT_KEY);

            if (designatesThisComponent(hint)) {
                // Delegating to ourselves would call this method again and recurse until the stack overflows.
                this.logger.error("The HTMLElementSanitizer hint [{}] from the execution context designates the "
                    + "default implementation itself, falling back to the configured implementation.", hint);
            } else {
                try {
                    result = this.componentManagerProvider.get().getInstance(HTMLElementSanitizer.class, hint);
                } catch (ComponentLookupException e) {
                    this.logger.error("Couldn't load the HTMLElementSanitizer with hint [{}] from the execution context, "
                        + "falling back to the configured implementation: {}", hint, ExceptionUtils.getRootCauseMessage(e));
                }
            }
        }

        return result;
    }

    @Override
    public boolean isElementAllowed(String elementName)
    {
        return getImplementation().isElementAllowed(elementName);
    }

    @Override
    public boolean isAttributeAllowed(String elementName, String attributeName, String value)
    {
        return getImplementation().isAttributeAllowed(elementName, attributeName, value);
    }
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/*
* Alternatively, at your choice, the contents of this file may be used under the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package org.xwiki.xml.internal.html;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import javax.inject.Singleton;
import org.xwiki.component.annotation.Component;
/**
 * Holds the lists of HTML tags and attributes that are considered safe.
 * <p>
 * Unless otherwise noted, lists of elements and attributes are copied from DOMPurify by Cure53 and other contributors |
 * Released under the Apache license 2.0 and Mozilla Public License 2.0 -
 * <a href="https://github.com/cure53/DOMPurify/blob/main/LICENSE">LICENSE</a>.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component(roles = HTMLDefinitions.class)
@Singleton
public class HTMLDefinitions
{
    /**
     * Names of the HTML elements that are allowed.
     */
    private final Set<String> htmlTags;

    /**
     * Names of the HTML attributes that are allowed.
     */
    private final Set<String> htmlAttributes;

    /**
     * Default constructor, fills the sets of allowed tags and attributes.
     */
    public HTMLDefinitions()
    {
        this.htmlTags = toSet(
            "a", "abbr", "acronym", "address", "area", "article", "aside", "audio", "b", "bdi", "bdo",
            "big", "blink", "blockquote", "body", "br", "button", "canvas", "caption", "center", "cite", "code",
            "col", "colgroup", "content", "data", "datalist", "dd", "decorator", "del", "details", "dfn", "dialog",
            "dir", "div", "dl", "dt", "element", "em", "fieldset", "figcaption", "figure", "font", "footer", "form",
            "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", "i", "img", "input",
            "ins", "kbd", "label", "legend", "li", "main", "map", "mark", "marquee", "menu", "menuitem", "meter",
            "nav", "nobr", "ol", "optgroup", "option", "output", "p", "picture", "pre", "progress", "q", "rp", "rt",
            "ruby", "s", "samp", "section", "select", "shadow", "small", "source", "spacer", "span", "strike",
            "strong", "style", "sub", "summary", "sup", "table", "tbody", "td", "template", "textarea", "tfoot",
            "th", "thead", "time", "tr", "track", "tt", "u", "ul", "var", "video", "wbr");

        // Attributes that are in general allowed. Note that "target" is not generally safe, but XWiki contains code
        // that already adds the necessary attributes to make it safe both in HTMLCleaner and in XHTML rendering.
        this.htmlAttributes = toSet(
            "accept", "action", "align", "alt", "autocapitalize", "autocomplete", "autopictureinpicture",
            "autoplay", "background", "bgcolor", "border", "capture", "cellpadding", "cellspacing", "checked",
            "cite", "class", "clear", "color", "cols", "colspan", "controls", "controlslist", "coords",
            "crossorigin", "datetime", "decoding", "default", "dir", "disabled", "disablepictureinpicture",
            "disableremoteplayback", "download", "draggable", "enctype", "enterkeyhint", "face", "for", "headers",
            "height", "hidden", "high", "href", "hreflang", "id", "inputmode", "integrity", "ismap", "kind",
            "label", "lang", "list", "loading", "loop", "low", "max", "maxlength", "media", "method", "min",
            "minlength", "multiple", "muted", "name", "nonce", "noshade", "novalidate", "nowrap", "open", "optimum",
            "pattern", "placeholder", "playsinline", "poster", "preload", "pubdate", "radiogroup", "readonly",
            "rel", "required", "rev", "reversed", "role", "rows", "rowspan", "spellcheck", "scope", "selected",
            "shape", "size", "sizes", "span", "srclang", "start", "src", "srcset", "step", "style", "summary",
            "tabindex", "title", "translate", "type", "usemap", "valign", "value", "width", "xmlns", "slot",
            "target");
    }

    /**
     * @param names the names to collect
     * @return a new hash set containing the given names
     */
    private static Set<String> toSet(String... names)
    {
        return new HashSet<>(Arrays.asList(names));
    }

    /**
     * @param tagName the name of the tag to check
     * @return {@code true} if the tag is in the list of safe tags
     */
    public boolean isSafeTag(String tagName)
    {
        boolean safe = this.htmlTags.contains(tagName);
        return safe;
    }

    /**
     * @param attributeName the name of the attribute to check
     * @return {@code true} if the attribute is in the list of allowed attributes
     */
    public boolean isAllowedAttribute(String attributeName)
    {
        boolean allowed = this.htmlAttributes.contains(attributeName);
        return allowed;
    }
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.internal.html;
import java.util.Collections;
import java.util.List;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Provider;
import javax.inject.Singleton;
import org.xwiki.component.annotation.Component;
import org.xwiki.configuration.ConfigurationSource;
/**
 * Provides methods to easily access the configuration options of {@link org.xwiki.xml.html.HTMLElementSanitizer}.
 * <p>
 * All values are read from the {@code restricted} configuration source on every call. When no configuration source
 * is available, the default of the respective option is returned.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component(roles = HTMLElementSanitizerConfiguration.class)
@Singleton
public class HTMLElementSanitizerConfiguration
{
    private static final String EXTRA_ALLOWED_TAGS_CONFIGURATION = "xml.htmlElementSanitizer.extraAllowedTags";

    private static final String EXTRA_ALLOWED_ATTRIBUTES_CONFIGURATION =
        "xml.htmlElementSanitizer.extraAllowedAttributes";

    private static final String EXTRA_URI_SAFE_ATTRIBUTES_CONFIGURATION =
        "xml.htmlElementSanitizer.extraURISafeAttributes";

    private static final String EXTRA_DATA_URI_TAGS_CONFIGURATION = "xml.htmlElementSanitizer.extraDataUriTags";

    private static final String FORBID_TAGS_CONFIGURATION = "xml.htmlElementSanitizer.forbidTags";

    private static final String FORBID_ATTRIBUTES_CONFIGURATION = "xml.htmlElementSanitizer.forbidAttributes";

    private static final String ALLOW_UNKNOWN_PROTOCOLS_CONFIGURATION =
        "xml.htmlElementSanitizer.allowUnknownProtocols";

    private static final String ALLOWED_URI_REGEXP_CONFIGURATION = "xml.htmlElementSanitizer.allowedUriRegexp";

    @Inject
    @Named("restricted")
    private Provider<ConfigurationSource> configurationSourceProvider;

    /**
     * Reads a single configuration value.
     *
     * @param key the configuration key to read
     * @param valueType the type the value shall be returned as
     * @param defaultValue the value passed on as default to the configuration source, also returned when no
     *     configuration source is available
     * @param <T> the type of the value
     * @return the value provided by the configuration source, or the default value
     */
    private <T> T getValue(String key, Class<T> valueType, T defaultValue)
    {
        ConfigurationSource configurationSource = this.configurationSourceProvider.get();
        if (configurationSource == null) {
            return defaultValue;
        }

        return configurationSource.getProperty(key, valueType, defaultValue);
    }

    /**
     * Reads a configuration value that is a list of strings.
     *
     * @param key the configuration key to read
     * @return the configured list, or an empty list as default
     */
    @SuppressWarnings("unchecked")
    private List<String> getStringList(String key)
    {
        // Only the raw List class can be passed as value type, so the conversion to List<String> is unchecked.
        // Keeping it in this single place avoids repeating the unchecked conversion in every getter.
        return getValue(key, List.class, Collections.emptyList());
    }

    /**
     * @return the list of additionally allowed tags
     */
    public List<String> getExtraAllowedTags()
    {
        return getStringList(EXTRA_ALLOWED_TAGS_CONFIGURATION);
    }

    /**
     * @return the list of additionally allowed attributes
     */
    public List<String> getExtraAllowedAttributes()
    {
        return getStringList(EXTRA_ALLOWED_ATTRIBUTES_CONFIGURATION);
    }

    /**
     * @return the list of additional attributes that are safe for all kinds of URIs
     */
    public List<String> getExtraUriSafeAttributes()
    {
        return getStringList(EXTRA_URI_SAFE_ATTRIBUTES_CONFIGURATION);
    }

    /**
     * @return the list of additional tags whose attributes may have data URIs
     */
    public List<String> getExtraDataUriTags()
    {
        return getStringList(EXTRA_DATA_URI_TAGS_CONFIGURATION);
    }

    /**
     * @return the list of forbidden tags
     */
    public List<String> getForbidTags()
    {
        return getStringList(FORBID_TAGS_CONFIGURATION);
    }

    /**
     * @return the list of forbidden attributes
     */
    public List<String> getForbidAttributes()
    {
        return getStringList(FORBID_ATTRIBUTES_CONFIGURATION);
    }

    /**
     * @return if unknown protocols shall be allowed, {@code true} being the default
     */
    public boolean isAllowUnknownProtocols()
    {
        return getValue(ALLOW_UNKNOWN_PROTOCOLS_CONFIGURATION, Boolean.class, Boolean.TRUE);
    }

    /**
     * @return the regular expression for allowed URIs, {@code null} being the default
     */
    public String getAllowedUriRegexp()
    {
        return getValue(ALLOWED_URI_REGEXP_CONFIGURATION, String.class, null);
    }
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml.internal.html;
import javax.inject.Named;
import javax.inject.Singleton;
import org.xwiki.component.annotation.Component;
import org.xwiki.stability.Unstable;
import org.xwiki.xml.html.HTMLElementSanitizer;
/**
 * {@link HTMLElementSanitizer} registered under the {@code insecure} hint that performs no filtering at all: every
 * element and every attribute is reported as allowed.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component
@Singleton
@Named("insecure")
@Unstable
public class InsecureHTMLElementSanitizer implements HTMLElementSanitizer
{
    /**
     * @param elementName the name of the HTML element, not inspected
     * @return always {@code true}
     */
    @Override
    public boolean isElementAllowed(String elementName)
    {
        // No restriction on elements.
        return true;
    }

    /**
     * @param elementName the name of the element carrying the attribute, not inspected
     * @param attributeName the name of the attribute, not inspected
     * @param value the value of the attribute, not inspected
     * @return always {@code true}
     */
    @Override
    public boolean isAttributeAllowed(String elementName, String attributeName, String value)
    {
        // No restriction on attributes or their values.
        return true;
    }
}
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/*
* Alternatively, at your choice, the contents of this file may be used under the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package org.xwiki.xml.internal.html;
import java.util.Arrays;
import java.util.HashSet;