Commit b11eae9d authored by Michael Hamann's avatar Michael Hamann Committed by Michael Hamann
Browse files

XCOMMONS-1680: Filter Html attributes in restricted mode based on a whitelist

* Add a SanitizerFilter that checks namespaces and filters elements
  based on the HTMLElementSanitizer
parent 4a185e05
......@@ -107,6 +107,10 @@ public class DefaultHTMLCleaner implements HTMLCleaner
// TODO: remove when upgrading to HTMLCleaner 2.23
private HTMLFilter controlFilter;
@Inject
@Named("sanitizer")
private HTMLFilter sanitizerFilter;
@Inject
private Execution execution;
......@@ -201,7 +205,8 @@ public HTMLCleanerConfiguration getDefaultConfiguration()
this.listFilter,
this.fontFilter,
this.attributeFilter,
this.linkFilter));
this.linkFilter,
this.sanitizerFilter));
return configuration;
}
......
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/*
* Alternatively, at your choice, the contents of this file may be used under the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package org.xwiki.xml.internal.html.filter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import java.util.function.BiPredicate;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xwiki.component.annotation.Component;
import org.xwiki.xml.html.HTMLCleanerConfiguration;
import org.xwiki.xml.html.HTMLElementSanitizer;
import org.xwiki.xml.html.filter.AbstractHTMLFilter;
import org.xwiki.xml.internal.html.MathMLDefinitions;
import org.xwiki.xml.internal.html.SVGDefinitions;
/**
 * HTML filter that sanitizes the cleaned DOM document when the cleaner runs in restricted mode.
 * <p>
 * When the {@link HTMLCleanerConfiguration#RESTRICTED} cleaning parameter is {@code "true"} (case-insensitive), the
 * filter walks the document, tracks whether each element lives in the HTML, SVG or MathML namespace, removes
 * elements that are in an invalid namespace position or that the {@link HTMLElementSanitizer} rejects, and removes
 * the attributes that the {@link HTMLElementSanitizer} rejects from the elements that are kept. In any other case
 * the document is left untouched.
 *
 * @version $Id$
 * @since 14.6RC1
 */
@Component
@Named("sanitizer")
@Singleton
public class SanitizerFilter extends AbstractHTMLFilter
{
    private static final String MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML";

    private static final String SVG_NAMESPACE = "http://www.w3.org/2000/svg";

    private static final String HTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

    @Inject
    private HTMLElementSanitizer htmlElementSanitizer;

    @Inject
    private SVGDefinitions svgDefinitions;

    @Inject
    private MathMLDefinitions mathMLDefinitions;

    /**
     * Sanitizes the given document in place if restricted mode is enabled.
     *
     * @param document the document to sanitize
     * @param cleaningParameters the cleaning parameters, the {@link HTMLCleanerConfiguration#RESTRICTED} entry
     *     decides if anything is done
     */
    @Override
    public void filter(Document document, Map<String, String> cleaningParameters)
    {
        String restricted = cleaningParameters.get(HTMLCleanerConfiguration.RESTRICTED);
        if ("true".equalsIgnoreCase(restricted)) {
            // A document without a document element has nothing to sanitize.
            Element rootElement = document.getDocumentElement();
            if (rootElement != null) {
                cleanDocument(rootElement);
            }
        }
    }

    /**
     * Tag name and namespace of an element as computed during the traversal.
     */
    private static class TagInformation
    {
        /**
         * Sentinel instance marking an element that must not be where it is. It is compared by identity.
         */
        public static final TagInformation INVALID = new TagInformation(null, null);

        public final String tagName;

        public final String namespace;

        /**
         * Default constructor.
         *
         * @param tagName the name of the tag
         * @param namespace the namespace of the tag
         */
        TagInformation(String tagName, String namespace)
        {
            this.tagName = tagName;
            this.namespace = namespace;
        }
    }

    /**
     * Removes forbidden elements (with their whole subtree) and forbidden attributes below the given root.
     *
     * @param rootElement the root of the subtree to sanitize
     */
    private void cleanDocument(Element rootElement)
    {
        // Elements are only collected during the traversal and removed afterwards so that the tree structure stays
        // stable while it is being walked.
        List<Element> elementsToRemove = new ArrayList<>();

        traverseWithNamespace(rootElement, (element, tagInformation) -> {
            boolean forbidden = tagInformation == TagInformation.INVALID
                || !this.htmlElementSanitizer.isElementAllowed(element.getTagName());

            if (forbidden) {
                elementsToRemove.add(element);
            } else {
                // getAttributes(Element) returns a snapshot, so attributes can be removed while iterating.
                for (Attr attribute : getAttributes(element)) {
                    if (!this.htmlElementSanitizer.isAttributeAllowed(element.getTagName(), attribute.getName(),
                        attribute.getValue()))
                    {
                        element.removeAttributeNode(attribute);
                    }
                }
            }

            // The children of a forbidden element are dropped together with it, no need to visit them.
            return forbidden;
        });

        for (Element element : elementsToRemove) {
            Node parent = element.getParentNode();
            if (parent != null) {
                parent.removeChild(element);
            }
        }
    }

    /**
     * Walks the subtree below the given root in document order without recursion and calls the given predicate for
     * every element together with its computed {@link TagInformation}.
     * <p>
     * The traversal never leaves the subtree of the root element, even if the root element has following siblings.
     *
     * @param rootElement the element at which the traversal starts and ends
     * @param traversal called for every element; when it returns {@code true} the children of the element are
     *     skipped
     */
    private void traverseWithNamespace(Element rootElement, BiPredicate<Element, TagInformation> traversal)
    {
        // Stack of the tag information of the ancestors of the current node. The initial entry acts as the
        // (virtual) HTML parent of the root element.
        Deque<TagInformation> parentNamespace = new ArrayDeque<>();
        TagInformation currentNamespace = new TagInformation("html", HTML_NAMESPACE);
        parentNamespace.push(currentNamespace);

        Node node = rootElement;
        while (node != null) {
            boolean skipChildren = false;

            if (node.getNodeType() == Node.ELEMENT_NODE && node instanceof Element) {
                Element element = (Element) node;
                currentNamespace = checkNamespace(element, parentNamespace.peek());
                skipChildren = traversal.test(element, currentNamespace);
            }

            if (!skipChildren && node.getFirstChild() != null) {
                // Descend: the current node becomes the parent of the next visited node.
                parentNamespace.push(currentNamespace);
                node = node.getFirstChild();
            } else {
                // Climb until a node with a next sibling is found, but never above the root element.
                while (node != rootElement && node.getNextSibling() == null) {
                    node = node.getParentNode();
                    currentNamespace = parentNamespace.pop();
                }

                // Being back at the root means the subtree is done; siblings of the root are not part of it.
                node = node == rootElement ? null : node.getNextSibling();
            }
        }
    }

    /**
     * Computes the namespace of the current element if it is allowed.
     * <p>
     * Tries to follow the logic in DOMPurify by Cure53 and other contributors | Released under the Apache license
     * 2.0 and Mozilla Public License 2.0 - <a href="https://github.com/cure53/DOMPurify/blob/main/LICENSE">LICENSE</a>.
     *
     * @param element the element to check
     * @param parentTag the information of the parent tag
     * @return the tag information of the current tag or {@link TagInformation#INVALID} if the element must not be
     *     there
     */
    private TagInformation checkNamespace(Element element, TagInformation parentTag)
    {
        String tagName = element.getTagName();
        TagInformation result = TagInformation.INVALID;

        // Stay in parent SVG/MathML namespace if the current element clearly belongs to the parent namespace.
        if (SVG_NAMESPACE.equals(parentTag.namespace) && isPureSVGTag(tagName, parentTag)) {
            result = new TagInformation(tagName, SVG_NAMESPACE);
        } else if (MATHML_NAMESPACE.equals(parentTag.namespace) && this.mathMLDefinitions.isMathMLTag(tagName)) {
            result = new TagInformation(tagName, MATHML_NAMESPACE);
        } else if (areHTMLChildrenAllowed(parentTag)) {
            // If HTML children are allowed, only allow the element if it is actually an HTML element or the root
            // element of MathML/SVG.
            if ("math".equals(tagName)) {
                result = new TagInformation(tagName, MATHML_NAMESPACE);
            } else if ("svg".equals(tagName)) {
                result = new TagInformation(tagName, SVG_NAMESPACE);
            } else if (isPossiblyHtmlTag(tagName)) {
                result = new TagInformation(tagName, HTML_NAMESPACE);
            }
        }

        return result;
    }

    /**
     * @param tagName the tag name to check
     * @param parentTag the parent information
     * @return if the tag is an SVG tag and not also an HTML tag that is nested in an HTML integration point in SVG
     */
    private boolean isPureSVGTag(String tagName, TagInformation parentTag)
    {
        return this.svgDefinitions.isSVGTag(tagName) && (
            !this.svgDefinitions.isHTMLIntegrationPoint(parentTag.tagName)
                || !this.svgDefinitions.isCommonHTMLElement(tagName));
    }

    /**
     * @param parent the information of the parent tag
     * @return if the parent is in the HTML namespace, or is an SVG element reported as HTML integration point, or is
     *     a MathML element reported as text or HTML integration point
     */
    private boolean areHTMLChildrenAllowed(TagInformation parent)
    {
        boolean result = HTML_NAMESPACE.equals(parent.namespace);
        result = result || (SVG_NAMESPACE.equals(parent.namespace)
            && this.svgDefinitions.isHTMLIntegrationPoint(parent.tagName));
        result = result || (MATHML_NAMESPACE.equals(parent.namespace)
            && this.mathMLDefinitions.isTextOrHTMLIntegrationPoint(parent.tagName));
        return result;
    }

    /**
     * @param tagName the tag name to check
     * @return if the given tag is not a MathML tag and is either not an SVG tag or an SVG tag that is also a
     *     common HTML element
     */
    private boolean isPossiblyHtmlTag(String tagName)
    {
        return !this.mathMLDefinitions.isMathMLTag(tagName)
            && (!this.svgDefinitions.isSVGTag(tagName) || this.svgDefinitions.isCommonHTMLElement(tagName));
    }

    /**
     * @param element the element whose attributes shall be listed
     * @return a new list holding the attribute nodes of the element, independent of later changes to the element
     */
    private List<Attr> getAttributes(Element element)
    {
        NamedNodeMap attributeNodes = element.getAttributes();
        int length = attributeNodes.getLength();
        List<Attr> result = new ArrayList<>(length);
        for (int i = 0; i < length; ++i) {
            result.add((Attr) attributeNodes.item(i));
        }
        return result;
    }
}
......@@ -6,6 +6,7 @@ org.xwiki.xml.internal.html.filter.BodyFilter
org.xwiki.xml.internal.html.filter.ControlCharactersFilter
org.xwiki.xml.internal.html.filter.AttributeFilter
org.xwiki.xml.internal.html.filter.UniqueIdFilter
org.xwiki.xml.internal.html.filter.SanitizerFilter
org.xwiki.xml.internal.html.DefaultHTMLCleaner
org.xwiki.xml.internal.html.XWikiHTML5TagProvider
org.xwiki.xml.internal.html.DefaultHTMLElementSanitizer
......
......@@ -25,11 +25,18 @@
import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.configuration.internal.RestrictedConfigurationSourceProvider;
import org.xwiki.context.internal.DefaultExecution;
import org.xwiki.test.annotation.ComponentList;
import org.xwiki.test.junit5.mockito.ComponentTest;
import org.xwiki.xml.internal.html.DefaultHTMLCleaner;
import org.xwiki.xml.internal.html.DefaultHTMLCleanerTest;
import org.xwiki.xml.internal.html.DefaultHTMLElementSanitizer;
import org.xwiki.xml.internal.html.HTMLDefinitions;
import org.xwiki.xml.internal.html.HTMLElementSanitizerConfiguration;
import org.xwiki.xml.internal.html.MathMLDefinitions;
import org.xwiki.xml.internal.html.SVGDefinitions;
import org.xwiki.xml.internal.html.SecureHTMLElementSanitizer;
import org.xwiki.xml.internal.html.XWikiHTML5TagProvider;
import org.xwiki.xml.internal.html.filter.AttributeFilter;
import org.xwiki.xml.internal.html.filter.BodyFilter;
......@@ -38,6 +45,7 @@
import org.xwiki.xml.internal.html.filter.LinkFilter;
import org.xwiki.xml.internal.html.filter.ListFilter;
import org.xwiki.xml.internal.html.filter.ListItemFilter;
import org.xwiki.xml.internal.html.filter.SanitizerFilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
......@@ -58,6 +66,14 @@
BodyFilter.class,
AttributeFilter.class,
ControlCharactersFilter.class,
SanitizerFilter.class,
DefaultHTMLElementSanitizer.class,
SecureHTMLElementSanitizer.class,
HTMLElementSanitizerConfiguration.class,
RestrictedConfigurationSourceProvider.class,
HTMLDefinitions.class,
MathMLDefinitions.class,
SVGDefinitions.class,
DefaultHTMLCleaner.class,
DefaultExecution.class,
XWikiHTML5TagProvider.class
......
......@@ -28,11 +28,18 @@
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.configuration.internal.RestrictedConfigurationSourceProvider;
import org.xwiki.context.internal.DefaultExecution;
import org.xwiki.test.annotation.ComponentList;
import org.xwiki.test.junit5.mockito.ComponentTest;
import org.xwiki.xml.html.HTMLCleaner;
import org.xwiki.xml.internal.html.DefaultHTMLCleaner;
import org.xwiki.xml.internal.html.DefaultHTMLElementSanitizer;
import org.xwiki.xml.internal.html.HTMLDefinitions;
import org.xwiki.xml.internal.html.HTMLElementSanitizerConfiguration;
import org.xwiki.xml.internal.html.MathMLDefinitions;
import org.xwiki.xml.internal.html.SVGDefinitions;
import org.xwiki.xml.internal.html.SecureHTMLElementSanitizer;
import org.xwiki.xml.internal.html.XWikiHTML5TagProvider;
import org.xwiki.xml.internal.html.filter.AttributeFilter;
import org.xwiki.xml.internal.html.filter.BodyFilter;
......@@ -41,6 +48,7 @@
import org.xwiki.xml.internal.html.filter.LinkFilter;
import org.xwiki.xml.internal.html.filter.ListFilter;
import org.xwiki.xml.internal.html.filter.ListItemFilter;
import org.xwiki.xml.internal.html.filter.SanitizerFilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
......@@ -63,6 +71,14 @@
DefaultHTMLCleaner.class,
DefaultExecution.class,
ControlCharactersFilter.class,
SanitizerFilter.class,
DefaultHTMLElementSanitizer.class,
SecureHTMLElementSanitizer.class,
HTMLElementSanitizerConfiguration.class,
RestrictedConfigurationSourceProvider.class,
HTMLDefinitions.class,
MathMLDefinitions.class,
SVGDefinitions.class,
XWikiHTML5TagProvider.class
})
// @formatter:on
......
......@@ -36,6 +36,7 @@
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xwiki.component.manager.ComponentManager;
import org.xwiki.configuration.internal.RestrictedConfigurationSourceProvider;
import org.xwiki.test.annotation.ComponentList;
import org.xwiki.test.junit5.mockito.ComponentTest;
import org.xwiki.test.junit5.mockito.InjectMockComponents;
......@@ -49,6 +50,7 @@
import org.xwiki.xml.internal.html.filter.LinkFilter;
import org.xwiki.xml.internal.html.filter.ListFilter;
import org.xwiki.xml.internal.html.filter.ListItemFilter;
import org.xwiki.xml.internal.html.filter.SanitizerFilter;
import org.xwiki.xml.internal.html.filter.UniqueIdFilter;
import static org.junit.jupiter.api.Assertions.assertEquals;
......@@ -71,6 +73,14 @@
DefaultHTMLCleaner.class,
LinkFilter.class,
ControlCharactersFilter.class,
SanitizerFilter.class,
DefaultHTMLElementSanitizer.class,
SecureHTMLElementSanitizer.class,
HTMLElementSanitizerConfiguration.class,
RestrictedConfigurationSourceProvider.class,
HTMLDefinitions.class,
MathMLDefinitions.class,
SVGDefinitions.class,
XWikiHTML5TagProvider.class
})
// @formatter:on
......@@ -107,7 +117,7 @@ public String getHeaderFull()
/**
* Cleans using the cleaner configuration {@link DefaultHTMLCleanerTest#cleanerConfiguration}.
*
* <p>
* Ensures that always the correct configuration is used and allows executing the same tests for HTML 4 and HTML 5.
*
* @param originalHtmlContent The content to clean as string.
......@@ -322,6 +332,31 @@ void restrictedHtml()
assertEquals(getHeaderFull() + "<pre>p {color:white;}</pre>" + FOOTER, result);
}
/**
 * Checks that restricted mode strips dangerous attributes and tags while SVG and MathML content keeps working.
 */
@Test
void restrictedAttributesAndTags() throws Exception
{
    // Enable restricted mode on top of the parameters that are already configured.
    Map<String, String> restrictedParameters = new HashMap<>(this.cleanerConfiguration.getParameters());
    restrictedParameters.put("restricted", "true");
    this.cleanerConfiguration.setParameters(restrictedParameters);

    // The event handler attribute is expected to be dropped, the image itself stays.
    assertHTML("<p><img src=\"img.png\" /></p>", "<img onerror=\"alert(1)\" src=img.png />");
    // The javascript: link target is expected to be dropped, the anchor and its text stay.
    assertHTML("<p><a>Hello!</a></p>", "<a href=\"javascript:alert(1)\">Hello!</a>");
    // The iframe is expected to be removed entirely.
    assertHTML("<p></p>", "<iframe src=\"whatever\"/>");

    // Check that SVG is still working in restricted mode.
    cleanSVGTags();
    cleanTitleWithNamespace();

    // Check that MathML is still working in restricted mode: the span placed directly inside math is expected to
    // disappear while the span nested in mi is kept.
    String mathInput = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><span></span><mtext>X</mtext><mi>"
        + "<span>foo</span></mi></math>";
    String mathExpected = "<p><math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mtext>X</mtext><mi>"
        + "<span>foo</span></mi></math></p>";
    assertHTML(mathExpected, mathInput);
}
/**
* Verify that passing a fully-formed XHTML header works fine.
*/
......@@ -366,30 +401,29 @@ void cleanSVGTags() throws Exception
* also
* <a href="https://jira.xwiki.org/browse/XWIKI-9753">XWIKI-9753</a>).
*/
@Disabled("See https://jira.xwiki.org/browse/XWIKI-9753")
@Test
void cleanTitleWithNamespace()
{
// Test with TITLE in HEAD
String input =
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\">\n"
+ " <head>\n"
"<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">"
+ "<head>\n"
+ " <title>Title test</title>\n"
+ " </head>\n"
+ " <body>\n"
+ " </head>"
+ "<body>\n"
+ " <p>before</p>\n"
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"300\" width=\"500\">\n"
+ " <p><svg xmlns=\"http://www.w3.org/2000/svg\" height=\"300\" width=\"500\">\n"
+ " <g>\n"
+ " <title>SVG Title Demo example</title>\n"
+ " <rect height=\"50\" style=\"fill:none; stroke:blue; stroke-width:1px\" width=\"200\" x=\"10\" "
+ "y=\"10\"></rect>\n" + " </g>\n" + " </svg>\n" + " <p>after</p>\n";
+ "y=\"10\"></rect>\n" + " </g>\n" + " </svg></p>\n" + " <p>after</p>\n";
assertEquals(getHeader() + input + FOOTER,
HTMLUtils.toString(clean(input)));
}
/**
* Verify that a xmlns namespace set on the HTML element is not removed by default and it's removed if {@link
* HTMLCleanerConfiguration#NAMESPACES_AWARE} is set to false.
* Verify that a xmlns namespace set on the HTML element is not removed by default and it's removed if
* {@link HTMLCleanerConfiguration#NAMESPACES_AWARE} is set to false.
*/
@Test
void cleanHTMLTagWithNamespace()
......@@ -409,7 +443,19 @@ void cleanHTMLTagWithNamespace()
}
/**
* Test that cleaning an empty DIV works (it used to fail, see <a href="https://jira.xwiki.org/browse/XWIKI-4007">XWIKI-4007</a>).
* Check that template tags inside select don't survive, might be security-relevant, DOMPurify contains a similar
* check, see <a href="https://github.com/cure53/DOMPurify/commit/e32ca248c0e9450fb182e52e978631cbd78f1123">commit
* e32ca248c0 in DOMPurify</a>.
*/
@Test
void cleanTemplateInsideSelect()
{
    // The template element nested in the select is expected to be gone after cleaning.
    String input = "<select><template></template></select>";
    String expected = "<p><select></select></p>";

    assertHTML(expected, input);
}
/**
* Test that cleaning an empty DIV works (it used to fail, see <a
* href="https://jira.xwiki.org/browse/XWIKI-4007">XWIKI-4007</a>).
*/
@Test
void cleanEmptyDIV()
......@@ -607,7 +653,7 @@ void divInsideDl()
/**
* Check what happens when the dt-tag is inside div.
*
* <p>
* This should add a wrapping dl but doesn't for HTML 4, but it works in HTML5, see
* {@link HTML5HTMLCleanerTest#divWithDt()}.
*
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment