Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests to explain what the namespace declarations in HTMLParsingHelper are used for #10

Merged
merged 2 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
],
"require": {
"php": "^7.2|8.0.*|8.1.*|8.2.*",
"ext-dom": "*",
"ext-xml": "*"
},
"require-dev": {
Expand Down
10 changes: 5 additions & 5 deletions src/Webfactory/Dom/HTMLParsingHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ protected function defineImplicitNamespaces(): array
*/
if ((phpversion('xml') >= '8.1.21') && (phpversion('xml') < '8.1.25')) {
return [
'html' => 'http://www.w3.org/1999/xhtml', // für XPath
'' => 'http://www.w3.org/1999/xhtml', // default ns
'hx' => 'http://purl.org/NET/hinclude', // fuer HInclude http://mnot.github.io/hinclude/; ein Weg um z.B. Controller in Symfony per Ajax zu embedden
'html' => 'http://www.w3.org/1999/xhtml',
'' => 'http://www.w3.org/1999/xhtml',
'hx' => 'http://purl.org/NET/hinclude',
];
}

return [
'' => 'http://www.w3.org/1999/xhtml', // default ns
'html' => 'http://www.w3.org/1999/xhtml', // für XPath
'' => 'http://www.w3.org/1999/xhtml', // ignored by BaseParsingHelper::createXPath(), but defining the default namespace that will be assumed to be active when BaseParsingHelper::parseFragment() is called and no explicit namespace declarations are given
'html' => 'http://www.w3.org/1999/xhtml', // so XPath expressions can use the "html" prefix to match the current HTML variant (unless an explicit mapping is given to BaseParsingHelper::createXPath())
'hx' => 'http://purl.org/NET/hinclude', // fuer HInclude http://mnot.github.io/hinclude/; ein Weg um z.B. Controller in Symfony per Ajax zu embedden
];
}
Expand Down
95 changes: 94 additions & 1 deletion tests/Webfactory/Dom/Test/PolyglotHTML5ParsingHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@

namespace Webfactory\Dom\Test;

use Webfactory\Dom\PolyglotHTML5ParsingHelper;

class PolyglotHTML5ParsingHelperTest extends HTMLParsingHelperTest
{
/**
 * Returns a new PolyglotHTML5ParsingHelper.
 *
 * NOTE(review): the two return statements below are the removed/added pair of the
 * diff this excerpt was taken from. Both construct the same class; the second one
 * relies on the `use Webfactory\Dom\PolyglotHTML5ParsingHelper;` import instead of
 * the fully qualified name. Presumably the parent test case calls this method to
 * obtain the parser under test -- confirm in HTMLParsingHelperTest.
 *
 * @return PolyglotHTML5ParsingHelper
 */
protected function createParsingHelper()
{
return new \Webfactory\Dom\PolyglotHTML5ParsingHelper();
return new PolyglotHTML5ParsingHelper();
}

/**
Expand Down Expand Up @@ -88,4 +90,95 @@ public function svgNamespaceIsNotReconciled()
'<div><svg xmlns="http://www.w3.org/2000/svg" class="x" width="300" height="150" viewBox="0 0 300 150"><path fill="#FF7949" d="M300 5.49c0-2.944-1.057-4.84-2.72-5.49h-2.92c-.79.247-1.632.67-2.505 1.293L158.145 96.56c-4.48 3.19-11.81 3.19-16.29 0L8.146 1.292C7.27.67 6.43.247 5.64 0H2.72C1.056.65 0 2.546 0 5.49V150h300V5.49z"></path></svg></div>'
);
}

/**
 * Parses $xml as a complete document, creates an XPath object for it without passing
 * an explicit namespace mapping, and checks that $xpathExpression selects exactly one
 * node whose text content is "test".
 *
 * @test
 * @dataProvider provideXpathForDocuments
 */
public function xpathParseDocument($xml, $xpathExpression)
{
    // No namespace mapping is passed to createXPath(); the data sets rely on the
    // prefixes the parsing helper registers by itself.
    $xpath = $this->parser->createXPath($this->parser->parseDocument($xml));
    $matches = $xpath->query($xpathExpression);

    self::assertCount(1, $matches);
    self::assertSame('test', $matches[0]->textContent);
}

/**
 * Data sets for xpathParseDocument(): a complete HTML document plus an XPath
 * expression that is expected to select exactly one node with the text "test".
 *
 * Declared static because PHPUnit 10 deprecates non-static data providers and
 * PHPUnit 11 rejects them; older PHPUnit versions accept static providers too.
 * The body never touches $this, so nothing else changes.
 *
 * @return iterable<string, array{0: string, 1: string}> data set name => [document, XPath expression]
 */
public static function provideXpathForDocuments(): iterable
{
    /*
     * In this document, nodes are not in a namespace at all. Thus, we have to use the
     * XPath expression "//p", which searches for an item _not associated with a namespace_.
     *
     * Note that this _should not be done in practice_, since HTML5 has a built-in, undeclared
     * "native" default namespace for the <html> element.
     *
     * The libxml XML parser, however, does not know about HTML5 - only about XML. This is why
     * the Polyglot spec (https://www.w3.org/TR/html-polyglot/#h4_element-level-namespaces)
     * states that
     *
     *     <html xmlns="http://www.w3.org/1999/xhtml">
     *
     * ... should be used to achieve the same semantics for HTML5-aware and XML-only parsers.
     */
    yield 'HTML document that does not use a default namespace' => [
        '<html><body><p>test</p></body></html>',
        '//p',
    ];

    /*
     * In this document, a default namespace is used. All nodes are associated with this
     * namespace. The XPath expression has to match namespaced nodes, and "//p" would be a
     * node _without_ a namespace. So a namespace has to be registered on the XPath object
     * and its prefix used in the expression.
     *
     * If no explicit namespace mapping is given when creating the XPath object,
     * \Webfactory\Dom\BaseParsingHelper::createXPath() registers the ParsingHelper's implicit
     * namespaces as a convenience. That includes the "html" prefix for the namespace URI of
     * the HTML variant (XHTML vs HTML5) in use.
     */
    yield 'HTML document that uses a default namespace' => [
        '<html xmlns="http://www.w3.org/1999/xhtml"><body><p>test</p></body></html>',
        '//html:p',
    ];

    /*
     * Same as the previous case, except that the document binds the XHTML namespace URI to
     * the explicit prefix "foo" instead of declaring it as the default namespace. The XPath
     * expression still uses the "html" prefix.
     */
    yield 'HTML document with explicit namespace' => [
        '<html xmlns:foo="http://www.w3.org/1999/xhtml"><foo:body><foo:p>test</foo:p></foo:body></html>',
        '//html:p',
    ];
}

/**
 * Parses $xmlFragment with parseFragment() (no namespace declarations passed), creates
 * an XPath object for the result, and checks that $xpathExpression selects exactly one
 * node whose text content is "test".
 *
 * @test
 * @dataProvider provideXpathForFragments
 */
public function xpathParseFragment($xmlFragment, $xpathExpression)
{
    $parsedFragment = $this->parser->parseFragment($xmlFragment);
    $result = $this->parser->createXPath($parsedFragment)->query($xpathExpression);

    self::assertCount(1, $result);

    $firstMatch = $result[0];
    self::assertSame('test', $firstMatch->textContent);
}

/**
 * Data sets for xpathParseFragment(): an XML fragment plus an XPath expression that
 * is expected to select exactly one node with the text "test".
 *
 * Declared static because PHPUnit 10 deprecates non-static data providers and
 * PHPUnit 11 rejects them; older PHPUnit versions accept static providers too.
 * The body never touches $this, so nothing else changes.
 *
 * @return iterable<string, array{0: string, 1: string}> data set name => [fragment, XPath expression]
 */
public static function provideXpathForFragments(): iterable
{
    /*
     * When BaseParsingHelper::parseFragment() is used without passing a mapping of
     * namespaces, a 'default' assumption is made depending on the ParsingHelper instance.
     *
     * For HTML5, this assumes that fragment elements without namespace declarations live in
     * the http://www.w3.org/1999/xhtml namespace URI; this corresponds to the 'html'
     * convenience prefix set up in XPath expressions.
     */
    yield 'default namespace assumed for fragments' => [
        '<p>test</p>',
        '//html:p',
    ];
}
}