Skip to content

Commit

Permalink
accept webmention verification for pages with only mf1
Browse files Browse the repository at this point in the history
if the document contains a link to the target, but that link is not in the parsed result, don't return an error, but also don't return the parsed document

closes #150
  • Loading branch information
aaronpk committed Apr 20, 2020
1 parent e8e20fc commit 37e297d
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 1 deletion.
4 changes: 4 additions & 0 deletions controllers/Parse.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ public function parse(Request $request, Response $response) {
$this->_pretty = true;
}

if($request->get('include-mf1')) {
$opts['include-mf1'] = $request->get('include-mf1') == 'false' ? false : true;
}

$url = $request->get('url');
$html = $request->get('html') ?: $request->get('body');

Expand Down
6 changes: 5 additions & 1 deletion lib/XRay/Formats/HTML.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ public static function parse($http, $http_response, $opts=[]) {
}
}

$mf2 = \mf2\Parse($html, $url);
$includeMF1 = true;
if(isset($opts['include-mf1']) && $opts['include-mf1'] == false)
$includeMF1 = false;

$mf2 = \Mf2\parse($html, $url, $includeMF1);

$canonical = false;

Expand Down
19 changes: 19 additions & 0 deletions lib/XRay/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@ public function parse($http_response, $opts=[]) {
} else {
$found = $this->_findLinkInTree($opts['target'], $document['data']);
$error_description = 'The Microformats at the source URL do not contain a link to the target URL. Check the source URL in a Microformats parser such as php.microformats.io';

if(!$found && isset($document['html'])) {
// If no link was found in the parsed mf2 tree, check for a link in the HTML
$found = $this->_findLinkInHTML($opts['target'], $document['html']);
// If there is a link, and if the HTML document has no mf2, then downgrade to a regular mention
if($found) {
$mf2Data = Formats\HTML::parse($this->http, $http_response, ['include-mf1'=>false]);
if(isset($mf2Data['data']['type']) && $mf2Data['data']['type'] == 'unknown') {
// Since the link was found in the HTML, but not in the parsed tree, it shouldn't return the parsed document
$document['data'] = [
'type' => 'unknown'
];
} else {
// Otherwise, the document did have mf2, but the link wasn't in it (checked earlier), so set found=false
$found = false;
}
}
}

}

if(!$found) {
Expand Down
69 changes: 69 additions & 0 deletions tests/ParseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1135,4 +1135,73 @@ public function testRelCanonical() {
$this->assertEquals('https://aaronparecki.com/2019/12/01/10/homeautomation', $data['data']['url']);
$this->assertEquals('https://aaronparecki.com/2019/12/01/10/homeautomation', $data['data']['rels']['canonical']);
}

/**
 * The source page has an mf2 h-entry, but the link to the target sits
 * outside of it, so the parse result must report that no link was found.
 */
public function testTargetLinkOutsideHEntry() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-link-outside-h-entry',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('no_link_found', $parsed['error']);
}

/**
 * The source page only carries an mf1 "hentry" without an entry-content
 * property. The target link is present in the HTML, so the response is
 * expected to be a plain document of type "unknown".
 */
public function testTargetLinkWithBadMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-bad-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('unknown', $parsed['data']['type']);
}

/**
 * The source page carries an mf1 "hentry" whose entry-content contains the
 * target link, so the response is expected to be an entry with that link
 * preserved in its HTML content.
 */
public function testTargetLinkWithValidMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);
  $entry = $parsed['data'];

  $this->assertEquals('entry', $entry['type']);
  $this->assertEquals('<a href="https://target.example.com/">target</a>', $entry['content']['html']);
}

/**
 * The source page carries a valid mf1 "hentry", but the target link lives
 * outside of it (in a nav element).
 */
public function testTargetLinkOutsideValidMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-link-outside-valid-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);

  // The link exists in the raw HTML but not in the parsed tree, so the
  // parsed document must not be returned; only an "unknown" type is expected.
  $this->assertEquals('unknown', $parsed['data']['type']);
}

/**
 * With include-mf1 set to the string "false", a page that only has mf1
 * markup is expected to come back as type "unknown".
 */
public function testDisableMf1Parsing() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'include-mf1' => 'false',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('unknown', $parsed['data']['type']);
}

/**
 * With include-mf1 set to the string "true", a page that only has mf1
 * markup is expected to be parsed as an entry.
 */
public function testEnableMf1Parsing() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'include-mf1' => 'true',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('entry', $parsed['data']['type']);
}

}
17 changes: 17 additions & 0 deletions tests/data/source.example.com/target-test-link-outside-h-entry
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive

<html>
<head>
<title>Test</title>
</head>
<body>
<div class="h-entry">
<p class="e-content">hello world</p>
</div>
<nav><a href="https://target.example.com/">target</a></nav>
</body>
</html>
17 changes: 17 additions & 0 deletions tests/data/source.example.com/target-test-link-outside-valid-mf1
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive

<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p class="entry-content">hello world</p>
</div>
<nav><a href="https://target.example.com/">target</a></nav>
</body>
</html>
16 changes: 16 additions & 0 deletions tests/data/source.example.com/target-test-only-bad-mf1
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive

<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p><a href="https://target.example.com/">target</a></p>
</div>
</body>
</html>
16 changes: 16 additions & 0 deletions tests/data/source.example.com/target-test-only-good-mf1
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive

<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p class="entry-content"><a href="https://target.example.com/">target</a></p>
</div>
</body>
</html>

0 comments on commit 37e297d

Please sign in to comment.