#! /usr/bin/env php
#foo
// dir/file1.html#bar
// dir/file2.html#baz
// ...
//
// Requires: PHP 8.x, HTML5-PHP
// Installation: $ sudo apt install php-masterminds-html5 php-mbstring
// Report every category of PHP error, warning and notice.
error_reporting(E_ALL);
// Replace the include path with /usr/share/php so the require below resolves
// relative to it (presumably where the apt packages named above install
// HTML5-PHP -- confirm when running on another distribution).
set_include_path("/usr/share/php");
// Load the HTML5-PHP autoloader so the Masterminds\HTML5 class can be found.
require("Masterminds/HTML5/autoload.php");
// Import the parser class under the short alias HTML5.
use Masterminds\HTML5;
/**
 * Command-line entry point: checks every given HTML file for broken
 * same-document anchors, then terminates the process.
 *
 * Each broken anchor is written to STDERR as "<file>\t<anchor>".
 * Exit status is 0 when no broken anchor was found, and 1 when at least one
 * was found or when no file was given. check_file() may itself terminate the
 * process with status 2 if a file cannot be read.
 *
 * @param array $files Paths of the HTML files to check.
 * @return void Never actually returns: every path ends in exit().
 */
function main($files) {
    if (count($files) === 0) {
        // Newline-terminated so the shell prompt does not end up glued to the
        // message; there is no argument list worth printing when it is empty.
        fwrite(STDERR, "Wrong number of file arguments: expected at least one file\n");
        exit(1);
    }

    $exit_code = 0;
    foreach ($files as $file) {
        foreach (check_file($file) as $anchor) {
            fprintf(STDERR, "%s\t%s\n", $file, $anchor);
            // Any single broken anchor makes the whole run fail.
            $exit_code = 1;
        }
    }
    exit($exit_code);
}
/**
 * Reads and parses one HTML file and returns its broken same-document anchors.
 *
 * Terminates the process with exit status 2 (after writing a diagnostic to
 * STDERR) when the file cannot be read. A file that is empty or contains only
 * whitespace is treated as valid and yields no broken anchors.
 *
 * @param string $file Path of the HTML file to check.
 * @return array List of "#fragment" strings that match no id in the file.
 */
function check_file($file) {
    $html = file_get_contents($file);
    // Compare against false explicitly: a successfully read empty file ("")
    // or a file containing just "0" is falsy but is not a read failure.
    if ($html === false) {
        fprintf(STDERR, "Failed to read file: %s\n", $file);
        exit(2);
    }
    // Nothing to check in a blank document, so skip the parser entirely.
    if (preg_match("/^\s*$/", $html)) return [];
    // disable_html_ns keeps the parsed elements out of the XHTML namespace
    // (per the HTML5-PHP option of that name), which the un-prefixed XPath
    // queries in check_document() rely on.
    $html5 = new Masterminds\HTML5([
        'disable_html_ns' => true,
    ]);
    return check_document($html5->loadHTML($html));
}
/**
 * Finds same-document links whose target id does not exist in the document.
 *
 * Collects every id attribute in the document, then inspects the href of each
 * <a> element. Only hrefs that start with "#" after trimming and
 * percent-decoding are considered; links to other documents are ignored.
 *
 * @param DOMDocument $dom Parsed document whose elements are not namespaced.
 * @return array List of decoded "#fragment" values, in document order, that
 *               match no id. Duplicates are kept.
 */
function check_document($dom) {
    $xpath = new DOMXPath($dom);

    // "#" and "#top" always resolve (to the top of the page), so they are
    // valid targets even when no element carries such an id.
    $id_set = ["#" => true, "#top" => true];
    foreach ($xpath->query("//@id") as $id) {
        $id_set["#" . $id->value] = true;
    }

    $bad_anchors = [];
    foreach ($xpath->query("//a/@href") as $href) {
        // Fragments are percent-encoded, not form-encoded: rawurldecode()
        // leaves a literal "+" alone, whereas urldecode() would turn it into
        // a space and make "#a+b" miss an element with id="a+b".
        $value = rawurldecode(trim($href->value));
        if (substr($value, 0, 1) !== "#") continue;
        if (!array_key_exists($value, $id_set)) {
            $bad_anchors[] = $value;
        }
    }
    return $bad_anchors;
}
// Script entry point: $argv[0] is the script name, so only the remaining
// command-line arguments (the file paths) are handed to main().
main(array_slice($argv, 1));