Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,23 @@ parameters:
-
function: 'dj()'

ignoreErrors:
-
identifier: class.notFound
path: src/core/etl/src/Flow/ETL/Row/Entry/HTMLEntry.php

-
identifier: class.notFound
path: src/core/etl/src/Flow/ETL/Function/HTMLQuerySelector.php

-
identifier: class.notFound
path: src/core/etl/src/Flow/ETL/Function/HTMLQuerySelectorAll.php

-
identifier: class.notFound
path: src/lib/types/src/Flow/Types/Type/Logical/HTMLType.php

includes:
- tools/phpstan/vendor/spaze/phpstan-disallowed-calls/extension.neon

Expand Down
6 changes: 3 additions & 3 deletions src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
type_xml_element as type_xml_element_new,
types as types_new
};
use Dom\HTMLDocument;
use Flow\Calculator\Rounding;
use Flow\ETL\{Analyze,
Attribute\DocumentationDSL,
Expand Down Expand Up @@ -205,7 +206,6 @@
UnionType
};
use Flow\Types\Type\Types;
use Flow\Types\Value\HTMLDocument;
use UnitEnum;

/**
Expand Down Expand Up @@ -632,7 +632,7 @@ function xml_element_entry(string $name, \DOMElement|string|null $value, ?Metada
* @return Entry<?HTMLDocument>
*/
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $metadata = null) : Entry
function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $metadata = null) : Entry // @phpstan-ignore class.notFound,class.notFound
{
return new HTMLEntry($name, $value, $metadata);
}
Expand Down Expand Up @@ -1973,7 +1973,7 @@ function json_schema(string $name, bool $nullable = false, ?Metadata $metadata =
* @return Definition<HTMLDocument>
*/
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
function html_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
function html_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition // @phpstan-ignore class.notFound
{
return Definition::html($name, $nullable, $metadata);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Exception;

/**
 * Signals that a class needed by the caller is only available in a newer PHP version.
 */
final class RequiredPHPVersionException extends RuntimeException
{
    /**
     * @param string $className name of the unavailable class, embedded verbatim in the message
     * @param string $version minimal PHP version that ships the class, embedded verbatim in the message
     * @param null|\Throwable $previous optional cause; typed as Throwable so that \Error instances can be chained too
     */
    public function __construct(string $className, string $version, ?\Throwable $previous = null)
    {
        parent::__construct(
            "To use {$className} class, you need to upgrade your PHP version to: {$version}+.",
            previous: $previous
        );
    }
}
33 changes: 33 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/HTMLQuerySelector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Function;

use Dom\{Element, HTMLDocument};
use Flow\ETL\Exception\RequiredPHPVersionException;
use Flow\ETL\Row;

/**
 * Scalar function that calls querySelector() on an HTML document taken from the row.
 */
final class HTMLQuerySelector extends ScalarFunctionChain
{
    /**
     * @param mixed $value source of the document, resolved per row into a \Dom\HTMLDocument instance
     * @param ScalarFunction|string $selector selector, resolved per row into a string
     *
     * @throws RequiredPHPVersionException when the \Dom\HTMLDocument class does not exist at runtime
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $selector,
    ) {
        $domAvailable = \class_exists('\Dom\HTMLDocument');

        if (false === $domAvailable) {
            throw new RequiredPHPVersionException('\Dom\HTMLDocument', '8.4');
        }
    }

    /**
     * Resolves both parameters against the row and returns whatever querySelector() yields.
     * Null is returned without querying when the document or the selector resolves to null.
     */
    public function eval(Row $row) : ?Element
    {
        $document = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);

        $query = (new Parameter($this->selector))->asString($row);

        return (null === $document || null === $query)
            ? null
            : $document->querySelector($query);
    }
}
52 changes: 52 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/HTMLQuerySelectorAll.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Function;

use DOM\{Element, HTMLDocument};
use Flow\ETL\Exception\RequiredPHPVersionException;
use Flow\ETL\Row;

/**
 * Scalar function that calls querySelectorAll() on an HTML document taken from the row
 * and exposes the matched elements as a plain array.
 */
final class HTMLQuerySelectorAll extends ScalarFunctionChain
{
    /**
     * @param mixed $value source of the document, resolved per row into an HTMLDocument instance
     * @param ScalarFunction|string $selector selector, resolved per row into a string
     *
     * @throws RequiredPHPVersionException when the \Dom\HTMLDocument class does not exist at runtime
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $selector,
    ) {
        $domAvailable = \class_exists('\Dom\HTMLDocument');

        if (false === $domAvailable) {
            throw new RequiredPHPVersionException('\Dom\HTMLDocument', '8.4');
        }
    }

    /**
     * Null is returned when the document or the selector resolves to null,
     * and also when the selector matches nothing.
     *
     * @return null|array<Element>
     */
    public function eval(Row $row) : ?array
    {
        $document = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);
        $query = (new Parameter($this->selector))->asString($row);

        if (null === $document || null === $query) {
            return null;
        }

        $matches = $document->querySelectorAll($query);

        // An empty match set is reported as null, not as an empty array.
        if (0 === $matches->count()) {
            return null;
        }

        // Keep only Element nodes and reindex so the result is a sequential list.
        return \array_values(
            \array_filter(
                \iterator_to_array($matches, false),
                static fn (mixed $node) : bool => $node instanceof Element,
            )
        );
    }
}
10 changes: 10 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,16 @@ public function hash(Algorithm $algorithm = new NativePHPHash()) : Hash
return new Hash($this, $algorithm);
}

/**
 * Wraps this function in an HTMLQuerySelector, passing it as the value to query.
 *
 * @param ScalarFunction|string $path selector forwarded to HTMLQuerySelector
 */
public function htmlQuerySelector(ScalarFunction|string $path) : HTMLQuerySelector
{
    return new HTMLQuerySelector(value: $this, selector: $path);
}

/**
 * Wraps this function in an HTMLQuerySelectorAll, passing it as the value to query.
 *
 * @param ScalarFunction|string $path selector forwarded to HTMLQuerySelectorAll
 */
public function htmlQuerySelectorAll(ScalarFunction|string $path) : HTMLQuerySelectorAll
{
    return new HTMLQuerySelectorAll(value: $this, selector: $path);
}

/**
* Returns the index of given $needle in string.
*/
Expand Down
8 changes: 4 additions & 4 deletions src/core/etl/src/Flow/ETL/Row/Entry/HTMLEntry.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
namespace Flow\ETL\Row\Entry;

use function Flow\Types\DSL\{type_equals, type_html, type_optional};
use Dom\HTMLDocument;
use Flow\ETL\Row\{Entry, Reference};
use Flow\ETL\Schema\{Definition, Metadata};
use Flow\Types\Type;
use Flow\Types\Value\HTMLDocument;

/**
* @implements Entry<?HTMLDocument>
Expand All @@ -32,7 +32,7 @@ public function __construct(
?Metadata $metadata = null,
) {
if (\is_string($value)) {
$this->value = HTMLDocument::fromString($value);
$this->value = HTMLDocument::createFromString($value);
} else {
$this->value = $value;
}
Expand Down Expand Up @@ -75,7 +75,7 @@ public function isEqual(Entry $entry) : bool
return false;
}

return $entry->value()?->toString() === $this->value?->toString();
return $entry->value()?->saveHtml() === $this->value?->saveHtml();
}

public function map(callable $mapper) : self
Expand All @@ -99,7 +99,7 @@ public function toString() : string
return '';
}

return $this->value->toString();
return $this->value->saveHtml();
}

public function type() : Type
Expand Down
5 changes: 3 additions & 2 deletions src/core/etl/src/Flow/ETL/Schema/Definition.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@
type_xml,
type_xml_element,
types};
use Dom\HTMLDocument;
use Flow\ETL\Exception\{InvalidArgumentException, RuntimeException};
use Flow\ETL\Row\{Entry, EntryReference, Reference};
use Flow\Types\Type;
use Flow\Types\Type\Logical\{ListType, MapType, OptionalType, StructureType};
use Flow\Types\Type\{Native\FloatType, Native\IntegerType, Native\UnionType, TypeFactory};
use Flow\Types\Value\{HTMLDocument, Uuid};
use Flow\Types\Value\Uuid;

/**
* @template-covariant T
Expand Down Expand Up @@ -142,7 +143,7 @@ public static function fromArray(array $definition) : self
/**
* @return Definition<HTMLDocument>
*/
public static function html(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self
public static function html(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self // @phpstan-ignore class.notFound
{
return new self($entry, type_html(), $nullable, $metadata);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ enum_entry,
float_entry,
from_array,
from_rows,
html_entry,
int_entry,
json_entry,
list_entry,
Expand Down Expand Up @@ -76,7 +75,6 @@ public function extract(FlowContext $context) : \Generator
),
enum_entry('enum', BackedStringEnum::three),
xml_entry('xml', '<xml><node id="123">test<foo>bar</foo></node></xml>'),
html_entry('html', '<!DOCTYPE html><html lang="en"><head></head><body></body></html>'),
),
);
}
Expand All @@ -86,15 +84,15 @@ enum_entry('enum', BackedStringEnum::three),

self::assertCommandOutputIdentical(
<<<'ASCIITABLE'
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
| id | price | 100 | deleted | created-at | phase | array | list | map | items | enum | xml | html |
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <!DOCTYPE html><html |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <!DOCTYPE html><html |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <!DOCTYPE html><html |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <!DOCTYPE html><html |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <!DOCTYPE html><html |
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
| id | price | 100 | deleted | created-at | phase | array | list | map | items | enum | xml |
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
5 rows

ASCIITABLE,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Tests\Integration\Function;

use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows};
use Dom\{Element, HTMLDocument};
use PHPUnit\Framework\Attributes\RequiresPhp;
use PHPUnit\Framework\TestCase;

#[RequiresPhp('>= 8.4')]
final class HTMLQuerySelectorAllTest extends TestCase
{
    /**
     * Markup shared by both scenarios: a single <span> nested in a <div> inside <body>.
     */
    private const HTML = '<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>';

    /**
     * A selector that matches nothing must leave no countable result in the "html" column.
     */
    public function test_invalid_query_all_on_html_document() : void
    {
        /* @phpstan-ignore-next-line */
        $document = HTMLDocument::createFromString(self::HTML);

        $source = from_rows(rows(row(html_entry('html_raw', $document))));

        $fetched = df()
            ->read($source)
            ->withEntry('html', ref('html_raw')->htmlQuerySelectorAll('body div p'))
            ->drop('html_raw')
            ->fetch();

        $matches = $fetched->toArray()[0]['html'] ?? [];

        /* @phpstan-ignore-next-line */
        self::assertCount(0, $matches);
    }

    /**
     * A selector that matches the single <span> must produce exactly one Element.
     */
    public function test_valid_query_all_on_html_document() : void
    {
        /* @phpstan-ignore-next-line */
        $document = HTMLDocument::createFromString(self::HTML);

        $source = from_rows(rows(row(html_entry('html_raw', $document))));

        $fetched = df()
            ->read($source)
            ->withEntry('html', ref('html_raw')->htmlQuerySelectorAll('body div span'))
            ->drop('html_raw')
            ->fetch();

        $matches = $fetched->toArray()[0]['html'] ?? [];

        /* @phpstan-ignore-next-line */
        self::assertCount(1, $matches);
        /* @phpstan-ignore-next-line */
        self::assertContainsOnlyInstancesOf(Element::class, $matches);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Tests\Integration\Function;

use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows};
use Dom\HTMLDocument;
use PHPUnit\Framework\Attributes\RequiresPhp;
use PHPUnit\Framework\TestCase;

#[RequiresPhp('>= 8.4')]
final class HTMLQuerySelectorTest extends TestCase
{
    /**
     * Markup under test: a single <span> nested in a <div> inside <body>.
     */
    private const HTML = '<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>';

    /**
     * A selector that matches nothing must leave no countable result in the "html" column.
     */
    public function test_invalid_query_on_html_document() : void
    {
        /* @phpstan-ignore-next-line */
        $document = HTMLDocument::createFromString(self::HTML);

        $source = from_rows(rows(row(html_entry('html_raw', $document))));

        $fetched = df()
            ->read($source)
            ->withEntry('html', ref('html_raw')->htmlQuerySelector('body div p'))
            ->drop('html_raw')
            ->fetch();

        // A missing or null column falls back to an empty array so it can be counted.
        $matches = $fetched->toArray()[0]['html'] ?? [];

        /* @phpstan-ignore-next-line */
        self::assertCount(0, $matches);
    }
}
Loading