-
-
Notifications
You must be signed in to change notification settings - Fork 51
Add HTMLQuerySelectorAll & HTMLQuerySelector scalar functions
#1960
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
42905bc
Add `HTMLQuerySelectorAll` & `HTMLQuerySelector` scalar functions
stloyd dad705e
Add a `RequiredPHPVersionException`
stloyd 54625ff
Adjust phpstan.neon ignoring rules for < PHP 8.4
stloyd 70a947f
Enforce PHP requirement in a new HTML selector functions
stloyd File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
src/core/etl/src/Flow/ETL/Exception/RequiredPHPVersionException.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Flow\ETL\Exception; | ||
|
|
||
/**
 * Thrown when a feature relies on a class that is only available in a newer PHP version.
 */
final class RequiredPHPVersionException extends RuntimeException
{
    /**
     * @param string $className name of the class that is missing on the running PHP version
     * @param string $version minimum PHP version that ships the class; a "+" is appended in the message
     * @param null|\Throwable $previous optional underlying cause; typed as Throwable so that
     *                                  \Error instances can be chained as well as exceptions
     */
    public function __construct(string $className, string $version, ?\Throwable $previous = null)
    {
        parent::__construct(
            "To use {$className} class, you need to upgrade your PHP version to: {$version}+.",
            previous: $previous
        );
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Flow\ETL\Function; | ||
|
|
||
| use Dom\{Element, HTMLDocument}; | ||
| use Flow\ETL\Exception\RequiredPHPVersionException; | ||
| use Flow\ETL\Row; | ||
|
|
||
/**
 * Scalar function that runs `querySelector()` on an HTML document taken from the row.
 */
final class HTMLQuerySelector extends ScalarFunctionChain
{
    /**
     * @throws RequiredPHPVersionException when the \Dom\HTMLDocument class does not exist
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $selector,
    ) {
        $domAvailable = \class_exists('\Dom\HTMLDocument');

        if (false === $domAvailable) {
            throw new RequiredPHPVersionException('\Dom\HTMLDocument', '8.4');
        }
    }

    /**
     * @return null|Element result of `querySelector()`, or null when either parameter resolves to null
     */
    public function eval(Row $row) : ?Element
    {
        // Both parameters are resolved up front, before any null check is made.
        $document = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);
        $query = (new Parameter($this->selector))->asString($row);

        if (null !== $document && null !== $query) {
            return $document->querySelector($query);
        }

        return null;
    }
}
52 changes: 52 additions & 0 deletions
52
src/core/etl/src/Flow/ETL/Function/HTMLQuerySelectorAll.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Flow\ETL\Function; | ||
|
|
||
use Dom\{Element, HTMLDocument};
use Flow\ETL\Exception\RequiredPHPVersionException;
use Flow\ETL\Row;

/**
 * Scalar function that runs `querySelectorAll()` on an HTML document taken from the row
 * and returns the matched elements as a plain array.
 */
final class HTMLQuerySelectorAll extends ScalarFunctionChain
{
    /**
     * @throws RequiredPHPVersionException when the \Dom\HTMLDocument class does not exist
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $selector,
    ) {
        if (!\class_exists('\Dom\HTMLDocument')) {
            throw new RequiredPHPVersionException('\Dom\HTMLDocument', '8.4');
        }
    }

    /**
     * @return null|list<Element> matched elements; null when either parameter resolves to null
     *                            or when the selector matches nothing
     */
    public function eval(Row $row) : ?array
    {
        $value = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);
        $selector = (new Parameter($this->selector))->asString($row);

        if (null === $value || null === $selector) {
            return null;
        }

        $result = $value->querySelectorAll($selector);

        // An empty match set is reported as null rather than as an empty array.
        if (0 === $result->count()) {
            return null;
        }

        $nodes = [];

        foreach ($result as $node) {
            // Keep only Element instances so the return type holds for static analysis.
            if (!$node instanceof Element) {
                continue;
            }

            $nodes[] = $node;
        }

        return $nodes;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorAllTest.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Flow\ETL\Tests\Integration\Function; | ||
|
|
||
| use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows}; | ||
| use Dom\{Element, HTMLDocument}; | ||
| use PHPUnit\Framework\Attributes\RequiresPhp; | ||
| use PHPUnit\Framework\TestCase; | ||
|
|
||
#[RequiresPhp('>= 8.4')]
final class HTMLQuerySelectorAllTest extends TestCase
{
    public function test_invalid_query_all_on_html_document() : void
    {
        // The fixture contains no <p> element, so nothing is expected to match.
        $matches = $this->selectAll('body div p');

        /* @phpstan-ignore-next-line */
        self::assertCount(0, $matches);
    }

    public function test_valid_query_all_on_html_document() : void
    {
        // The fixture contains exactly one <span> inside the <div>.
        $matches = $this->selectAll('body div span');

        /* @phpstan-ignore-next-line */
        self::assertCount(1, $matches);
        /* @phpstan-ignore-next-line */
        self::assertContainsOnlyInstancesOf(Element::class, $matches);
    }

    /**
     * Runs htmlQuerySelectorAll() with the given selector over the shared HTML fixture
     * and returns the resulting "html" value of the first row, or an empty array when it is unset or null.
     */
    private function selectAll(string $selector) : mixed
    {
        /* @phpstan-ignore-next-line */
        $document = HTMLDocument::createFromString('<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>');

        $fetched = df()
            ->read(from_rows(rows(row(html_entry('html_raw', $document)))))
            ->withEntry('html', ref('html_raw')->htmlQuerySelectorAll($selector))
            ->drop('html_raw')
            ->fetch();

        return $fetched->toArray()[0]['html'] ?? [];
    }
}
31 changes: 31 additions & 0 deletions
31
src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorTest.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Flow\ETL\Tests\Integration\Function; | ||
|
|
||
| use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows}; | ||
| use Dom\HTMLDocument; | ||
| use PHPUnit\Framework\Attributes\RequiresPhp; | ||
| use PHPUnit\Framework\TestCase; | ||
|
|
||
#[RequiresPhp('>= 8.4')]
final class HTMLQuerySelectorTest extends TestCase
{
    public function test_invalid_query_on_html_document() : void
    {
        /* @phpstan-ignore-next-line */
        $document = HTMLDocument::createFromString('<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>');

        // The fixture contains no <p> element, so the selector is expected to match nothing.
        $pipeline = df()->read(from_rows(rows(row(html_entry('html_raw', $document)))));
        $pipeline = $pipeline->withEntry('html', ref('html_raw')->htmlQuerySelector('body div p'));
        $fetched = $pipeline->drop('html_raw')->fetch();

        // A null or missing "html" value falls back to an empty array.
        $match = $fetched->toArray()[0]['html'] ?? [];

        /* @phpstan-ignore-next-line */
        self::assertCount(0, $match);
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.