エントリー書こうかとした時になってPlaggerCookbookのレシピに
無料でメル友を作る為のサイトplagger
なんて書いてあるのをみつけた。既にあるのかな？だいぶ前に
CSS selector で抽出するものは Plagger にもほしいかも
CSS selector to XPath - Bulknews::Subtech - subtech
って書いてあるし、HTML::Selector::XPathもmiyagawaさん作なのでありそうだけど、とりあえず習作ということで書いたやつを貼ってみる。
assets/plugins/Filter-EntryFullText/*.yamlでxpathなら
extract_xpath:
  title: //h2[@id="title"]
  body: //div[@class="section"]
とか書くところを
extract_selector:
  title: h2#title
  body: div.section
なんていう風に書ける。
Index: EntryFullText.pm
===================================================================
--- EntryFullText.pm	(リビジョン 1947)
+++ EntryFullText.pm	(作業コピー)
@@ -258,7 +258,7 @@
     my($self, $args) = @_;
     my $data;
 
-    unless ($self->{extract} || $self->{extract_xpath}) {
+    unless ($self->{extract} || $self->{extract_xpath} || $self->{extract_selector}) {
         Plagger->context->log(error => "YAML doesn't have either 'extract' nor 'extract_xpath'");
         return;
     }
@@ -271,19 +271,35 @@
         }
     }
 
-    if ($self->{extract_xpath}) {
+    if ($self->{extract_xpath} || $self->{extract_selector}) {
         eval { require HTML::TreeBuilder::XPath };
         if ($@) {
             Plagger->context->log(error => "HTML::TreeBuilder::XPath is required. $@");
             return;
         }
 
+        my $selector = eval {
+            require HTML::Selector::XPath;
+            HTML::Selector::XPath->new;
+        };
+
+        if ($self->{extract_selector} && $@) {
+            Plagger->context->log(error => "HTML::Selector::XPath is required. $@");
+            return;
+        }
+
+        my $extractor = $self->{extract_selector} ? 'extract_selector' : 'extract_xpath';
+
         my $tree = HTML::TreeBuilder::XPath->new;
         $tree->parse($args->{content});
         $tree->eof;
 
-        for my $capture (keys %{$self->{extract_xpath}}) {
-            my @children = $tree->findnodes($self->{extract_xpath}->{$capture});
+        for my $capture (keys %{$self->{$extractor}}) {
+            my $xpath = $self->{extract_xpath}->{$capture} || do {
+                $selector->selector($self->{extract_selector}->{$capture});
+                $selector->to_xpath;
+            };
+            my @children = $tree->findnodes($xpath);
             if (@children) {
                 no warnings 'redefine';
                 local *HTML::Element::_xml_escape = \&xml_escape;
@@ -291,7 +307,7 @@
                     ? $children[0]->as_XML
                     : $children[0]->getValue;
             } else {
-                Plagger->context->log(error => "Can't find node matching $self->{extract_xpath}->{$capture}");
+                Plagger->context->log(error => "Can't find node matching $self->{$extractor}->{$capture}");
             }
         }
     }