エントリー書くか、という時になってPlaggerCookbookのレシピに
無料でメル友を作る為のサイトplagger
なんて書いてあるのをみつけた。既にあるのかな?だいぶ前に
CSS selector で抽出かけるのは Plagger にもほしいかも
CSS selector to XPath - Bulknews::Subtech - subtech
って書いてあるし、HTML::Selector::XPathもmiyagawaさん作なのでありそうだけど、とりあえず習作ということで書いたやつを貼ってみる。
assets/plugins/Filter-EntryFullText/*.yamlでxpathなら
extract_xpath:
  title: //h2[@id="title"]
  body: //div[@class="section"]
とか書くところを
extract_selector:
  title: h2#title
  body: div.section
なんていう風に書ける。
Index: EntryFullText.pm
===================================================================
--- EntryFullText.pm (リビジョン 1947)
+++ EntryFullText.pm (作業コピー)
@@ -258,7 +258,7 @@
     my($self, $args) = @_;
     my $data;
 
-    unless ($self->{extract} || $self->{extract_xpath}) {
+    unless ($self->{extract} || $self->{extract_xpath} || $self->{extract_selector}) {
         Plagger->context->log(error => "YAML doesn't have either 'extract' nor 'extract_xpath'");
         return;
     }
@@ -271,19 +271,35 @@
         }
     }
 
-    if ($self->{extract_xpath}) {
+    if ($self->{extract_xpath} || $self->{extract_selector}) {
         eval { require HTML::TreeBuilder::XPath };
         if ($@) {
             Plagger->context->log(error => "HTML::TreeBuilder::XPath is required. $@");
             return;
         }
 
+        my $selector = eval {
+            require HTML::Selector::XPath;
+            HTML::Selector::XPath->new;
+        };
+
+        if ($self->{extract_selector} && $@) {
+            Plagger->context->log(error => "HTML::Selector::XPath is required. $@");
+            return;
+        }
+
+        my $extractor = $self->{extract_selector} ? 'extract_selector' : 'extract_xpath';
+
         my $tree = HTML::TreeBuilder::XPath->new;
         $tree->parse($args->{content});
         $tree->eof;
 
-        for my $capture (keys %{$self->{extract_xpath}}) {
-            my @children = $tree->findnodes($self->{extract_xpath}->{$capture});
+        for my $capture (keys %{$self->{$extractor}}) {
+            my $xpath = $self->{extract_xpath}->{$capture} || do {
+                $selector->selector($self->{extract_selector}->{$capture});
+                $selector->to_xpath;
+            };
+            my @children = $tree->findnodes($xpath);
             if (@children) {
                 no warnings 'redefine';
                 local *HTML::Element::_xml_escape = \&xml_escape;
@@ -291,7 +307,7 @@
                     ? $children[0]->as_XML
                     : $children[0]->getValue;
             } else {
-                Plagger->context->log(error => "Can't find node matching $self->{extract_xpath}->{$capture}");
+                Plagger->context->log(error => "Can't find node matching $self->{$extractor}->{$capture}");
             }
         }
     }