utf8フラグを一気に落とすモジュール Unicode::RecursiveDowngrade
http://blog.livedoor.jp/nipotan/archives/50228106.html
ããã¼
ハッシュや配
列のutf８フラグを一括で落としたいときはこれを使うといいらしい。
さっきのWeb::Scraperテストコードで試してみる。
落とす前
use strict;
use warnings;

use Web::Scraper;
use LWP::Simple;
use Data::Dumper;

# Demo script: scrape the kakaku.com game-console page and dump the raw
# result, so the state of the scraped strings can be inspected as-is.

my $url = 'http://kakaku.com/game/game-console/';

# Every <td> sharing a row with <th class="rank2"> becomes one entry of
# 'items'. The nested scraper fills four list-valued keys for that entry:
# 'text' and 'price' take the node text, 'link' and 'image' take the
# href / src attribute values.
my $scraper = scraper {
    process '//th[@class="rank2"]/../td', 'items[]' => scraper {
        process '//div[@class="title"]/p/a',  'text[]'  => 'TEXT';
        process '//div[@class="item"]/a',     'link[]'  => '@href';
        process '//div[@class="item"]/a/img', 'image[]' => '@src';
        process '//div[@class="price"]/span', 'price[]' => 'TEXT';
    };
};

# LWP::Simple::get returns undef when the fetch fails; stop here instead of
# handing undef to the scraper.
my $html = get($url);
defined $html or die "Failed to fetch $url\n";

my $result = $scraper->scrape($html);
print Dumper($result);
結果：
$VAR1 = { 'items' => [ { 'link' => [ 'http://kakaku.com/item/20502010150/' ], 'text' => [ "Wii [\x{30a6}\x{30a3}\x{30fc}] (Wii\x{30ea}\x{30e2}\x{30b3}\x{30f3}\x{30b8}\x{30e3}\x{30b1}\x{30c3}\x{30c8}\x{540c}\x{68b1})" ], 'price' => [ "\x{a5}17,759" ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/20502010150.jpg' ] }, { 'link' => [ 'http://kakaku.com/item/K0000080401/' ], 'text' => [ "\x{30d7}\x{30ec}\x{30a4}\x{30b9}\x{30c6}\x{30fc}\x{30b7}\x{30e7}\x{30f3}3 HDD 250GB \x{30c1}\x{30e3}\x{30b3}\x{30fc}\x{30eb}\x{30fb}\x{30d6}\x{30e9}\x{30c3}\x{30af} CECH-2000B" ], 'price' => [ "\x{a5}31,078" ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/K0000080401.jpg' ] }, { 'link' => [ 'http://kakaku.com/item/20504010167/' ], 'text' => [ "PSP \x{30d7}\x{30ec}\x{30a4}\x{30b9}\x{30c6}\x{30fc}\x{30b7}\x{30e7}\x{30f3}\x{30fb}\x{30dd}\x{30fc}\x{30bf}\x{30d6}\x{30eb} \x{30d4}\x{30a2}\x{30ce}\x{30fb}\x{30d6}\x{30e9}\x{30c3}\x{30af} PSP-3000 PB" ], 'price' => [ "\x{a5}15,400" ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/20504010167.jpg' ] } ] };
フラグ付き文字列になっている
Unicode::RecursiveDowngradeを使うと…
use strict;
use warnings;

use Web::Scraper;
use LWP::Simple;
use Data::Dumper;
use Unicode::RecursiveDowngrade;

# Demo script: scrape the kakaku.com game-console page, pass the whole
# result structure through Unicode::RecursiveDowngrade, then dump it.

my $url = 'http://kakaku.com/game/game-console/';

# Every <td> sharing a row with <th class="rank2"> becomes one entry of
# 'items'. The nested scraper fills four list-valued keys for that entry:
# 'text' and 'price' take the node text, 'link' and 'image' take the
# href / src attribute values.
my $scraper = scraper {
    process '//th[@class="rank2"]/../td', 'items[]' => scraper {
        process '//div[@class="title"]/p/a',  'text[]'  => 'TEXT';
        process '//div[@class="item"]/a',     'link[]'  => '@href';
        process '//div[@class="item"]/a/img', 'image[]' => '@src';
        process '//div[@class="price"]/span', 'price[]' => 'TEXT';
    };
};

# LWP::Simple::get returns undef when the fetch fails; stop here instead of
# handing undef to the scraper.
my $html = get($url);
defined $html or die "Failed to fetch $url\n";

my $result = $scraper->scrape($html);

# One call handles the entire nested hash/array structure, so there is no
# need to walk it by hand and convert each string individually.
$result = Unicode::RecursiveDowngrade->new->downgrade($result);

print Dumper($result);
結果：
$VAR1 = { 'items' => [ { 'link' => [ 'http://kakaku.com/item/20502010150/' ], 'text' => [ 'Wii [ウィー] (Wiiリモコンジャケット同梱)' ], 'price' => [ '¥17,759' ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/20502010150.jpg' ] }, { 'link' => [ 'http://kakaku.com/item/K0000080401/' ], 'text' => [ 'プレイステーション3 HDD 250GB チャコール・ブラック CECH-2000B' ], 'price' => [ '¥31,078' ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/K0000080401.jpg' ] }, { 'link' => [ 'http://kakaku.com/item/20504010167/' ], 'text' => [ 'PSP プレイステーション・ポータブル ピアノ・ブラック PSP-3000 PB' ], 'price' => [ '¥15,400' ], 'image' => [ 'http://img.kakaku.com/images/productimage/m/20504010167.jpg' ] } ] };
ããã¼。素晴らしい