����ϡ� Kazuho@Cybozu Labs: Lingua::JA::Summarize 0.02�Ǥ���
インデントにtab("\t" ; "\x09")を使うのはやめましょう。tabは必ずspace("\x20")に拡張しましょう。
10: use base qw(Exporter Class::Accessor::Fast Class::ErrorHandler); 18: __PACKAGE__->mk_accessors(qw(mecab default_cost ng omit_number singlechar_factor alnum_as_word url_as_word jaascii_as_word));
32: sub new { 33: my ($proto, $fields) = @_; 34: my $class = ref $proto || $proto; 35: $fields = {} unless defined $fields; 36: my $self = bless { %$fields }, $class; 37: 38: $self->{mecab} = 'mecab' unless $self->{mecab}; 39: $self->{default_cost} = 800 unless $self->{default_cost}; 40: $self->{ng} = NG unless defined $self->{ng}; 41: $self->{omit_number} = 1 unless defined $self->{omit_number}; 42: $self->{singlechar_factor} = 0.5 unless defined $self->{singlechar_factor}; 43: $self->{alnum_as_word} = 1 unless defined $self->{alnum_as_word}; 44: $self->{url_as_word} = 1 unless defined $self->{url_as_word}; 45: $self->{jaascii_as_word} = 1 unless defined $self->{jaascii_as_word}; 46: 47: return $self; 48: }
# Default value for every constructor field.  The keys double as the list of
# accessor names, so a new field only has to be added in this one place.
my %Fields = (
    mecab             => 'mecab',
    default_cost      => 800,
    ng                => NG(),
    omit_number       => 1,
    singlechar_factor => 0.5,
    alnum_as_word     => 1,
    url_as_word       => 1,
    jaascii_as_word   => 1,    # was "jaascii_as_word, => 1": a bareword under strict
);

__PACKAGE__->mk_accessors(keys %Fields);

# new(\%fields)
#   Builds an object from the defaults in %Fields.  Any key present in the
#   optional \%fields hash reference replaces the default of the same name,
#   because later pairs win when a hash is built from a list.
#   May be called on a class name or on an existing instance.
sub new {
    my ($proto, $fields) = @_;
    my $class = ref $proto || $proto;

    # Calling new() without arguments must keep working: dereferencing
    # undef as a hash is fatal under "use strict".
    $fields = {} unless defined $fields;

    return bless { %Fields, %$fields }, $class;
}
ずいぶんとDRY (Don't Repeat Yourself)になりましたね。
use Data::Dumper;

# Defaults that a caller is allowed to override.
my %Default = (
    Kazuho => 'Cybozu',
    Naoya  => 'Hatena',
);
print Dumper(\%Default);

# When a hash is built from a list, a later pair replaces an earlier pair
# with the same key, so the caller's value for "Naoya" wins over the default.
my $args   = { Naoya => 'Hamachi' };
my %merged = ( %Default, %{$args} );
my $result = \%merged;
print Dumper($result);
�ʤ󤫽��ؤ��Ф���ʴ��������ޤ��͡����ϡ�@hash{@keys} = @value�ˤĤ��ơ�����Ϥ狼��ˤ����ʤ�Τǡ�������ɬ�פʻ��ʳ��Ϥ��Ȥ��ޤ��礦��
21: sub NG () { 22: my %map; 23: @map{('(', ')', '#', ',')} = (); 24: @map{qw(! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~)} = (); 25: @map{ 26: qw(�� �� ʬ �� �� �� ǯ �� �ɥ� 27: �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� ��)} = (); 28: @map{qw(�� �� �� �� �� ��)} = (); 29: \%map; 30: }
# NG()
#   Returns a reference to a newly built hash.  Its keys are the tokens
#   listed below and every value is 1.
sub NG () {
    my %map;
    for my $token (
        '(', ')', '#', ',',
        qw( ! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ �� �� ʬ �� �� �� ǯ �� �ɥ� �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� �� �� �� �� �� �� �� ),
    ) {
        $map{$token} = 1;
    }
    return \%map;
}
# Tokens stored as keys of %NG, each mapped to 1.  Built once at load time.
my %NG;
for my $token (
    '(', ')', '#', ',',
    qw( ! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ �� �� ʬ �� �� �� ǯ �� �ɥ� �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� �� �� �� �� �� �� �� ),
) {
    $NG{$token} = 1;
}

# Default value for every field; "ng" refers to the %NG table above.
my %Fields = (
    mecab             => 'mecab',
    default_cost      => 800,
    ng                => \%NG,
    omit_number       => 1,
    singlechar_factor => 0.5,
    alnum_as_word     => 1,
    url_as_word       => 1,
    jaascii_as_word   => 1,
);
70: sub analyze_file { 71: my ($self, $file) = @_; 72: 73: my $fh; 74: open($fh, '<', "$file") || croak("failed to open: $file: $!"); 75: my $slash = $/; 76: undef $/; 77: my $text = <$fh>; 78: $/ = $slash; 79: close $fh; 80: 81: $self->analyze($text); 82: }
open() || dieではなく、open ... or dieにしましょう。あと、perlにはlocal()があることを覚えておきましょう。
# analyze_file($file)
#   Reads the whole of $file through the :raw layer and passes the contents
#   to $self->analyze, returning whatever analyze returns.
#   Croaks with "failed to open: ..." when the file cannot be opened.
sub analyze_file {
    my ($self, $file) = @_;

    # The comma after the mode was missing, which is a syntax error.
    open my $fh, '<:raw', $file
        or croak "failed to open: $file: $!";

    # Slurp mode is limited to this do-block; $/ is restored on exit.
    my $text = do { local $/; <$fh> };    # Perl Best Practices pp.213
    close $fh;

    return $self->analyze($text);
}
84: sub analyze { # .... 97: # write text to temporary file 98: my ($fh, $tempfile) = tmpnam(); 99: print $fh $text; 100: close $fh; 101: 102: # open mecab 103: my $mecab = $self->mecab; 104: my $def_cost = $self->default_cost; 105: open($fh, '-|', 106: $mecab . 107: " --node-format='%m\t%pn\t%pw\t%H\n'" . 108: " --unk-format='%m\t$def_cost\t$def_cost\tUnkType\n'" . 109: " --bos-format='\n'" . 110: " --eos-format='\n'" . 111: " $tempfile") 112: || croak("failed to call mecab ($mecab): $!");
�����С�IPC::Open2()��Ȥäơ�����ե������Ȥ�ʤ��Ȥ����Τ��������ΤǤ���������ϥƥ��˥å��Ȥ��ƤϹ��٤ʤΤǡ�����ʤ������ϰ���ե������ȤäƤ⤤���Ǥ��礦��������������open�Ϥ��������ʤ�����ͳ��perldoc perlsec���������������ʲ��Τ褦�ˤ��٤��Ǥ���
# Launch mecab without going through a shell (see perldoc perlsec).
# The child gets an otherwise empty environment with a fixed PATH; %ENV is
# restored when the enclosing scope ends.
local %ENV;
$ENV{PATH} = '/usr/local/bin:/usr/bin:/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

# No shell parses these arguments, so the format strings must not be wrapped
# in quote characters: the quotes would reach mecab as part of the format.
my @mecab_args = (
    "--node-format=%m\t%pn\t%pw\t%H\n",
    "--unk-format=%m\t$def_cost\t$def_cost\tUnkType\n",
    "--bos-format=\n",
    "--eos-format=\n",
    $tempfile,
);

# A forking open returns undef on failure, 0 in the child and the child's
# pid in the parent, so the three cases have to be told apart.
my $pid = open $fh, '-|';
croak "failed to call mecab ($mecab): $!" unless defined $pid;
if ($pid == 0) {
    # Child: its STDOUT is the pipe the parent reads from $fh.
    # The indirect-object form of exec takes no comma after the block.
    exec {$mecab} $mecab, @mecab_args
        or croak "failed to call mecab ($mecab): $!";
}
�����ԤΤȤ����ϡ�Perl Cookbook���������Ǥκ��ɤν�Ǥ��礦�����ܸ��Ǥϻ��ƽ�������äƤޤ���
���ȡ����Ǥ� Perl 5.8 ���Ф��ΤǤ����顢Literal��UTF-8�ǽ񤤤�����better practice�ǤϤ���ޤ��������ԤϤޤ��Υ��ϥ���¿�����Ǥ���Ƥ���ȤϤ����������ΤǤ����������Ĥ���ɮ����򤤤������Ƥ���Τǡ���ǯ��ˤϽ��Ҥʤ�������η��Ǥ��Ȥɤ������Ǥ��礦��������󤳤��ˤ��ɡ��񤤤Ƥ����ޤ���
Dan the Best Practitioner