�����Ǥ����������ź��λ��֤Ǥ���

����ϡ� Kazuho@Cybozu Labs: Lingua::JA::Summarize 0.02�Ǥ���

�ޤ��ϡ�Editor�dz����Ƥߤʤ��ȸ��Ť餤�Ȥ�����

����ǥ�Ȥ�tab("\t" ; "\x09")��Ȥ��ΤϤ��ޤ��礦��tab��ɬ��space("\x20")�˳�ĥ���ޤ��礦��

���֥��ȥåפο��ϡ��Ķ��ˤ�äưۤʤ�ޤ����ˤ�ؤ�餺�㤦�褦��ɽ�������ΤϤ����ʤ���Фʤ�ޤ���perl�ǤϤ������̿Ū�ǤϤ���ޤ��󤬡�python��haskell�Ǥ���̿Ū�Ǥ�����Perl�����Ǥ���YAML�ʤɤ�񤤤Ƥ������Ϥ���̿Ū�ˤʤ�ޤ���

���⥿�֥֡����פ�Ȥ��ʤȤ����ΤǤϤʤ��ΤǤ�������Υ��ǥ����Ǥϡ����֥����򲡤��ȡ�Ŭ�ڤʿ��Υ��ڡ���������Ƥ����褦������Ǥ��ޤ�(Emacs��cperl-mode�Ϥ��줬ɸ����ä��Ȼפ�)���������Ƥ��ޤ��Ф����ΤǤ���

���˽񤤤��Τϡ�perltidy�ǰ쵤��ľ���Ƥ��ޤ��Ф����Ǥ��礦��

次に、accessorの使い方。accessorを使う場合、必ずaccessor経由にしましょう。折角

10: use base qw(Exporter Class::Accessor::Fast Class::ErrorHandler);
18: __PACKAGE__->mk_accessors(qw(mecab default_cost ng omit_number singlechar_factor alnum_as_word url_as_word jaascii_as_word));

としながら、

32: sub new {
33:     my ($proto, $fields) = @_;
34:     my $class = ref $proto || $proto;
35:     $fields = {} unless defined $fields;
36:     my $self = bless { %$fields }, $class;
37:     
38:     $self->{mecab} = 'mecab' unless $self->{mecab};
39:     $self->{default_cost} = 800 unless $self->{default_cost};
40:     $self->{ng} = NG unless defined $self->{ng};
41:     $self->{omit_number} = 1 unless defined $self->{omit_number};
42:     $self->{singlechar_factor} = 0.5 unless defined $self->{singlechar_factor};
43:     $self->{alnum_as_word} = 1 unless defined $self->{alnum_as_word};
44:     $self->{url_as_word} = 1 unless defined $self->{url_as_word};
45:     $self->{jaascii_as_word} = 1 unless defined $self->{jaascii_as_word};
46:     
47:     return $self;
48: }

では台無しです。

また、折角Hashを使っているのですから、こうするとずっときれいになります。

# Default value for every constructor field, keyed by accessor name.
# This one table drives both accessor generation and new(), so each
# field is declared in exactly one place.
my %Fields =
    (
     mecab             => 'mecab',
     default_cost      => 800,
     ng                => NG(),
     omit_number       => 1,
     singlechar_factor => 0.5,
     alnum_as_word     => 1,
     url_as_word       => 1,
     # "=>" only auto-quotes the identifier immediately to its left, so
     # there must be no comma between the key and the fat arrow.
     jaascii_as_word   => 1,
    );

# Generate one accessor per field listed above.
__PACKAGE__->mk_accessors(keys %Fields);

# Constructor.
#   $proto  - a class name, or an existing object whose class is reused
#   $fields - optional hash reference of field values overriding %Fields
# Returns a new blessed hash: the %Fields defaults overlaid with the
# caller's values (in a hash list, later pairs win over earlier ones).
sub new {
    my ($proto, $fields) = @_;
    my $class = ref $proto || $proto;
    # A missing $fields means "no overrides"; dereferencing undef here
    # would be fatal under "use strict 'refs'".
    return bless { %Fields, %{ $fields || {} } }, $class;
}

�����֤��DRY (Don't Repeat Yourself)�ˤʤ�ޤ����͡�

�����Ǥ�new�ϡ�perl��������褫������Τˤʤ�ޤ����֤ʤ�Ǥ��줬ư����?�פȤ狼��ʤ����ϡ��ʲ���code���ƤߤƲ�������

use Data::Dumper;

# Start from a table of defaults and show it.
my %defaults = (
    Kazuho => 'Cybozu',
    Naoya  => 'Hatena',
);
print Dumper(\%defaults);

# Flatten the defaults and the overrides into one anonymous hash;
# for a duplicated key the pair that comes last wins.
my $overrides = { Naoya => 'Hamachi' };
my $merged    = { %defaults, %{$overrides} };
print Dumper($merged);

�ʤ󤫽��ؤ��Ф���ʴ��������ޤ��͡����ϡ�@hash{@keys} = @value�ˤĤ��ơ�����Ϥ狼��ˤ����ʤ�Τǡ�������ɬ�פʻ��ʳ��Ϥ��Ȥ��ޤ��礦��

21: sub NG () {
22:     my %map;
23:     @map{('(', ')', '#', ',')} = ();
24:     @map{qw(! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~)} = ();
25:     @map{
26:     qw(�� �� ʬ �� �� �� ǯ �� �ɥ�
27:        �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� ��)} = ();
28:     @map{qw(�� �� �� �� �� ��)} = ();
29:     \%map;
30: }

����ʤ��Ȥ��ʤ��Ƥ⡢

# Returns a reference to a hash whose keys are the listed tokens, each
# mapped to 1. Declared with an empty prototype, i.e. it takes no
# arguments. The hash is rebuilt on every call.
# NOTE(review): the non-ASCII words in the qw() list appear mis-encoded
# in this copy of the file -- confirm them against the original source.
sub NG () {
    my %map =
        map { $_ => 1 }
            (
             # '#' and ',' are listed outside qw() because perl warns
             # about those two characters inside a qw() list.
             '(', ')', '#', ',',
             qw( ! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
                 �� �� ʬ �� �� �� ǯ �� �ɥ�
                 �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� ��
                �� �� �� �� �� �� ),
             );
    return \%map;
}

�Ǥ����ΤǤ��������⡢�������ۤɤ�%Fields�ΰ����Ǥ�������������ʤΤǤ����顢

# File-scoped lookup table: every listed token is a key mapped to 1.
# It is built once, when this statement runs, rather than on each call
# of a sub.
# NOTE(review): the non-ASCII words in the qw() list appear mis-encoded
# in this copy of the file -- confirm them against the original source.
my %NG =
    map { $_ => 1 }
        (
         # '#' and ',' are listed outside qw() because perl warns
         # about those two characters inside a qw() list.
         '(', ')', '#', ',',
         qw( ! " $ % & ' * + - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
             �� �� ʬ �� �� �� ǯ �� �ɥ�
             �� �� �� �� �� ϻ �� Ȭ �� �� ɴ �� �� �� ��
            �� �� �� �� �� �� ),
         );
# Default value for each field, keyed by field name.
my %Fields = 
    (
     mecab             => 'mecab',
     default_cost      => 800,
     # A reference to the single shared %NG table above, not a copy.
     ng                => \%NG,
     omit_number       => 1,
     singlechar_factor => 0.5,
     alnum_as_word     => 1,
     url_as_word       => 1,
     jaascii_as_word   => 1,
    );

�����������ΤǤ���

次に、openの使い方。

70: sub analyze_file {
71:     my ($self, $file) = @_;
72:     
73:     my $fh;
74:     open($fh, '<', "$file") || croak("failed to open: $file: $!");
75:     my $slash = $/;
76:     undef $/;
77:     my $text = <$fh>;
78:     $/ = $slash;
79:     close $fh;
80:     
81:     $self->analyze($text);
82: }

open() || dieではなく、open ... or dieにしましょう。あと、perlにはlocal()があることを覚えておきましょう。

# Reads the whole of $file and passes its contents to $self->analyze.
#   $file - path of the file to read
# Returns whatever analyze() returns; croaks if the file cannot be opened.
sub analyze_file {
    my ($self, $file) = @_;
    # Three-argument open: the mode and the path are separate,
    # comma-delimited arguments.
    open my $fh, '<:raw', $file or croak "failed to open: $file: $!";
    # local $/ switches to slurp mode for this one read only.
    my $text = do { local $/; <$fh> }; # Perl Best Practices pp.213
    close $fh;
    return $self->analyze($text);
}

�����Damian������Perl6::Slurp�ˤ��Τ�����Υ���ʤȤ����򲡤��դ����㤦�Ȥ����Τ⤰���Ǥ���

つぎに、mecabを呼び出すところです。

84: sub analyze {
        # ....
97:     # write text to temporary file
98:     my ($fh, $tempfile) = tmpnam();
99:     print $fh $text;
100:    close $fh;
101:     
102:    # open mecab
103:    my $mecab = $self->mecab;
104:    my $def_cost = $self->default_cost;
105:    open($fh, '-|',
106:    $mecab .
107:    " --node-format='%m\t%pn\t%pw\t%H\n'" .
108:    " --unk-format='%m\t$def_cost\t$def_cost\tUnkType\n'" .
109:    " --bos-format='\n'" .
110:    " --eos-format='\n'" .
111:    " $tempfile")
112:    || croak("failed to call mecab ($mecab): $!");

�����С�IPC::Open2()��Ȥäơ�����ե������Ȥ�ʤ��Ȥ����Τ��������ΤǤ���������ϥƥ��˥å��Ȥ��ƤϹ��٤ʤΤǡ�����ʤ������ϰ���ե������ȤäƤ⤤���Ǥ��礦��������������open�Ϥ��������ʤ�����ͳ��perldoc perlsec���������������ʲ��Τ褦�ˤ��٤��Ǥ���

  # Run mecab with a minimal, known environment; local() restores the
  # caller's %ENV when the enclosing scope exits.
  local(%ENV);
  $ENV{PATH} = '/usr/local/bin:/usr/bin:/bin';
  delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  # List-form pipe open: the command and each argument are passed
  # directly with no shell in between, so the format strings carry no
  # shell quoting. A false return value covers failure to start the
  # command, so the parent never falls through into an exec of its own.
  open $fh, '-|', $mecab,
       "--node-format=%m\t%pn\t%pw\t%H\n",
       "--unk-format=%m\t$def_cost\t$def_cost\tUnkType\n",
       "--bos-format=\n",
       "--eos-format=\n",
       $tempfile
    or croak "failed to call mecab ($mecab): $!";

�����ԤΤȤ����ϡ�Perl Cookbook���������Ǥκ��ɤν�Ǥ��礦�����ܸ��Ǥϻ��ƽ�������äƤޤ���

���ȡ����Ǥ� Perl 5.8 ���Ф��ΤǤ����顢Literal��UTF-8�ǽ񤤤�����better practice�ǤϤ���ޤ��������ԤϤޤ��Υ��ϥ���¿�����Ǥ���Ƥ���ȤϤ����������ΤǤ����������Ĥ���ɮ����򤤤������Ƥ���Τǡ���ǯ��ˤϽ��Ҥʤ�������η��Ǥ��Ȥɤ������Ǥ��礦��������󤳤��ˤ��ɡ��񤤤Ƥ����ޤ���

����Ǥ������Ϥ����դǡ�

Dan the Best Practitioner