というわけで、その傾向と対策。
[Perl]UTF8-flagged strings affects regexps with the "i" modifier - use GFx::WebLog;Perlではutf8フラグ付きの文字列に対するuc/lc/"i"正規表現修飾子が非常に遅いのだが、H::F::Liteでは/iを使っているので、試しに/iを使わないようにしてみると、速度が改善した
id:gfxの主張は、以下のようにして確かに確かめられる。
use strict;
use warnings;
use Benchmark qw/cmpthese timethese/;

# Compare three ways of matching "dan" case-insensitively:
#   //i      - the /i modifier
#   /[]/     - hand-written character classes, no /i
#   /(?i:)/  - an inline (?i:...) group
# The same three candidates are timed twice: once against a byte string and
# once against a character string. A negative COUNT passed to timethese()
# means "run each candidate for at least that many CPU seconds".

{
    # Without "use utf8" the literal below is kept as raw octets.
    use bytes;
    my $str = 'dankogai = 小飼弾';
    cmpthese(
        timethese(
            -1,
            {
                '//i'     => sub { $str =~ /DAN/i },
                '/[]/'    => sub { $str =~ /[Dd][Aa][Nn]/ },
                '/(?i:)/' => sub { $str =~ /(?i:dan)/ },
            }
        )
    );
}

{
    # With "use utf8" the literal is decoded into characters, which is the
    # case the article reports as slow for /i.
    use utf8;
    my $str = 'dankogai = 小飼弾';
    cmpthese(
        timethese(
            -1,
            {
                '//i'     => sub { $str =~ /DAN/i },
                '/[]/'    => sub { $str =~ /[Dd][Aa][Nn]/ },
                '/(?i:)/' => sub { $str =~ /(?i:dan)/ },
            }
        )
    );
}
bytes
Rate /(?i:)/ /[]/ //i /(?i:)/ 2435851/s -- -1% -6% /[]/ 2453219/s 1% -- -5% //i 2583577/s 6% 5% --
utf8
//i 445390/s -- -1% -79% /(?i:)/ 449756/s 1% -- -79% /[]/ 2163925/s 386% 381% --
それなら、あらかじめ/dan/i
を/[Dd][Aa][Nn]/
にしてしまえばいいではないか。というわけで作成したのがこちら。
use strict;
use warnings;
use Benchmark qw/cmpthese timethese/;

# qri($text)
#
# Build a precompiled regexp that matches the literal string $text
# case-insensitively WITHOUT relying on the /i modifier: every cased
# character becomes a two-member character class such as [Dd].
#
#   $text   - literal text to match (not a pattern; metacharacters are escaped)
#   returns - a qr// object
sub qri {
    my $text = shift;
    my $pat = join '', map {
        my $upper = uc $_;
        my $lower = lc $_;
        $upper eq $lower
            # No case distinction: emit an escaped literal. Wrapping it in a
            # class would break for '^' ([^^] is a negated class) and for ']'.
            ? quotemeta($_)
            : length($upper) == 1 && length($lower) == 1
                ? '[' . $upper . $lower . ']'
                # Multi-character case mapping (e.g. uc of sharp s is "SS"):
                # a class would match the wrong thing, so fall back to (?i:).
                : '(?i:' . quotemeta($_) . ')';
    } split //, $text;
    return qr/$pat/;
}

use utf8;
binmode STDOUT => ':utf8';

# Time /i, hand-written classes, (?i:), and the qri() pattern (with and
# without /o) against a Cyrillic string.
sub run_benchmark {
    my $str = 'Спасибо, товарищ GFX!';
    my $pat = qri('Спасибо');
    print $pat;
    cmpthese(
        timethese(
            -1,
            {
                '//i'     => sub { $str =~ /Спасибо/i },
                '/[]/'    => sub { $str =~ /[Сс][Пп][Аа][Сс][Ии][Бб][Оо]/ },
                '/(?i:)/' => sub { $str =~ /(?i:Спасибо)/ },
                'qri+o'   => sub { $str =~ /$pat/o },
                'qr'      => sub { $str =~ /$pat/ },
            }
        )
    );
    return;
}

# Run the benchmark when executed as a script; stay quiet when loaded with
# require/do so that qri() can be reused on its own.
run_benchmark() unless caller;
Rate //i /(?i:)/ qr qri+o /[]/ //i 58040/s -- -2% -82% -89% -90% /(?i:)/ 59077/s 2% -- -82% -89% -89% qr 330830/s 470% 460% -- -38% -41% qri+o 530963/s 815% 799% 60% -- -6% /[]/ 562195/s 869% 852% 70% 6% --
留意点として、precompileされたregexpを使う時には、//o
を指定することがあるが、この通りの成果と言えるだろう。もちろん、アルファベットだけではなく大文字小文字の区別がある文字であればこのテクニックは全ての文字に使える。
Dan the Regular Expressionist
# Corrected body line for qri(): only characters that actually have distinct
# upper/lower forms are turned into a [Xx] class. Caseless characters are
# emitted as escaped literals so that regex metacharacters in the input
# ('.', '(', '[', '+', ...) are matched literally instead of being interpreted.
my $pat = join '', map {
    uc($_) eq lc($_) ? quotemeta($_) : '[' . uc($_) . lc($_) . ']'
} split //, shift;
���٥������ȡ�