YAPC::Asia::2008

すでに正解が書かれていますが、

[を] Unicode の16進数の実体参照を正規表現などで元に戻す
pack と Encode::decode を使えば良いみたい。
はてなブックマーク - miyagawaのブックマーク / 2008年05月11日
これ HTML::Entities::decode / regexp でも chr(hex($1)) のほうがわかりやすくないか？

繰り返しておくだけの価値はあるので。

HTML::Entitiesを使う

まず、HTML::Entitiesのdecode_entities()を使うという方法があります。これがベストプラクティスかな。

#!/usr/local/bin/perl
# Demo: decode hexadecimal numeric character references with
# HTML::Entities::decode_entities().
use strict;
use warnings;
use Encode;
use HTML::Entities;

# Input: EUC-JP octets (the "\x.." escapes) followed by ASCII-only numeric
# character references for U+65AD, U+56FA and U+5BB3.  The references must
# stay as ASCII text here; otherwise there is nothing for decode_entities()
# to decode and the literal is no longer valid EUC-JP.
my $eucjp = "Dan Kogai \xbe\xae\xbb\xf4\xc3\xc6 &#x65AD;&#x56FA;&#x5BB3;";

# Decode the octets into a Perl character string first.  The references are
# plain ASCII, so they pass through decode() unchanged.
my $utf8  = decode('eucjp', $eucjp);

binmode STDOUT, ':utf8';
print $utf8, "\n";                     # references still printed literally
print decode_entities($utf8), "\n";    # references replaced by characters

注意点としては、先にUTF-8にしておくということが挙げられます。これならUTF-8にした時点で、文字参照(character reference)の部分は(ASCIIなので)そのままなので。この逆だと、decode_entities()でUTF-8化された部分と元の文字コードが入り交じったバイト列が出来てしまいます。

Dan the Perl Monger

正規表現でやる

どうしても正規表現でやりたい、という場合はこんな感じでしょうか。

#!/usr/local/bin/perl
# Demo: decode hexadecimal numeric character references with a plain
# regular expression instead of HTML::Entities.
use strict;
use warnings;
use Encode;

# Input: EUC-JP octets (the "\x.." escapes) followed by ASCII-only numeric
# character references for U+65AD, U+56FA and U+5BB3.
my $eucjp = "Dan Kogai \xbe\xae\xbb\xf4\xc3\xc6 &#x65AD;&#x56FA;&#x5BB3;";

# Decode the octets into a Perl character string before touching the
# references, so chr() results and the decoded text are both characters.
my $utf8  = decode('eucjp', $eucjp);

binmode STDOUT, ':utf8';
print $utf8, "\n";                     # references still printed literally

# Replace each "&#xHHHH;" with the character it names.  The "&#x" prefix is
# required: without it, any 2-6 hex digits followed by ";" (e.g. "ab;")
# would be rewritten as well.
$utf8 =~ s/&#x([0-9A-Fa-f]{2,6});/chr hex $1/eg;
print $utf8, "\n";                     # references replaced by characters

ただし、この場合には&amp;のように名前で参照されるものが残ってしまいます。なるべくHTML::Entitiesをつかった方がよいでしょう。

Dan the Man with Too Many Bits and Bytes to Transcode