404 Blog Not Found:perl, python & ruby - chr() vs. Unicode とりあえずchrが長くなったのでordは別entryということで。
#!/usr/local/bin/perl
# Print the code point (ord) of four characters: U+0061, U+03B1, U+5F3E
# and U+2A6B2 (the last one lies above U+FFFF).
#
# The shebang must sit on a line of its own: with the statements fused
# onto the same line, everything after "#!" is a comment and neither the
# strictures nor the say() helper below are ever compiled.
use strict;
use warnings;
use utf8;

# Emit wide characters as UTF-8 on standard output.
binmode STDOUT, ':utf8';

# Local helper: print the arguments followed by a newline.
sub say { print @_, "\n" }

say ord "\x{61}";       # 97
say ord "\x{3b1}";      # 945
say ord "\x{5F3E}";     # 24382
say ord "\x{2A6B2}";    # 173746
97 945 24382 173746
pythonグループ - faerie �������� Python - 0x10000 以上の Unicode 文字を扱うには--enable-unicode=ucs4 付きでビルドすればいいらしい。
少なくともMac OS Xの/usr/bin/python (v2.3.5)は、--enable-unicode=ucs4 ではなかった。
ついでに、eval("u\'\\U%08X\'" % n) より (r'\U%08X' % n).decode("unicode_escape") のほうが良い。
以上を踏まえて、以下はデフォルトの仕様、すなわちunicode objectの内部表現がUTF-16だという前提。まずは以下をそのまま実行してみる。
print ord(u'\U00000061') print ord(u'\U000003b1') print ord(u'\U00005F3E') print ord(u'\U0002A6B2')
97 945 24382 Traceback (most recent call last): File "uniord.py", line 10, in ? print ord(u'\U0002A6B2') TypeError: ord() expected a character, but string of length 2 found
def uniord(c):
    """Return the Unicode code point of the character held in ``c``.

    ``c`` is either a single code unit, or a UTF-16 surrogate pair
    (high unit 0xD800-0xDBFF followed by low unit 0xDC00-0xDFFF), which
    is how a character above U+FFFF appears when the interpreter stores
    unicode strings as 16-bit units.

    Raises TypeError for anything else (empty string, two units that are
    not a surrogate pair, or more than two units), mirroring ord().
    """
    if len(c) == 1:
        return ord(c)
    if len(c) == 2:
        high = ord(c[0])
        low = ord(c[1])
        if 0xD800 <= high < 0xDC00 and 0xDC00 <= low < 0xE000:
            # Each surrogate carries 10 payload bits; the pair encodes
            # (code point - 0x10000).
            return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
    raise TypeError(
        "uniord() expected a character or a surrogate pair, "
        "but string of length %d found" % len(c))


# Single-argument print(...) prints the same text as the Python 2 print
# statement and also parses on Python 3.
print(uniord(u'\U00000061'))
print(uniord(u'\U000003b1'))
print(uniord(u'\U00005F3E'))
print(uniord(u'\U0002A6B2'))
とはいえ、これはあくまで一文字だけの例。実際には、文字列から�����文字を一つ一つ取り出さなくてはならない。Perlのsplit //, $str
ないしunpack 'U*', $str
def uniunpack(ustr):
    """Return the list of Unicode code points contained in ``ustr``.

    A high surrogate (0xD800-0xDBFF) immediately followed by a low
    surrogate (0xDC00-0xDFFF) is combined into the single code point the
    pair encodes. Any other code unit, including an unpaired surrogate,
    is returned unchanged, so malformed input never raises IndexError
    and never swallows the following character.
    """
    result = []
    i = 0
    n = len(ustr)
    while i < n:
        o = ord(ustr[i])
        if 0xD800 <= o < 0xDC00 and i + 1 < n:
            low = ord(ustr[i + 1])
            if 0xDC00 <= low < 0xE000:
                # Each surrogate carries 10 payload bits; the pair
                # encodes (code point - 0x10000).
                o = 0x10000 + (o - 0xD800) * 0x400 + (low - 0xDC00)
                i += 1  # the low surrogate has been consumed as well
        result.append(o)
        i += 1
    return result


# Demo: the same four characters as code units (map/ord) and as code
# points (uniunpack). Single-argument print(...) prints the same text as
# the Python 2 print statement. The sample is named ``text`` so that the
# builtin ``str`` is not shadowed.
text = u'\u0061\u03b1\u5F3E\U0002A6B2'
print(text.encode('utf-8'))
print(map(ord, text))
print(uniunpack(text))
aα弾𪚲 [97, 945, 24382, 55401, 57010] [97, 945, 24382, 173746]
# Ruby 1.8-era setup: select UTF-8 via $KCODE and load the jcode library.
# NOTE(review): both belong to Ruby 1.8; confirm the target interpreter
# before running this on anything newer.
$KCODE = 'u'
require 'jcode'

# Build a string from four code points with pack('U*'), show it, then
# unpack it again and print each code point on its own line.
str = [0x61, 0x3b1, 0x5F3E, 0x2A6B2].pack('U*')
p str
str.unpack('U*').each { |ord| p ord }
"aα弾𪚲" 97 945 24382 173746
という表記法が用意されていないのか、そのあたり、Rubyistのみなさんはどうなさっているのだろうか。str = 'aα弾' + [0x2A6B2].pack('U')
とか、それともstr = "aα弾#{ [0x2A6B2].pack('U') }"
Dan the Man with Too Many Characters to Juggle in Too Many Languages