perlXSでSTLのstd::mapを使ってみた
ããã®ã¨ããC++ã§ã³ã¼ããæ¸ãã¦ãããã§ããããã£ã±ãããã¤ãperlãã使ãããã
ã¨ãããã¨ã§XSã«ã¤ãã¦ãåå¼·ä¸ã§ãã
ããç°¡åãªãã®ãªãæ¸ããããã«ãªã£ã¦ãã¾ããããperlãã渡ããããã·ã¥ãC++å´ã§std::mapã¨ãã¦åãåããããã¨ãã£ãç¹æ®ãªã±ã¼ã¹ã§ã¯ã¾ã£ã¦ãã¾ã£ãã®ã§ã¡ã¢ã£ã¦ããã¾ãã
ããããæ©ã¿ã¯ãã¾ããããçµè«ããè¨ãã¨ãhollyなblogãããã®ã¨ããã§ã¾ãã«ドンピシャな記事ãæ¸ãã¦ããã¦ããã®ã§ããããåèã«é å¼µã£ã¦ã¿ã¾ããã
以下、サンプルコードと実践手順です。
C++コード
hashã渡ãã¦std::mapãè¿ãã¨ããã±ã¼ã¹ãæ³å®ãã¦ããã®ã§ã以ä¸ã®ãããªã¯ã©ã¹ãæºåãã¾ãããã³ãµã¤ã³é¡ä¼¼åº¦ãè¨ç®ããã³ã¼ãã§ãã
vector_tool.hとして以下を用意します。
#include <iostream>
#include <map>
#include <string>
#include <cmath>

// Sparse vector: maps a feature name to its weight.
typedef std::map<std::string, double> vec;
typedef vec::iterator VecIt;

/**
 * Stateless helper for basic operations on sparse vectors stored as
 * std::map<std::string, double>.
 */
class VectorTool {
public:
    VectorTool() { }
    ~VectorTool() { }

    /**
     * Cosine similarity of two sparse vectors.
     *
     * @param vector_1 first vector (not modified)
     * @param vector_2 second vector (not modified)
     * @return dot(vector_1, vector_2) / (|vector_1| * |vector_2|), or 0.0
     *         when either vector has zero length (including empty maps).
     */
    double cosine_similarity(const vec &vector_1, const vec &vector_2) const {
        double inner_product = 0.0;
        for (vec::const_iterator itr = vector_1.begin(); itr != vector_1.end(); ++itr) {
            // find() rather than operator[]: operator[] would insert a zero
            // entry into vector_2 for every key it does not already contain.
            const vec::const_iterator match = vector_2.find(itr->first);
            if (match != vector_2.end()) {
                inner_product += itr->second * match->second;
            }
        }

        const double norm_1 = norm(vector_1);
        const double norm_2 = norm(vector_2);
        if (norm_1 == 0.0 || norm_2 == 0.0) {
            // The similarity is undefined for a zero vector; report 0.0.
            return 0.0;
        }
        return inner_product / (norm_1 * norm_2);
    }

    /**
     * Scales the vector in place so that its Euclidean norm becomes 1.
     *
     * @param vector vector to normalize; modified in place
     * @return a copy of the normalized vector. A vector whose norm is zero
     *         is returned unchanged, because dividing by zero would fill it
     *         with NaN/inf.
     */
    vec unit_length(vec &vector) const {
        const double length = norm(vector);
        if (length != 0.0) {
            for (VecIt itr = vector.begin(); itr != vector.end(); ++itr) {
                itr->second /= length;
            }
        }
        return vector;
    }

    /**
     * Euclidean (L2) norm of the vector.
     *
     * @param vector vector to measure (not modified)
     * @return square root of the sum of squared weights; 0.0 for an empty map.
     */
    double norm(const vec &vector) const {
        // Must start from zero: the sum is accumulated with +=.
        double sum_of_squares = 0.0;
        for (vec::const_iterator itr = vector.begin(); itr != vector.end(); ++itr) {
            sum_of_squares += itr->second * itr->second;
        }
        return std::sqrt(sum_of_squares);
    }

private:
};
ã³ãµã¤ã³é¡ä¼¼åº¦ãè¨ç®ããã¡ã½ãã以å¤ã«ãã¡ããã¡ããã¨ã¡ã½ãããããã¾ãããæ£ç´ããã辺ã¯ã©ãã§ãããã§ãã
ポイントとしては、このコードではベクトルをstd::mapのデータ構造で取り扱っているというところになります。
perlからの呼び出しイメージ
test.plとして以下を用意します。
use strict;
use warnings;
use VectorTool::XS;

# Smoke test for VectorTool::XS: two identical sparse vectors are passed in
# as plain hash references.
my $left  = { abc => 123.45, bcd => 234.56, cde => 345.67 };
my $right = { abc => 123.45, bcd => 234.56, cde => 345.67 };

my $vt = VectorTool::XS->new;

# Cosine similarity
my $similarity = $vt->cosine_similarity( $left, $right );
print "SIM : ", $similarity, "\n";

# Norm (before normalization)
print "NORM: ", $vt->norm($left), "\n";

# Normalize to unit length
$left = $vt->unit_length($left);

# Norm (after normalization)
print "NORM: ", $vt->norm($left), "\n";
VectorTool::XSというのが今回つくるパッケージのつもりです。
ベクトルは単純なハッシュリファレンスとしています。
ãã¯ãã«ã®ä¸èº«ã®å¤ã¯ãå®é¨çã«ã¯ã©ãã§ãããã£ãã®ã§ã$vec_1ã¨$vec_2ã§åããã®ã«ãã¦ãã¾ãã
ã¹ã¯ãªããå©ãã¨çµæã¯ãããªé¢¨ã«ãªãã¾ããï¼è¨äºã®å 容çã«ã¯ãã®çµæèªä½ã¯ã©ãã§ãããã§ãï¼
godzilla:VectorTool-XS miki$ perl -Mblib test.pl SIM : 1 NORM: 435.59849058508 NORM: 1
typemap
typemapとはperlのデータ型とC/C++のデータ型を変換するためのルール記述です。
ããã©ã«ãã§ç¨æããã¦ãããã®ä»¥å¤ã«ãèªåã§å®ç¾©ã§ããã®ã§ãã¤ã±ã¦ãXS使ãã¯ã¿ãªèªåã§typemapãæ¸ãã¿ããã§ãã
ã¡ãªã¿ã«ãã®typemapã¯å ¨é¢çã«hollyãªblogããããã®ã³ããã§ãã
# NOTE(review): the next line looks like the tail of a T_VEC entry (output
# conversion for the object pointer) whose TYPEMAP/INPUT/OUTPUT header lines
# are not visible here. It is kept verbatim; confirm against the full file.
T_VEC sv_setref_pv($arg, CLASS, (void *)$var);

TYPEMAP
vec	T_STRING_MAP

INPUT
T_STRING_MAP
    {
        /* Perl hash reference to std::map of string to double. */
        HV *hv;
        HE *he;
        vec t_sm;
        if(SvROK($arg) && SvTYPE(SvRV($arg)) == SVt_PVHV) {
            hv = (HV *)SvRV($arg);
            /* hv_iterinit also resets the iterator used by the loop below. */
            if(hv_iterinit(hv) == 0) {
                warn(\"${Package}::$func_name() -- $var is empty hash reference\");
                XSRETURN_UNDEF;
            }
        } else {
            warn(\"${Package}::$func_name() -- $var is not a hash reference\");
            XSRETURN_UNDEF;
        }
        while((he = hv_iternext(hv)) != NULL) {
            SV *svkey = HeSVKEY_force(he);
            SV *svval = HeVAL(he);
            t_sm.insert(vec::value_type(std::string(SvPV_nolen(svkey)), SvNV(svval)));
        }
        $var = t_sm;
    }

OUTPUT
T_STRING_MAP
    {
        /* std::map of string to double back to a Perl hash reference. */
        if($var.empty()){
            warn(\"${Package}::$func_name() -- map is empty\");
            XSRETURN_UNDEF;
        }
        /* The hash is owned solely by the temporary reference created below. */
        HV *hv = newHV();
        for(VecIt it = $var.begin(); it != $var.end(); ++it) {
            SV *value = newSVnv(it->second);
            /* hv_store takes ownership of value only when it succeeds. */
            if(hv_store(hv, (it->first).c_str(), (it->first).size(), value, 0) == NULL) {
                SvREFCNT_dec(value);
            }
        }
        /*
         * The reference made by newRV_noinc is only a temporary: SvSetSV
         * copies it into the return slot, taking an extra count on the hash.
         * Making the temporary mortal lets perl free it afterwards instead of
         * leaking one SV per call, and leaves the returned reference as the
         * single owner of the hash.
         */
        SvSetSV($arg, sv_2mortal(newRV_noinc((SV *)hv)));
    }
ããããã¨åªæã®ãããªã³ã¼ããã¤ããªã£ã¦ããã¾ããã
ååé¨åã¯ãªãã¸ã§ã¯ããä½ãé¨åï¼ã³ã³ã¹ãã©ã¯ã¿ï¼ã®ãã¼ã¿æ§é ã®å¤æã«ã¤ãã¦ã®è¨è¿°ã§ãã
ããã¯ä»¥ä¸ã®è¨äºã®ã¾ãã¾ã§ãã
perlxså ¥éãã®3ãhttp://blog.livedoor.jp/kurt0027/archives/51850105.html
å¾åé¨åã¯perlã®ããã·ã¥ã¨STLã®std::mapãå¤æããè¨è¿°ã§ãããããhollyããã®è¨äºããæåãã¾ããã
perlxså ¥éãã®5ãhttp://blog.livedoor.jp/kurt0027/archives/51855521.html
ãã ãhollyããã®ä¾ã ã¨std::map
ããã«ãã¦ãtypemapãã¾ãã«åªæã§ããããã¾ãèªãã§ããã°ãªãã¨ãªãæ³åã¯ã¤ãã¾ããããããªã®ç´ ã§æ¸ãã£ã¦ãããã¦ãã¡ãã£ã¨ç¡çã§ãããå£ãã泡ãåºããã§ãã
XSコード
さてさて、いよいよXSです。
typemapããã°ã£ããããã§XSé¨åã¯ã¨ã¦ãã·ã³ãã«ã§ãã
/*
 * XS glue exposing the C++ VectorTool class to Perl as VectorTool::XS.
 * The vec arguments and return values are converted by the T_STRING_MAP
 * typemap entry; the XSUBs below are declarations only.
 */
#include "vector_tool.h"
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"

MODULE = VectorTool::XS		PACKAGE = VectorTool::XS

VectorTool *
VectorTool::new()

# Releases the C++ object allocated by new() when the Perl object is destroyed;
# for a C++ XSUB named DESTROY xsubpp generates the delete of THIS.
void
VectorTool::DESTROY()

double
VectorTool::cosine_similarity(vec vector_1, vec vector_2)

vec
VectorTool::unit_length(vec vector)

double
VectorTool::norm(vec vector)
C++ã®ã³ã¼ãã¯vector_tool.hã«ã¾ã¨ãã¦ããã®ã§ããããã¤ã³ã¯ã«ã¼ãããã ãã§OKã§ãããã¦ãã
Makefile.PL
ãã£ã¨å¿ãã¦ã¯ãããªãMakefile.PLã§ãã
# Build script for the VectorTool::XS extension: writes the Makefile via
# ExtUtils::MakeMaker with the C++ compiler, linker and XS options set below.
use 5.010000;
use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
WriteMakefile(
    NAME              => 'VectorTool::XS',
    VERSION_FROM      => 'lib/VectorTool/XS.pm', # finds $VERSION
    PREREQ_PM         => {}, # e.g., Module::Name => 1.1
    ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
      (ABSTRACT_FROM  => 'lib/VectorTool/XS.pm', # retrieve abstract from module
       AUTHOR         => 'miki <[email protected]>') : ()),
    LIBS              => [''], # e.g., '-lm'
    DEFINE            => '', # e.g., '-DHAVE_SOMETHING'
    INC               => '-I.', # e.g., '-I. -I/usr/include/other'
    # Un-comment this if you add C files to link with later:
    OBJECT            => '$(O_FILES)', # link all the C files too
    # Compile and link with g++ because the wrapped code is C++.
    CC                => "g++",
    LD                => "g++",
    # Extra option string for the XS compiler (C++ mode).
    XSOPT             => '-C++'
);
ほぼh2xsで生成されたままですが、下のほうにあるCC,LD,XSOPTは後から追記しています。
ãã¨CãCï¼ï¼ã®ãã¡ã¤ã«ãå¥ã«ããã¤ãç¨æãã¦ãªãã¸ã§ã¯ããã¡ã¤ã«ãçæãããããªå ´åã¯OBJECT => '$(O_FILES)'ã¯å¿
è¦ã«ãªãã¾ããããã§åæã«ãªãã¸ã§ã¯ããã¡ã¤ã«ããªã³ã¯ãã¦ãããããã«ãªãã¾ãã
ベンチマーク
ãã¦ãããã¤ãmakeããã¨ãä¸å¿æå¾ éãã«åä½ããã¢ã¸ã¥ã¼ã«ãçæãããã¾ããããã£ããï¼
ですが、速度の方はあまり速くありません。
åããããªãã¨ãããperlã³ã¼ããå¥ã¢ã¸ã¥ã¼ã«ã¨ãã¦æ¸ãã¦ããã¦ãããã¨ã®ãã³ããã¼ã¯ãã¨ã£ã¦ã¿ã¾ããã
use strict;
use warnings;
use VectorTool::XS;
use VectorTool::PurePerl;
use Benchmark qw(timethese cmpthese);

# Arbitrary sample vectors; the values themselves do not matter here.
my $vec_1 = { abc => 123.45, bcd => 234.56, cde => 345.67 };
my $vec_2 = { abc => 123.45, bcd => 234.56, cde => 345.67 };

# One instance of each implementation: XS and pure Perl.
my $tool_xs = VectorTool::XS->new;
my $tool_pp = VectorTool::PurePerl->new;

my $iterations = 1000000;

# Time both implementations, then print the comparison table.
my $timings = timethese(
    $iterations,
    {
        pp => \&pp,
        xs => \&xs,
    }
);
cmpthese($timings);

# Benchmark body: pure-Perl cosine similarity.
sub pp {
    my $ret = $tool_pp->cosine_similarity( $vec_1, $vec_2 );
}

# Benchmark body: XS (C++) cosine similarity.
sub xs {
    my $ret = $tool_xs->cosine_similarity( $vec_1, $vec_2 );
}
結果はこうです。
godzilla:VectorTool-XS miki$ perl -Mblib bench.pl Benchmark: timing 1000000 iterations of pp, xs... pp: 7 wallclock secs ( 6.89 usr + 0.00 sys = 6.89 CPU) @ 145137.88/s (n=1000000) xs: 9 wallclock secs (10.16 usr + 0.00 sys = 10.16 CPU) @ 98425.20/s (n=1000000) Rate xs pp xs 98425/s -- -32% pp 145138/s 47% --
ãXSã«ããã®ã«pure perlããé ãã¨ã¯ãªã«ãã¨ã ï¼ãã¨ãã°ãæã£ã¦ã¿ã¾ããããããèããã°å½ç¶ããããã¾ããã
ã³ãµã¤ã³é¡ä¼¼åº¦ã®è¨ç®ã®ããã«ã·ã³ãã«ãªå¦çã®å ´åããã®é¨åã§ã®è¨èªéã§ã®ããã©ã¼ãã³ã¹å·®ãããããããtypemapã®ãããªè¤éãªåå¤æå¦çã®æ¹ãã³ã¹ããé«ãã¤ãã¦ãã®ã§ã¯ãªãã§ããããã
ã¤ã¾ãããã£ã¨æ¼ç®ã«æéãããããããªå¦çã§ããã°ã¯ã£ããã¨XSãå¼·ãã¯ãã
ãªã®ã§C++ã¨perlã®ã¢ã¸ã¥ã¼ã«å´ã§åã 100ä¸åã«ã¼ãããããã«ãã¦ã¿ã¦ããã³ããã¼ã¯ã¹ã¯ãªããããã¯ï¼åã ãå¼ã³åºãããã«æ§æãå¤ãã¦ã¿ã¾ããã
ã¾ã£ãã馬鹿ãããå¤æ´ã§ãããããã¨æ示çã«XSãåãããããã®å¤æ´ã§ãã
ãã®çµæãããã
godzilla:VectorTool-XS miki$ perl -Mblib bench.pl Benchmark: timing 1 iterations of pp, xs... pp: 5 wallclock secs ( 4.98 usr + 0.01 sys = 4.99 CPU) @ 0.20/s (n=1) (warning: too few iterations for a reliable count) xs: 0 wallclock secs ( 0.22 usr + 0.00 sys = 0.22 CPU) @ 4.55/s (n=1) (warning: too few iterations for a reliable count) s/iter pp xs pp 4.99 -- -96% xs 0.220 2168% --
ãããC++å¼·ãï¼å¼·ãããï¼
ããã¨ããããã³ãã§ã¯ããã¾ããã100ä¸ã«ã¼ããC++å´ã«æã£ã¦ãããã¨ã§å§åçãªããã©ã¼ãã³ã¹ã®å·®ã¨ãªãã¾ããã
まとめ
XSã§std::mapã使ãæ¹æ³ãhollyããã®ãããã§ãããããã¾ããã
で教訓として得たことは
- ç°¡åãªå¦çãXSã«ãã¦ãåå¤æã§ã®ã³ã¹ãã®æ¹ãé«ãã¤ãããã§ããã°éå¹æ
- ç´ç²ã«ã大éã«ã¼ãã§ã®æ¼ç®å¦çã¹ãã¼ãããæ¯è¼ããã¨C++ã¯å§åç
- ãã£ã±ãXSã¯å¤æ çãã§ãããªãã ãã¡ãã£ã¨æ¥½ããâª
といったところになります。
èªåã¯ãããã¼ã¿è§£æã®ãããªãã¨ããã¦ããã®ã§ããã£ã¤ãå¦çé¨åã¯C++ã§æ¸ãã¦ãperlã§è²ã ãªå¦çã¨çµã¿åããããã¨ããã¹ã¿ã¤ã«ãçæ³çããã