SQL to calculate per-day averages and standard deviations for all stocks over the entire period

A note on the SQL I wrote to calculate, from the data in stockdb, per-day averages and standard deviations for every stock over the entire period.

- For each of closing price, trading volume, and price change ratio, it computes the average and standard deviation over the past 3, 5, 10, 15, 25, 50, and 75 days in one go.
- It takes a fair amount of time; in the environment I tested it was about an hour.
- Window functions are great.
-- List of days the market was open
CREATE MATERIALIZED VIEW days AS (
  WITH ds AS (
    SELECT distinct date FROM rates
  ), x AS (
    SELECT date,
      row_number() OVER (ORDER BY date) AS index,
      lag(date,  1, null) OVER (ORDER BY date) AS prev,
      lag(date,  3, null) OVER (ORDER BY date) AS before_3_days,
      lag(date,  5, null) OVER (ORDER BY date) AS before_5_days,
      lag(date, 10, null) OVER (ORDER BY date) AS before_10_days,
      lag(date, 15, null) OVER (ORDER BY date) AS before_15_days,
      lag(date, 25, null) OVER (ORDER BY date) AS before_25_days,
      lag(date, 50, null) OVER (ORDER BY date) AS before_50_days,
      lag(date, 75, null) OVER (ORDER BY date) AS before_75_days
    FROM ds ORDER BY date desc
  )
  SELECT * FROM x WHERE prev IS NOT NULL
);

-- Rows for days with no trades (volume = 0) are missing from rates, so build a view that fills them in
CREATE MATERIALIZED VIEW rates_filled AS (
  WITH all_stock_and_days AS (
    SELECT d.*, s.id AS stock_id FROM days AS d, stocks AS s
  ), x AS (
    SELECT a.stock_id, a.date,
      CASE WHEN r.date IS NOT NULL THEN r.date
           ELSE (SELECT max(date) FROM rates WHERE stock_id = a.stock_id AND date <= a.date)
      END AS actual
    FROM all_stock_and_days AS a
    LEFT JOIN rates AS r ON a.stock_id = r.stock_id AND a.date = r.date
  )
  SELECT x.stock_id, x.date,
    CASE WHEN x.actual = x.date THEN r.open   ELSE r.close END AS open,
    CASE WHEN x.actual = x.date THEN r.close  ELSE r.close END AS close,
    CASE WHEN x.actual = x.date THEN r.high   ELSE r.close END AS high,
    CASE WHEN x.actual = x.date THEN r.low    ELSE r.close END AS low,
    CASE WHEN x.actual = x.date THEN r.volume ELSE 0       END AS volume
  FROM x
  LEFT JOIN rates AS r ON x.stock_id = r.stock_id AND x.actual = r.date
);
CREATE UNIQUE INDEX rates_filled_stock_id_date_index ON rates_filled (stock_id, date);

-- Calculate the price change ratio from the previous day
CREATE MATERIALIZED VIEW ratios AS (
  WITH x AS (
    SELECT d.*, s.id FROM days AS d, stocks AS s
  )
  SELECT x.id AS stock_id, x.date, (r1.close - r2.close) / r2.close AS ratio
  FROM x
  LEFT JOIN rates_filled AS r1 ON x.id = r1.stock_id AND r1.date = x.date
  LEFT JOIN rates_filled AS r2 ON x.id = r2.stock_id AND r2.date = x.prev
  WHERE r2.close IS NOT NULL
);
CREATE UNIQUE INDEX ratios_stock_id_date_index ON ratios (stock_id, date);

-- Per-day averages and standard deviations for all stocks over the entire period:
-- for closing price, volume, and price change ratio, compute the average and
-- standard deviation over the past 3, 5, 10, 15, 25, 50, and 75 days
CREATE MATERIALIZED VIEW ma AS (
  SELECT r.stock_id, r.date, r.volume,
    avg(r.close) OVER from_3_days_ago  AS avg_close_3days,  stddev(r.close) OVER from_3_days_ago  AS sd_close_3days,
    avg(r.close) OVER from_5_days_ago  AS avg_close_5days,  stddev(r.close) OVER from_5_days_ago  AS sd_close_5days,
    avg(r.close) OVER from_10_days_ago AS avg_close_10days, stddev(r.close) OVER from_10_days_ago AS sd_close_10days,
    avg(r.close) OVER from_15_days_ago AS avg_close_15days, stddev(r.close) OVER from_15_days_ago AS sd_close_15days,
    avg(r.close) OVER from_25_days_ago AS avg_close_25days, stddev(r.close) OVER from_25_days_ago AS sd_close_25days,
    avg(r.close) OVER from_50_days_ago AS avg_close_50days, stddev(r.close) OVER from_50_days_ago AS sd_close_50days,
    avg(r.close) OVER from_75_days_ago AS avg_close_75days, stddev(r.close) OVER from_75_days_ago AS sd_close_75days,
    avg(ra.ratio) OVER from_3_days_ago  AS avg_ratio_3days,  stddev(ra.ratio) OVER from_3_days_ago  AS sd_ratio_3days,
    avg(ra.ratio) OVER from_5_days_ago  AS avg_ratio_5days,  stddev(ra.ratio) OVER from_5_days_ago  AS sd_ratio_5days,
    avg(ra.ratio) OVER from_10_days_ago AS avg_ratio_10days, stddev(ra.ratio) OVER from_10_days_ago AS sd_ratio_10days,
    avg(ra.ratio) OVER from_15_days_ago AS avg_ratio_15days, stddev(ra.ratio) OVER from_15_days_ago AS sd_ratio_15days,
    avg(ra.ratio) OVER from_25_days_ago AS avg_ratio_25days, stddev(ra.ratio) OVER from_25_days_ago AS sd_ratio_25days,
    avg(ra.ratio) OVER from_50_days_ago AS avg_ratio_50days, stddev(ra.ratio) OVER from_50_days_ago AS sd_ratio_50days,
    avg(ra.ratio) OVER from_75_days_ago AS avg_ratio_75days, stddev(ra.ratio) OVER from_75_days_ago AS sd_ratio_75days,
    avg(r.volume) OVER from_3_days_ago  AS avg_volume_3days,  stddev(r.volume) OVER from_3_days_ago  AS sd_volume_3days,
    avg(r.volume) OVER from_5_days_ago  AS avg_volume_5days,  stddev(r.volume) OVER from_5_days_ago  AS sd_volume_5days,
    avg(r.volume) OVER from_10_days_ago AS avg_volume_10days, stddev(r.volume) OVER from_10_days_ago AS sd_volume_10days,
    avg(r.volume) OVER from_15_days_ago AS avg_volume_15days, stddev(r.volume) OVER from_15_days_ago AS sd_volume_15days,
    avg(r.volume) OVER from_25_days_ago AS avg_volume_25days, stddev(r.volume) OVER from_25_days_ago AS sd_volume_25days,
    avg(r.volume) OVER from_50_days_ago AS avg_volume_50days, stddev(r.volume) OVER from_50_days_ago AS sd_volume_50days,
    avg(r.volume) OVER from_75_days_ago AS avg_volume_75days, stddev(r.volume) OVER from_75_days_ago AS sd_volume_75days
  FROM rates_filled AS r
  LEFT JOIN ratios AS ra ON ra.stock_id = r.stock_id AND ra.date = r.date
  WINDOW
    from_3_days_ago  AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING),
    from_5_days_ago  AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING),
    from_10_days_ago AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING),
    from_15_days_ago AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 14 FOLLOWING),
    from_25_days_ago AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 24 FOLLOWING),
    from_50_days_ago AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 49 FOLLOWING),
    from_75_days_ago AS (PARTITION BY r.stock_id ORDER BY r.stock_id, r.date desc ROWS BETWEEN CURRENT ROW AND 74 FOLLOWING)
  ORDER BY r.date desc
);
CREATE UNIQUE INDEX ma_stock_id_date_index ON ma(stock_id, date);
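Once the ma view has been built, the statistics can be pulled per stock and date. A minimal usage sketch (the stock_id value and the selected columns are just examples, not part of the original post):

-- Example only: latest 25-day close/volume statistics for one stock (1301 is a placeholder id)
SELECT date, avg_close_25days, sd_close_25days, avg_volume_25days, sd_volume_25days
FROM ma
WHERE stock_id = 1301
ORDER BY date DESC
LIMIT 10;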
Built a tool to import daily Japanese stock data into a local database

I made a tool that imports daily (end-of-day) data for Japanese stocks into a local database.

- It fetches the daily Japanese stock data published on Quandl and loads it into a local PostgreSQL.
- It imports everything in the Tokyo Stock Exchange database, so ETF data and the like are included as well.
- For now it fetches the most recent 500 days of data. Since only days the market was open are included, that is roughly two years' worth.
- Only data that has not been imported yet is loaded, so if an error occurs partway through, simply re-run it.
- If you run it daily via cron, the latest daily data should always be available (a sample crontab entry is sketched at the end of the Usage section below).
- The DB table layout is as follows.
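A rough sketch of that layout, reconstructed only from the columns the SQL in the previous post relies on; the column types, the code column, and the constraints are assumptions, so treat the repository's actual schema as authoritative:

CREATE TABLE stocks (
  id   SERIAL PRIMARY KEY,  -- referenced as rates.stock_id above
  code VARCHAR(16)          -- assumed: the Quandl ticker code
);

CREATE TABLE rates (
  stock_id INTEGER REFERENCES stocks(id),
  date     DATE,
  open     NUMERIC,
  close    NUMERIC,
  high     NUMERIC,
  low      NUMERIC,
  volume   BIGINT,
  PRIMARY KEY (stock_id, date)  -- assumed
);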
Prerequisites
$ git --version
git version 1.8.3.1
$ docker -v
Docker version 1.10.2, build c3959b1
$ docker-compose -v
docker-compose version 1.6.2, build 4d72027
Usage
$ git clone https://github.com/unageanu/stock-db.git
$ cd stock-db
$ vi .env
# Set POSTGRES_PASSWORD and QUANDL_API_KEY.
# Example settings:
---
POSTGRES_USER=postgres
POSTGRES_PASSWORD=mysecretpassword
QUANDL_API_KEY=myquandlapikey
QUANDL_API_VERSION=2015-04-09
---
$ docker-compose up -d # start PostgreSQL
$ bundle install
$ bundle exec ruby -I src ./src/importer.rb
See here for how to obtain a Quandl API key. There are just under 4,000 listed issues, so the initial import takes two to three hours.
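To keep the data up to date as noted above, the importer can be run from cron. This is only a sketch; the clone path and schedule are placeholders:

# Run the importer every morning at 06:00 (the path /home/youruser/stock-db is a placeholder)
0 6 * * * cd /home/youruser/stock-db && bundle exec ruby -I src ./src/importer.rb >> import.log 2>&1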
Note that this was written rather hastily, so please bear with it.
Steps to install Clair and scan Docker images for vulnerabilities

These are the steps for installing Clair, a vulnerability scanner for Docker images, and checking local images with it. I got slightly stuck along the way, so here is a memo.

0. Environment
$ cat /etc/redhat-release
CentOS Linux release 7.2.1511 (Core)
$ docker -v
Docker version 1.10.2, build c3959b1
1. Install and start PostgreSQL
$ docker pull postgres:latest
$ docker run --name postgres -p 5432:5432 -e POSTGRES_PASSWORD=<password> -d postgres
2. Install and start clair
$ mkdir ./clair_config
$ curl -L https://raw.githubusercontent.com/coreos/clair/master/config.example.yaml -o ./clair_config/config.yaml
$ vi ./clair_config/config.yaml
# change database - source as follows
---
database:
  # PostgreSQL Connection string
  # http://www.postgresql.org/docs/9.4/static/libpq-connect.html
  source: postgresql://postgres:<password>@postgres:5432?sslmode=disable
---
$ docker run -p 6060-6061:6060-6061 --link postgres:postgres -v /tmp:/tmp -v $PWD/clair_config:/config quay.io/coreos/clair -config=/config/config.yaml
Clair starts loading the vulnerability data; wait until "updater: update finished" appears in the log (it took about an hour for me...).

3. Install and run the local check tool
$ sudo yum -y install golang
$ export GOPATH=~/.go
$ go get -u github.com/coreos/clair/contrib/analyze-local-images
$ docker pull <image to check>
$ sudo $GOPATH/bin/analyze-local-images <image to check>
I'm not sure yet what the best way to run this continuously would be...
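One simple option (an untested sketch, not something I have actually set up) would be to re-run the analyzer from cron against the images you care about:

# Sketch only: re-scan an image every night at 02:00; the binary path and image name are placeholders
0 2 * * * /root/.go/bin/analyze-local-images myorg/myimage:latest >> /var/log/clair-scan.log 2>&1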
Addendum: docker-compose.yml

I also put together a docker-compose.yml.
version: '2'
services:
  postgres:
    container_name: clair_postgres
    image: postgres:latest
    environment:
      POSTGRES_PASSWORD: <password>
    ports:
      - "5432:5432"
    volumes:
      - ./data:/var/lib/postgresql/data
  clair:
    container_name: clair_clair
    image: quay.io/coreos/clair
    ports:
      - "6060-6061:6060-6061"
    links:
      - postgres
    volumes:
      - /tmp:/tmp
      - ./config:/config
    command: [-config, /config/config.yaml]
Steps to set up automatic Docker Image builds from a Dockerfile on GitHub

With Docker Hub's Automated Builds, changes to a Dockerfile on GitHub or Bitbucket are detected and a Docker Image is built automatically.

- Commits to the repository are detected, and the Docker Image on Docker Hub is rebuilt automatically.
- It can also detect newly added tags or branches and automatically build Docker Images tagged with the tag or branch name.

I tried the whole setup, so here are my notes.

- I configured the Dockerfile in unagenau/docker-jiji2 to be built automatically.
- If you don't have a Docker Hub account yet, create one first.
1. Link your Docker Hub account with your GitHub account

First of all, you need to link your Docker Hub account with your GitHub account.

Log in to Docker Hub and select Settings from the menu at the top right.

Next, select Linked Accounts & Services from the tabs at the top.

Select GitHub.

Choose how to link the accounts. I selected Public and Private so that the webhook can be set up automatically.

A confirmation screen is shown; allow the link and you're done.
2. Create a repository for automated builds

Once the GitHub account is linked, create the repository that will run the automated builds.

From the menu at the top right, select Create - Create Automated Build.

Click GitHub.

Select the repository that hosts the Dockerfile.

Then fill in the settings:

- Enter a description in Short Description.
- Clicking Click here to customize opens the detailed settings.
- I had not placed the Dockerfile in the repository root, so I point to it with Dockerfile Location on the detail screen.
- I also set it up to detect newly added tags and build images tagged with the tag name.

Once the settings are done, click Create and the repository is created.
3. Build an image

Once the repository exists, committing a change to the GitHub repository or adding a tag triggers an automatic build. You can also start a build manually from the UI: just click the Trigger button under Build Settings.
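For instance, with the tag detection enabled above, pushing a tag is enough to get a tagged image built (the tag name 0.0.1 is just an example):

$ git tag 0.0.1
$ git push origin 0.0.1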
You can check the status of build tasks on the Build Details tab.

The build sat in the queue for a while and then completed after about 10 minutes.
A sample bot that assists trading with a range-break strategy

Sample no. 3 for Jiji, the FX system-trading framework.

This time I built a bot that assists with trades based on a range-break strategy.

It is a straight port to Jiji of the "simple, low-effort range-break strategy" published on the FX Wroks site.

Behavior

It works as follows.

- 1) The bot watches the rate and checks for a range break.
  - The condition is the same as on the site: the rate has stayed within 100 pips for 8 hours and then breaks out upward or downward.
  - The waiting time and the pips threshold can be adjusted via parameters.
- 2) When a range break is detected, a notification is sent to your phone.
  - There are many false breakouts, so this time the bot only sends a notification and leaves the decision to a human.
- 3) You receive the notification, make the final call, and execute the trade.
  - Pressing a button in the notification places a market order to sell or buy.
  - Positions are closed with a trailing stop.
Impressions from a quick test run

I gave it a quick test; it triggered on more moves than I expected, with mixed results.

This one worked well.

In this one, the rise was already over by the time it was judged a break...

In this one, the price broke downward once and then moved in the opposite direction...

Try customizing it before use, for example by tweaking the break conditions or combining it with a moving-average trend check.

Code
# === An agent that trades on range breaks
class RangeBreakAgent

  include Jiji::Model::Agents::Agent

  def self.description
    <<-STR
An agent that trades on range breaks.
 - Sends a notification when the rate breaks out of a range after staying
   within a fixed number of pips for a given period (default: 8 hours).
 - From the notification you can decide whether to trade and execute the order.
 - Positions are closed with a trailing stop.
    STR
  end

  # Properties that can be configured from the UI
  def self.property_infos
    [
      Property.new('target_pair',        'Currency pair to watch',                       'USDJPY'),
      Property.new('range_period',       'Period used to judge a range (minutes)',        60 * 8),
      Property.new('range_pips',         'Width regarded as a range (pips)',              100),
      Property.new('trailing_stop_pips', 'Width used for the trailing-stop close (pips)', 30),
      Property.new('trade_units',        'Trade units',                                   1)
    ]
  end

  def post_create
    pair = broker.pairs.find { |p| p.name == @target_pair.to_sym }
    @checker = RangeBreakChecker.new(pair, @range_period.to_i, @range_pips.to_i)
  end

  def next_tick(tick)
    # Check whether the rate has broken out of the range
    result = @checker.check_range_break(tick)
    # If it has, send a notification
    send_notification(result) if result[:state] != :no
  end

  def execute_action(action)
    case action
    when 'range_break_buy'  then buy
    when 'range_break_sell' then sell
    else 'Unknown action.'
    end
  end

  def state
    { checker: @checker.state }
  end

  def restore_state(state)
    @checker.restore_state(state[:checker]) if state[:checker]
  end

  private

  def sell
    broker.sell(@target_pair.to_sym, @trade_units.to_i, :market, {
      trailing_stop: @trailing_stop_pips.to_i
    })
    'Placed a sell order.'
  end

  def buy
    broker.buy(@target_pair.to_sym, @trade_units.to_i, :market, {
      trailing_stop: @trailing_stop_pips.to_i
    })
    'Placed a buy order.'
  end

  def send_notification(result)
    message = "#{@target_pair} #{result[:price]}" \
      + ' broke out of the range. Do you want to trade?'
    @notifier.push_notification(message, [create_action(result)])
    logger.info "#{message} #{result[:state]} #{result[:time]}"
  end

  def create_action(result)
    if result[:state] == :break_high
      { 'label' => 'Place a buy order',  'action' => 'range_break_buy' }
    else
      { 'label' => 'Place a sell order', 'action' => 'range_break_sell' }
    end
  end
end

class RangeBreakChecker

  def initialize(pair, period, range_pips)
    @pair = pair
    @range_pips = range_pips
    @candles = Candles.new(period * 60)
  end

  def check_range_break(tick)
    tick_value = tick[@pair.name]
    result = check_state(tick_value, tick.timestamp)
    # Once a break occurs, reset the state and wait for the next break
    @candles.reset unless result == :no
    @candles.update(tick_value, tick.timestamp)
    {
      state: result,
      price: tick_value.bid,
      time:  tick.timestamp
    }
  end

  def state
    @candles.state
  end

  def restore_state(state)
    @candles.restore_state(state)
  end

  private

  # Judge whether the rate has broken out of the range
  def check_state(tick_value, time)
    highest = @candles.highest
    lowest  = @candles.lowest
    return :no if highest.nil? || lowest.nil?
    return :no unless over_period?(time)
    diff = highest - lowest
    return :no if diff >= @range_pips * @pair.pip
    calculate_state(tick_value, highest, diff)
  end

  def calculate_state(tick_value, highest, diff)
    center = highest - diff / 2
    pips = @range_pips / 2 * @pair.pip
    if tick_value.bid >= center + pips
      return :break_high
    elsif tick_value.bid <= center - pips
      return :break_low
    end
    :no
  end

  def over_period?(time)
    oldest_time = @candles.oldest_time
    return false unless oldest_time
    (time.to_i - oldest_time.to_i) >= @candles.period
  end
end

class Candles

  attr_reader :period

  def initialize(period)
    @candles = []
    @period = period
    @next_update = nil
  end

  def update(tick_value, time)
    time = Candles.normalize_time(time)
    if @next_update.nil? || time > @next_update
      new_candle(tick_value, time)
    else
      @candles.last.update(tick_value, time)
    end
  end

  def highest
    high = @candles.max_by { |c| c.high }
    high.nil? ? nil : BigDecimal.new(high.high, 10)
  end

  def lowest
    low = @candles.min_by { |c| c.low }
    low.nil? ? nil : BigDecimal.new(low.low, 10)
  end

  def oldest_time
    oldest = @candles.min_by { |c| c.time }
    oldest.nil? ? nil : oldest.time
  end

  def reset
    @candles = []
    @next_update = nil
  end

  def new_candle(tick_value, time)
    limit = time - period
    @candles = @candles.reject { |c| c.time < limit }
    @candles << Candle.new
    @candles.last.update(tick_value, time)
    @next_update = time + (60 * 5)
  end

  def state
    {
      candles:     @candles.map { |c| c.to_h },
      next_update: @next_update
    }
  end

  def restore_state(state)
    @candles = state[:candles].map { |s| Candle.from_h(s) }
    @next_update = state[:next_update]
  end

  def self.normalize_time(time)
    Time.at((time.to_i / (60 * 5)).floor * 60 * 5)
  end
end

class Candle

  attr_reader :high, :low, :time

  def initialize(high = nil, low = nil, time = nil)
    @high = high
    @low  = low
    @time = time
  end

  def update(tick_value, time)
    price = extract_price(tick_value)
    @high = price if @high.nil? || @high < price
    @low  = price if @low.nil?  || @low > price
    @time = time  if @time.nil?
  end

  def to_h
    { high: @high, low: @low, time: @time }
  end

  def self.from_h(hash)
    Candle.new(hash[:high], hash[:low], hash[:time])
  end

  private

  def extract_price(tick_value)
    tick_value.bid
  end
end
Machine learning practice: support vector machines

Day 12 of working through "Machine Learning for Hackers": Chapter 12, "Model Comparison."

We learn the last algorithm in the book, the support vector machine (SVM), and then apply the algorithms covered so far, such as logistic regression and k-nearest neighbors, to the same dataset and compare them.
# Preparation
> setwd("12-Model_Comparison/")
Support vector machines (SVM)

A support vector machine is a classification model which, unlike logistic regression, can also handle data whose decision boundary is non-linear.

For example, data like the following cannot be classified well by logistic regression, because the decision boundary cannot be drawn as a single line.
> library('ggplot2')
# load the data
> df <- read.csv(file.path('data', 'df.csv'))
> head(df)
          X          Y Label
1 0.2655087 0.52601906     1
2 0.3721239 0.07333542     1
3 0.5728534 0.84974175     1
4 0.9082078 0.42305801     0
> ggplot(df, aes(x = X, y = Y, color = factor(Label))) + geom_point()
> ggsave(filename="plot01.png")
As a test, let's use logistic regression to predict which label each point belongs to.
> logit.fit <- glm(Label ~ X + Y, family = binomial(link = 'logit'), data = df)
> logit.predictions <- ifelse(predict(logit.fit) > 0, 1, 0)
> mean(with(df, logit.predictions == Label))
[1] 0.5156
The accuracy is about 51.5%. With only two classes, that is hardly better than guessing at random.

Next, let's measure the accuracy when we classify with an SVM.
> library('e1071')
> svm.fit <- svm(Label ~ X + Y, data = df)
> svm.predictions <- ifelse(predict(svm.fit) > 0, 1, 0)
> mean(with(df, svm.predictions == Label))
[1] 0.7204
That comes to 72%, so the SVM classifies noticeably better than logistic regression.

Let's plot what each model predicts.
> library("reshape") > df <- cbind(df, data.frame(Logit = ifelse(predict(logit.fit) > 0, 1, 0), SVM = ifelse(predict(svm.fit) > 0, 1, 0))) > predictions <- melt(df, id.vars = c('X', 'Y')) > ggplot(predictions, aes(x = X, y = Y, color = factor(value))) + geom_point() + facet_grid(variable ~ .) > ggsave(filename="plot02.png")
From top to bottom: the data being classified, the result of classifying each point with logistic regression, and the result of classifying each point with the SVM.

Logistic regression simply labels everything 0... The SVM, on the other hand, captures the structure of the original data, not perfectly, but reasonably well.

The kernel trick

SVMs can produce non-linear decision boundaries thanks to a technique called the kernel trick. The kernel trick applies a mathematical transformation that moves the original dataset into a new space, where the decision boundary becomes easier to describe linearly.
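As a point of reference not spelled out in the book's code: the radial (Gaussian) kernel used by e1071 below scores the similarity of two points as K(x, x') = exp(-gamma * ||x - x'||^2), so points that are close in the original space remain similar in the transformed one.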
Let's compare the results of classifying with several different kernels. The svm function lets you choose the kernel through an argument, so that is what we use.
> df <- df[, c('X', 'Y', 'Label')]
# linear kernel
> linear.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'linear')
# polynomial kernel
> polynomial.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'polynomial')
# radial (Gaussian) kernel
> radial.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'radial')
# sigmoid kernel
> sigmoid.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'sigmoid')
> df <- cbind(df, data.frame(LinearSVM = ifelse(predict(linear.svm.fit) > 0, 1, 0), PolynomialSVM = ifelse(predict(polynomial.svm.fit) > 0, 1, 0), RadialSVM = ifelse(predict(radial.svm.fit) > 0, 1, 0), SigmoidSVM = ifelse(predict(sigmoid.svm.fit) > 0, 1, 0)))
> predictions <- melt(df, id.vars = c('X', 'Y'))
> ggplot(predictions, aes(x = X, y = Y, color = factor(value))) + geom_point() + facet_grid(variable ~ .)
> ggsave(filename="plot03.png")
The linear and polynomial kernels, like logistic regression, do not classify this data well. The radial kernel produces a boundary close to the true one.

Next, let's vary the hyperparameters. The polynomial kernel takes the degree as a parameter, so we run the prediction with degrees 3, 5, 10, and 12.
# varying the degree
> polynomial.degree3.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'polynomial', degree = 3)
> polynomial.degree5.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'polynomial', degree = 5)
> polynomial.degree10.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'polynomial', degree = 10)
> polynomial.degree12.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'polynomial', degree = 12)
> df <- df[, c('X', 'Y', 'Label')]
> df <- cbind(df, data.frame(Degree3SVM = ifelse(predict(polynomial.degree3.svm.fit) > 0, 1, 0), Degree5SVM = ifelse(predict(polynomial.degree5.svm.fit) > 0, 1, 0), Degree10SVM = ifelse(predict(polynomial.degree10.svm.fit) > 0, 1, 0), Degree12SVM = ifelse(predict(polynomial.degree12.svm.fit) > 0, 1, 0)))
> predictions <- melt(df, id.vars = c('X', 'Y'))
> ggplot(predictions, aes(x = X, y = Y, color = factor(value))) + geom_point() + facet_grid(variable ~ .)
> ggsave(filename="plot04.png")
With degree 3 or 5 the result is no different from the default, but with 10 or 12 the model starts to separate the classes somewhat.

Next, let's vary the cost parameter of the radial kernel. cost controls how heavily margin violations on the training data are penalized: larger values make the model follow the training data more closely, while smaller values regularize it more strongly.
> radial.cost1.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'radial', cost = 1)
> radial.cost2.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'radial', cost = 2)
> radial.cost3.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'radial', cost = 3)
> radial.cost4.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'radial', cost = 4)
> df <- df[, c('X', 'Y', 'Label')]
> df <- cbind(df, data.frame(Cost1SVM = ifelse(predict(radial.cost1.svm.fit) > 0, 1, 0), Cost2SVM = ifelse(predict(radial.cost2.svm.fit) > 0, 1, 0), Cost3SVM = ifelse(predict(radial.cost3.svm.fit) > 0, 1, 0), Cost4SVM = ifelse(predict(radial.cost4.svm.fit) > 0, 1, 0)))
> predictions <- melt(df, id.vars = c('X', 'Y'))
> ggplot(predictions, aes(x = X, y = Y, color = factor(value))) + geom_point() + facet_grid(variable ~ .)
> ggsave(filename="plot05.png")
Finally, we try the gamma parameter of the sigmoid kernel and wrap up.
> sigmoid.gamma1.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'sigmoid', gamma = 1)
> sigmoid.gamma2.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'sigmoid', gamma = 2)
> sigmoid.gamma3.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'sigmoid', gamma = 3)
> sigmoid.gamma4.svm.fit <- svm(Label ~ X + Y, data = df, kernel = 'sigmoid', gamma = 4)
> df <- df[, c('X', 'Y', 'Label')]
> df <- cbind(df, data.frame(Gamma1SVM = ifelse(predict(sigmoid.gamma1.svm.fit) > 0, 1, 0), Gamma2SVM = ifelse(predict(sigmoid.gamma2.svm.fit) > 0, 1, 0), Gamma3SVM = ifelse(predict(sigmoid.gamma3.svm.fit) > 0, 1, 0), Gamma4SVM = ifelse(predict(sigmoid.gamma4.svm.fit) > 0, 1, 0)))
> predictions <- melt(df, id.vars = c('X', 'Y'))
> ggplot(predictions, aes(x = X, y = Y, color = factor(value))) + geom_point() + facet_grid(variable ~ .)
> ggsave(filename="plot06.png")
Changing gamma changes the shape of the decision boundary (though the book doesn't really go into this part...).

Comparing the algorithms

Finally, let's apply the SVM, together with the logistic regression and k-nearest neighbors we learned earlier, to the same dataset and compare them.
# load and clean the target data
> load(file.path('data', 'dtm.RData'))
> set.seed(1)
# split into training and test data
> training.indices <- sort(sample(1:nrow(dtm), round(0.5 * nrow(dtm))))
> test.indices <- which(! 1:nrow(dtm) %in% training.indices)
> train.x <- dtm[training.indices, 3:ncol(dtm)]
> train.y <- dtm[training.indices, 1]
> test.x <- dtm[test.indices, 3:ncol(dtm)]
> test.y <- dtm[test.indices, 1]
> rm(dtm)
First, logistic regression.
> library('glmnet')
> regularized.logit.fit <- glmnet(train.x, train.y, family = c('binomial'))
We find the optimal value of Lambda with the same procedure as in Chapter 6.
> lambdas <- regularized.logit.fit$lambda
> performance <- data.frame()
> for (lambda in lambdas) {
    predictions <- predict(regularized.logit.fit, test.x, s = lambda)
    predictions <- as.numeric(predictions > 0)
    mse <- mean(predictions != test.y)
    performance <- rbind(performance, data.frame(Lambda = lambda, MSE = mse))
  }
> ggplot(performance, aes(x = Lambda, y = MSE)) + geom_point() + scale_x_log10()
> ggsave(filename="lambda.png")
Somewhere around 1e-03 looks good, so we use min to find the Lambda that minimizes the MSE.
# compute the `Lambda` that minimizes the MSE.
# with this data two values tie for the minimum, so max picks the larger
# (= more strongly regularized) of the two
> best.lambda <- with(performance, max(Lambda[which(MSE == min(MSE))]))
With Lambda fixed, we compute the final MSE.
> mse <- with(subset(performance, Lambda == best.lambda), MSE)
> mse
[1] 0.06830769

It comes out to roughly 0.068.
Next, a linear-kernel SVM.
> library('e1071')
> linear.svm.fit <- svm(train.x, train.y, kernel = 'linear')
> predictions <- predict(linear.svm.fit, test.x)
> predictions <- as.numeric(predictions > 0)
> mse <- mean(predictions != test.y)
> mse
[1] 0.128
That is 0.128, a worse result than logistic regression.

Next, the radial-kernel (Gaussian) SVM.
> radial.svm.fit <- svm(train.x, train.y, kernel = 'radial')
> predictions <- predict(radial.svm.fit, test.x)
> predictions <- as.numeric(predictions > 0)
> mse <- mean(predictions != test.y)
> mse
[1] 0.1421538
On this dataset it does even worse than logistic regression and the linear-kernel SVM, which suggests that the ideal decision boundary for this data may be close to linear.

Finally, k-nearest neighbors.
> library('class')
> knn.fit <- knn(train.x, test.x, train.y, k = 50)
> predictions <- as.numeric(as.character(knn.fit))
> mse <- mean(predictions != test.y)
> mse
[1] 0.1396923
Let's tune k by trying values from 5 to 50 and keeping the best one.
> performance <- data.frame()
# try k from 5 to 50
> for (k in seq(5, 50, by = 5)) {
    knn.fit <- knn(train.x, test.x, train.y, k = k)
    predictions <- as.numeric(as.character(knn.fit))
    mse <- mean(predictions != test.y)
    performance <- rbind(performance, data.frame(K = k, MSE = mse))
  }
# pick the best value of k
> best.k <- with(performance, K[which(MSE == min(MSE))])
> best.mse <- with(subset(performance, K == best.k), MSE)
> best.mse
[1] 0.09169231
Tuning improves the result to about 0.09.

So for this problem, logistic regression turns out to be the best fit.

The lessons drawn from this:

- When working on a real dataset, it pays to try several algorithms.
- Which algorithm is best depends on the structure of the problem.
- A model's performance also depends on its hyperparameters; if you want good results, spend time tuning them as well.
Impressions

- I feel I've now reached an introductory level with regression analysis and optimization.
- The theory is only skimmed, but for programmers who want to learn by actually touching data and running code, it's not a bad book. (The back cover says that is exactly the book's concept.)
- That said, as the Amazon reviews point out, there is little theoretical explanation, so you will need another book for that.
- With only this book's knowledge, reading the corresponding Wikipedia articles will still leave you baffled.
- Machine learning itself was fun; now I'd like to build something with it.
Machine learning practice: analyzing social graphs

Day 11 of working through "Machine Learning for Hackers": Chapter 11, "Analyzing Social Graphs."

We visualize a Twitter social graph and build a system that recommends friends from the graph data.
# Preparation
> setwd("11-SNA/")
Visualizing local community structure

In the first example, we analyze the community structure among the followers of the user johnmyleswhite.

We load the graph made up of the user johnmyleswhite and the users he follows directly (a so-called ego network centered on the user), compute the distances between the followers, and then run hierarchical clustering on them with hclust.
> library('igraph')
# load the graph data
> user <- 'johnmyleswhite'
> user.ego <- read.graph("data/johnmyleswhite/johnmyleswhite_ego.graphml", format='graphml')
# compute the distances between nodes
> user.sp <- shortest.paths(user.ego)
# hierarchical clustering to expose the followers' community structure
> user.hc <- hclust(dist(user.sp))
# plot the dendrogram
> png(paste('../images/', user, '_dendrogram.png', sep=''), width=1680, height=1050)
> plot(user.hc)
> dev.off()
From the dendrogram we can see that there are roughly two large communities, each of which contains smaller sub-communities.

Recommending friends from graph data

Under the assumption that a "friend of a friend" is likely to become a friend, let's recommend new friends from the graph data.

First, load the graph data.
# the user we will recommend new people to follow to
> user <- "drewconway"
# load the graph data
> user.graph <- suppressWarnings(read.graph(paste("data/", user, "/", user, "_net.graphml", sep = ""), format = "graphml"))
From the graph we extract "friends of friends" as candidates. Candidates who are followed by many of the user's existing friends are considered a better fit, so we rank and sort them accordingly.
# "drewconway" ããã©ãã¼ãã¦ããã¦ã¼ã¶ã¼(=åé)ã®ä¸è¦§ãåãåºã > friends <- V(user.graph)$name[neighbors(user.graph, user, mode = "out") + 1] [1] "311nyc" "aaronkoblin" "abumuqawama" "acroll" "adamlaiacano" [6] "aeromax" # ã°ã©ãã®ã¨ãã¸ã®ä¸è¦§ãåãåºã > user.el <- get.edgelist(user.graph) > head(user.el) [,1] [,2] [1,] "drewconway" "311nyc" [2,] "drewconway" "aaronkoblin" [3,] "drewconway" "abumuqawama" [4,] "drewconway" "acroll" [5,] "drewconway" "adamlaiacano" [6,] "drewconway" "aeromax" # åéã®åéã2çªç®ã®è¦ç´ (ã¿ã¼ã²ãã)ã«å«ã¾ããè¡ãåãåºãã # ãã ãããã§ã«ãã©ãã¼æ¸ã¿(=åé)ã«ãªã£ã¦ããã¦ã¼ã¶ã¼ã¯é¤ã > non.friends <- sapply(1:nrow(user.el), function(i) { ifelse(any(user.el[i,] == user | !user.el[i,1] %in% friends) | user.el[i,2] %in% friends, FALSE, TRUE) }) > non.friends.el <- user.el[which(non.friends == TRUE),] > head(non.friends.el) [,1] [,2] [1,] "000988" "1000timesyes" [2,] "000988" "10ch" [3,] "000988" "1mrankhan" [4,] "000988" "1ndus" [5,] "000988" "500startups" [6,] "000988" "_hoffman" # åéåè£ãã¨ã®åéã®æ°ãéè¨ > friends.count <- table(non.friends.el[,2]) > head(friends.count) ___emma __damonwang__ __dave __davidflanagan __iriss 1 1 2 3 1 __neha 1 # ãã¼ã¿ãã¬ã¼ã ã«å¤æ > friends.followers <- data.frame(list(Twitter.Users = names(friends.count), Friends.Following=as.numeric(friends.count)), stringsAsFactors = FALSE) > head(friends.followers) Twitter.Users Friends.Following 1 ___emma 1 2 __damonwang__ 1 3 __dave 2 4 __davidflanagan 3 5 __iriss 1 6 __neha 1 # åéåè£ã¨ãã¦ã®æé©åº¦ã示ãææ¨ã¨ãã¦ãååéåè£ããã©ãã¼ãã¦ããåéæ¯çãè¨ç®ãã¦ä½¿ãã # å¤ãã®åéãåéã¨ãã¦ããåè£ã¯é©æ§ãé«ãã¨ã¿ãªãã > friends.followers$Friends.Norm <- friends.followers$Friends.Following / length(friends) > head(friends.followers) Twitter.Users Friends.Following Friends.Norm 1 ___emma 1 0.003816794 2 __damonwang__ 1 0.003816794 3 __dave 2 0.007633588 4 __davidflanagan 3 0.011450382 5 __iriss 1 0.003816794 6 __neha 1 0.003816794 # ãå§ã度ã®ææ¨ã§ã½ã¼ã > friends.followers <- friends.followers[with(friends.followers, order(-Friends.Norm)),]
Now that the data is ready, let's display the top six candidates by recommendation score.
# top six
> head(friends.followers)
      Twitter.Users Friends.Following Friends.Norm
13388       cshirky                80    0.3053435
21403    fredwilson                58    0.2213740
6950        bigdata                57    0.2175573
14062    dangerroom                57    0.2175573
55153 shitmydadsays                55    0.2099237
2025           al3x                54    0.2061069