51. 2013.07.20 第32回R勉強会@東京(#TokyoR) 51
sqldfパッケージで全商品の組合せの調整残差計算
library(sqldf)
# Pairwise co-purchase statistics (adjusted residual, support, lift) for
# every combination of two items in a purchase-history table.
#
# Input file: read with read.table() defaults plus a header row.
# Column 1 = CustID, column 2 = ItemID, column 3 = Qty (Qty is not used).
d <- read.table("購買履歴データ.txt", header = TRUE)

# One output row per unordered item pair (ItemX < ItemY):
#   T1 (outer) : all item pairs, n(n-1)/2 combinations
#   T2         : number of customers who bought ItemX   -> BuyX
#   T3         : number of customers who bought ItemY   -> BuyY
#   T4         : number of customers who bought both    -> BuyXY
# Pairs that nobody bought together have no T4 row, so coalesce() maps the
# resulting NULL to 0.
# count(distinct CustID) is used so that a customer with several history rows
# for the same item is counted once; this keeps the counts consistent with
# res$n below, which is the number of unique customers.
res <- sqldf("
select T1.ItemX as ItemX, T1.ItemY as ItemY,
coalesce(T2.CustCount, 0) as BuyX, coalesce(T3.CustCount, 0) as BuyY, coalesce(T4.CustCount, 0) as BuyXY
from (
--商品の組合せ n(n-1)/2 通り
select T1.ItemId as ItemX, T2.ItemId as ItemY
from (select ItemID from d group by ItemID) as T1
inner join (select ItemID from d group by ItemID) as T2
on T1.ItemId < T2.ItemId
) as T1
--商品Xの購買者数
left outer join (select T1.ItemId as ItemX, count(distinct T1.CustID) as CustCount from d as T1 group by T1.ItemId) as T2
on T1.ItemX = T2.ItemX
--商品Yの購買者数
left outer join (select T1.ItemId as ItemY, count(distinct T1.CustID) as CustCount from d as T1 group by T1.ItemId) as T3
on T1.ItemY = T3.ItemY
--商品XYの購買者数
left outer join (
select T1.ItemId as ItemX, T2.ItemId as ItemY, count(distinct T1.CustID) as CustCount
from d as T1
inner join d as T2
on T1.CustID = T2.CustID
and T1.ItemId < T2.ItemId
group by T1.ItemId, T2.ItemId
) as T4
on T1.ItemX = T4.ItemX
and T1.ItemY = T4.ItemY
")

# Number of unique customer IDs, repeated for every pair. rep() keeps the
# assignment valid even when res has zero rows (fewer than two distinct items).
res$n <- rep(
  as.numeric(sqldf("select count(distinct CustID) as n from d")$n),
  nrow(res)
)

# Expected number of customers buying both items if purchases were independent.
res$BuyXY.est <- res$BuyX * res$BuyY / res$n

# Adjusted residual of the observed co-purchase count against the expectation.
res$BuyXY.stdres <- (res$BuyXY - res$BuyXY.est) /
  sqrt(res$BuyXY.est * (1 - res$BuyX / res$n) * (1 - res$BuyY / res$n))

# Association-analysis support: share of customers who bought both items.
res$support <- res$BuyXY / res$n

# Association-analysis lift: observed co-purchases relative to the expectation.
# The original slide notes that the whole script finishes in about 1-2 seconds.
res$lift <- (res$BuyXY * res$n) / (res$BuyX * res$BuyY)
52. 2013.07.20 第32回R勉強会@東京(#TokyoR) 52
sqldfパッケージで全商品の組合せの調整残差計算
library(sqldf)
# Pairwise co-purchase statistics (adjusted residual, support, lift) for
# every combination of two items in a purchase-history table.
#
# Input file: read with read.table() defaults plus a header row.
# Column 1 = CustID, column 2 = ItemID, column 3 = Qty (Qty is not used).
d <- read.table("購買履歴データ.txt", header = TRUE)

# Step 1 - item combinations and purchaser counts.
# One output row per unordered item pair (ItemX < ItemY):
#   T1 (outer) : all item pairs, n(n-1)/2 combinations
#   T2         : number of customers who bought ItemX   -> BuyX
#   T3         : number of customers who bought ItemY   -> BuyY
#   T4         : number of customers who bought both    -> BuyXY
# Pairs that nobody bought together have no T4 row, so coalesce() maps the
# resulting NULL to 0.
# count(distinct CustID) is used so that a customer with several history rows
# for the same item is counted once; this keeps the counts consistent with
# res$n below, which is the number of unique customers.
res <- sqldf("
select T1.ItemX as ItemX, T1.ItemY as ItemY,
coalesce(T2.CustCount, 0) as BuyX, coalesce(T3.CustCount, 0) as BuyY, coalesce(T4.CustCount, 0) as BuyXY
from (
--商品の組合せ n(n-1)/2 通り
select T1.ItemId as ItemX, T2.ItemId as ItemY
from (select ItemID from d group by ItemID) as T1
inner join (select ItemID from d group by ItemID) as T2
on T1.ItemId < T2.ItemId
) as T1
--商品Xの購買者数
left outer join (select T1.ItemId as ItemX, count(distinct T1.CustID) as CustCount from d as T1 group by T1.ItemId) as T2
on T1.ItemX = T2.ItemX
--商品Yの購買者数
left outer join (select T1.ItemId as ItemY, count(distinct T1.CustID) as CustCount from d as T1 group by T1.ItemId) as T3
on T1.ItemY = T3.ItemY
--商品XYの購買者数
left outer join (
select T1.ItemId as ItemX, T2.ItemId as ItemY, count(distinct T1.CustID) as CustCount
from d as T1
inner join d as T2
on T1.CustID = T2.CustID
and T1.ItemId < T2.ItemId
group by T1.ItemId, T2.ItemId
) as T4
on T1.ItemX = T4.ItemX
and T1.ItemY = T4.ItemY
")

# Step 2 - adjusted residual and association-analysis measures.
# Number of unique customer IDs, repeated for every pair. rep() keeps the
# assignment valid even when res has zero rows (fewer than two distinct items).
res$n <- rep(
  as.numeric(sqldf("select count(distinct CustID) as n from d")$n),
  nrow(res)
)

# Expected number of customers buying both items if purchases were independent.
res$BuyXY.est <- res$BuyX * res$BuyY / res$n

# Adjusted residual of the observed co-purchase count against the expectation.
res$BuyXY.stdres <- (res$BuyXY - res$BuyXY.est) /
  sqrt(res$BuyXY.est * (1 - res$BuyX / res$n) * (1 - res$BuyY / res$n))

# Association-analysis support: share of customers who bought both items.
res$support <- res$BuyXY / res$n

# Association-analysis lift: observed co-purchases relative to the expectation.
res$lift <- (res$BuyXY * res$n) / (res$BuyX * res$BuyY)
商品の組み合わせ
各商品の購買者数
併買者数
調整残差、アソシエーション分析の指標を計算