A Code for Models
A.1 Generalized Linear Model
Xtrainsub <-
Xtrain %>%
filter(as.integer(as.factor(gameid)) < 5)
priormod <- glm(formula = result ~ log(r) + theta,
data = Xtrainsub,
family = "binomial")
mu0r <- summary(priormod)[["coefficients"]]["log(r)","Estimate"]
mu0theta <- summary(priormod)[["coefficients"]]["theta","Estimate"]
fit_glm <- function(dat, S = 10000, B = 500){
model.glm <- function(){
# Liklihood function (for N observations)
for(i in 1:N){
logit(prob[i]) <- beta_int*int[i] +
beta_home*home[i] +
beta_r*logr[i] +
beta_theta*theta[i]
result[i] ~ dbern(prob[i])
}
# Priors
# we expect less variation in the distance parameter,
# because shot success rate should get worse
# as distance increases under baseline circumstances.
beta_int ~ dnorm(0, 0.1)
beta_home ~ dnorm(0, 0.1)
beta_r ~ dnorm(mu0r, 0.01)
beta_theta ~ dnorm(mu0theta, 0.1)
}
datlist.glm <-
list(
int = rep(1, nrow(dat)),
logr = log(dat$r),
theta = dat$theta,
result = dat$result,
home = dat$home,
N = nrow(dat),
mu0r = mu0r,
mu0theta = mu0theta
)
params.glm <- c("beta_int",
"beta_home",
"beta_r",
"beta_theta")
initslist <- list(list("beta_int"=0,
"beta_r"=0,
"beta_theta"=0,
"beta_home"=0))
sim <-
jags(data = datlist.glm,
n.chains = 1, n.iter = S, n.burnin = B, n.thin = 1,
inits=initslist,
parameters.to.save = params.glm,
model.file=model.glm
)
sim.mcmc <- as.data.frame(as.mcmc(sim)[[1]])
# Changing from a baseline mean + a shift amount
# to two different means based on the type of game.
sim.mcmc <-
sim.mcmc %>%
mutate(beta_intA = beta_int,
beta_intH = beta_int + beta_home) %>%
select(beta_intA, beta_intH, beta_r, beta_theta)
return(sim.mcmc)
}
A.2 Hierarchical Generalized Linear Model
Xtrainsub <-
Xtrain %>%
filter(as.integer(as.factor(gameid)) < 5)
priormod <- glm(formula = result ~ log(r) + theta,
data = Xtrainsub,
family = "binomial")
mu0r <- summary(priormod)[["coefficients"]]["log(r)","Estimate"]
mu0theta <- summary(priormod)[["coefficients"]]["theta","Estimate"]
fit_players <- function(dat = NA, S = 10000, B = 500){
model.player <- function(){
# Likelihood function for N observations
for(i in 1:N){
# the parameters now vary by player id.
logit(prob[i]) <- beta_int[player[i]]*int[i] +
beta_home[player[i]]*home[i] +
beta_r[player[i]]*logr[i] +
beta_theta[player[i]]*theta[i]
result[i] ~ dbern(prob[i])
}
# Priors
for(j in 1:M){
beta_int[j] ~ dnorm(beta_int0,tau_int)
beta_home[j] ~ dnorm(beta_home0, tau_int)
beta_r[j] ~ dnorm(beta_r0,tau_r)
beta_theta[j] ~ dnorm(beta_theta0,tau_theta)
}
# Hyperpriors
beta_int0 ~ dnorm(0, 0.1)
beta_home0 ~ dnorm(0, 0.1)
beta_r0 ~ dnorm(mu0r, 0.01)
beta_theta0 ~ dnorm(mu0theta, 0.1)
tau_int ~ dgamma(10, 100)
tau_r ~ dgamma(10, 0.2)
tau_theta ~ dgamma(10, 10)
}
datlist.player <-
list(
logr = log(dat$r),
theta = dat$theta,
home = dat$home,
result = dat$result,
player = as.integer(as.factor(dat$globalplayerid)),
N = nrow(dat),
int = rep(1, nrow(dat)),
M = n_distinct(dat$globalplayerid),
mu0r = mu0r,
mu0theta = mu0theta
)
# we want posteriors for the overall effects
# and for the individual player effects
params <- c("beta_int",
"beta_home",
"beta_r",
"beta_theta",
"beta_int0",
"beta_home0",
"beta_r0",
"beta_theta0")
M <- datlist.player$M
initslist <- list(
list("beta_int"=rep(0,M),
"beta_home"=rep(0,M),
"beta_r"=rep(0,M),
"beta_theta"=rep(0,M),
"beta_int0"=0,
"beta_home0"=0,
"beta_r0"=0,
"beta_theta0"=0,
"tau_int"=1,
"tau_r"=1,
"tau_theta"=1
))
sim.player <-
jags(data = datlist.player,
n.iter = S, n.chains = 1, n.burnin = B, n.thin = 1,
inits=initslist,
parameters.to.save = params,
model.file=model.player
)
sim.mcmc.player <- as.data.frame(as.mcmc(sim.player)[[1]])
# Changing from a baseline mean + a shift amount
# to two different means based on the type of game.
hometext <- paste0("`beta_intH[",1:M,"]` =
`beta_int[",1:M,"]` +
`beta_home[",1:M,"]`",
collapse=",\n")
awaytext <- paste0("`beta_intA[",1:M,"]` =
`beta_int[",1:M,"]`",
collapse=",\n")
sim.mcmc.player <- eval(parse(text=
paste0("sim.mcmc.player %>%
mutate(",hometext,",
beta_intH0 = beta_int0 + beta_home0)","%>%
rename(",awaytext,",
beta_intA0 = beta_int0)"
))) %>%
select(grep("(beta_int)|(beta_theta)|(beta_r)",names(.)))
colorder <- order(colnames(sim.mcmc.player))
sim.mcmc.player <- sim.mcmc.player[ , colorder]
# Renaming mixed effects columns from default factor levels
# (integers) to the corresponding player ids
factorids <-
str_extract_all(names(sim.mcmc.player), "[[:digit:]]+") %>%
as.numeric()
fids <- data.frame(factorid = factorids,
order = 1:length(factorids))
datmap <- dat %>%
mutate(factorid = as.integer(as.factor(globalplayerid))) %>%
select(globalplayerid, factorid)
gameids <- merge(datmap, fids, all.x=FALSE,all.y=TRUE) %>%
unique() %>%
mutate(globalplayerid = ifelse(is.na(globalplayerid),
0,
globalplayerid)) %>%
arrange(order)
names(sim.mcmc.player) <-
str_replace_all(names(sim.mcmc.player), "[[:digit:]]+",
as.character(gameids$globalplayerid))
return(sim.mcmc.player)
}
A.3 Discounted Likelihood Hierarchical Model
fit_game <- function(dat = NA, g0 = NA, delta = NA,
S = 10000, B = 500){
model.game <- function(){
for(i in 1:N){
# delta = discount rate for game g relative to anchor game g0
wt[i] <- delta^abs(games[i]-g0)
# player-level random effects
logit(prob[i]) <- beta_int[player[i]]*int[i] +
beta_home[player[i]]*home[i] +
beta_r[player[i]]*logr[i] +
beta_theta[player[i]]*theta[i]
# likelihood function
p1[i] <- prob[i]^result[i]
p2[i] <- (1-prob[i])^(1-result[i])
# discounted likelihood function
pi[i] <- (p1[i] * p2[i])^wt[i]
# defines correct discounted likelihood function
y[i] ~ dbern(pi[i])
}
# Priors
for(j in 1:M){
beta_int[j] ~ dnorm(beta_int0,tau_int)
beta_home[j] ~ dnorm(beta_home0, tau_int)
beta_r[j] ~ dnorm(beta_r0,tau_r)
beta_theta[j] ~ dnorm(beta_theta0,tau_theta)
}
# Hyperoriors
beta_int0 ~ dnorm(0, 0.1)
beta_home0 ~ dnorm(0, 0.1)
beta_r0 ~ dnorm(mu0r, 0.01)
beta_theta0 ~ dnorm(mu0theta, 0.1)
tau_int ~ dgamma(10, 100)
tau_r ~ dgamma(10, 0.2)
tau_theta ~ dgamma(10, 10)
}
datlist.game <-
list(
int = rep(1, nrow(dat)),
logr = log(dat$r),
theta = dat$theta,
result = dat$result,
home = dat$home,
player = as.integer(as.factor(dat$globalplayerid)),
N = nrow(dat),
M = n_distinct(dat$globalplayerid),
mu0r = mu0r,
mu0theta = mu0theta,
delta = delta,
games = as.integer(as.factor(dat$gameid)),
g0 = g0,
y = rep(1, nrow(dat))
)
params <- c("beta_int",
"beta_r",
"beta_home",
"beta_theta",
"beta_int0",
"beta_home0",
"beta_r0",
"beta_theta0")
M <- n_distinct(dat$globalplayerid)
initslist <- list(list("beta_int"=rep(0,M),
"beta_r"=rep(0,M),
"beta_theta"=rep(0,M),
"beta_int0"=0,
"beta_r0"=0,
"beta_theta0"=0,
"tau_int"=1,
"tau_r"=1,
"tau_theta"=1
))
sim.game <- jags(data = datlist.game,
n.iter = S, n.chains = 1, n.burnin = B, n.thin = 1,
inits = initslist,
parameters.to.save = params,
model.file=model.game
)
sim.mcmc.game <- as.data.frame(as.mcmc(sim.game)[[1]])
# Changing from a baseline mean + a shift amount
# to two different means based on the type of game.
hometext <- paste0("`beta_intH[",1:M,"]` =
`beta_int[",1:M,"]` +
`beta_home[",1:M,"]`",
collapse=",\n")
awaytext <- paste0("`beta_intA[",1:M,"]` =
`beta_int[",1:M,"]`",
collapse=",\n")
sim.mcmc.game <- eval(parse(text=
paste0("sim.mcmc.game %>%
mutate(",hometext,",
beta_intH0 = beta_int0 + beta_home0)"," %>%
rename(",awaytext,",
beta_intA0 = beta_int0)"
))) %>%
select(grep("(beta_int)|(beta_theta)|(beta_r)",names(.)))
colorder <- order(colnames(sim.mcmc.game))
sim.mcmc.game <- sim.mcmc.game[ , colorder]
# Renaming mixed effects columns from default factor levels
# (integers) to the corresponding player ids
factorids <-
str_extract_all(names(sim.mcmc.game), "[[:digit:]]+") %>%
as.numeric()
fids <- data.frame(factorid = factorids,
order = 1:length(factorids))
datmap <- dat %>%
mutate(factorid = as.integer(as.factor(globalplayerid))) %>%
select(globalplayerid, factorid)
gameids <- merge(datmap, fids, all.x=FALSE,all.y=TRUE) %>%
unique() %>%
mutate(globalplayerid = ifelse(is.na(globalplayerid),
0,
globalplayerid)) %>%
arrange(order)
names(sim.mcmc.game) <-
str_replace_all(names(sim.mcmc.game),
"[[:digit:]]+",
as.character(gameids$globalplayerid))
return(sim.mcmc.game)
}