Motivations

This model is motivated by survey and testing data, where it is useful to separate the "student" ability (\(\alpha\)), the question difficulty (\(\beta\)), and the discrimination of the questions (\(\gamma\)).

Data Generating Process

\[P(y_n = 1) = \operatorname{logit}^{-1}\left(\gamma_{kk[n]}\left(\alpha_{jj[n]} - \beta_{kk[n]}\right)\right)\]

J <- 30 # students
K <- 10 # questions
N <- J * K
alpha <- runif(K, .5, 2) # item slopes (note: in the Stan model below, gamma plays this role)
beta <- runif(K, -2, 3)  # item intercepts
theta_mu <- 0  # population mean of person ability
theta_sig <- 1 # population sd of person ability
theta <- rnorm(J, theta_mu, theta_sig) # generate J person ability parameters
slope.ability <- outer(theta, alpha) # J x K matrix of ability times slope
intercept <- matrix(rep(beta, J), nrow = J, byrow = TRUE)
prob <- plogis(intercept + slope.ability) # 1/(1 + exp(-x))
data <- ifelse(runif(N) < prob, 1, 0) # generate matrix of Bernoulli 0-1 responses
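
As a quick sanity check on the simulation (my addition, not part of the original walk-through), we can confirm the items span a range of easiness by looking at the proportion of correct responses per item:

round(colMeans(data), 2) # proportion of students answering each item correctly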

Now we can format our data.

tidy_data <- data %>% 
  as_tibble() %>% 
  mutate(person_id = row_number()) %>% 
  gather(item, response, -person_id) %>% 
  mutate(item = as.numeric(as.factor(item)))
## Warning: `as_tibble.matrix()` requires a matrix with column names or a `.name_repair` argument. Using compatibility `.name_repair`.
## This warning is displayed once per session.

Model

Now we can build our Stan model, adapted from the IRT example in the Stan User's Guide.

Stan

writeLines(readLines("stan_2pl.stan"))
// 2PL IRT in Stan
// Modified from the Stan Users Guide <https://mc-stan.org/docs/>
data {
  int<lower=1> J;              // number of students
  int<lower=1> K;              // number of questions
  int<lower=1> N;              // number of observations
  int<lower=1,upper=J> jj[N];  // student for observation n
  int<lower=1,upper=K> kk[N];  // question for observation n
  int<lower=0,upper=1> y[N];   // correctness for observation n
}

parameters {
  real mu_beta;                // mean question difficulty
  vector[J] alpha;             // ability of student j - mean ability
  vector[K] beta;              // difficulty for k
  vector<lower=0>[K] gamma;    // discrimination of k
  //real<lower=0> sigma_beta;    // scale of difficulties
  //real<lower=0> sigma_gamma;   // scale of log discrimination
}

model {
  alpha ~ std_normal(); // i.e., normal(0, 1)
  //Can make these hierarchical priors if desired
  //beta ~ normal(0, sigma_beta);
  //gamma ~ lognormal(0, sigma_gamma);
  beta ~ normal(0, 5);
  gamma ~ lognormal(0, 2);
  mu_beta ~ cauchy(0, 5);
  //sigma_beta ~ cauchy(0, 5);
  //sigma_gamma ~ cauchy(0, 5);
  y ~ bernoulli_logit(gamma[kk] .* (alpha[jj] - (beta[kk] + mu_beta)));
}

Data Prep

Of course, we need to prepare our data for Stan.

stan_dat <- list(
  J = length(unique(tidy_data[["person_id"]])),
  K = length(unique(tidy_data[["item"]])),
  N = nrow(tidy_data),
  jj = tidy_data[["person_id"]],
  kk = tidy_data[["item"]],
  y = tidy_data[["response"]]
)

Modeling

library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.18.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
rstan_options(auto_write = TRUE)
model <- stan_model("stan_2pl.stan")

Now we can run our compiled model with our data:

fit_2pl <- sampling(model, stan_dat, 
                    cores = 2, chains = 2, iter = 2000, refresh = 0)
## Warning: There were 47 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
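
The divergent transitions mean the sampler is struggling with the posterior geometry. Following the warning's own suggestion, one option (a sketch, not run in this post; the results below come from the original fit) is to refit with a more conservative step-size adaptation:

fit_2pl <- sampling(model, stan_dat,
                    cores = 2, chains = 2, iter = 2000, refresh = 0,
                    control = list(adapt_delta = 0.99)) # default is 0.8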

Model Checking

util <- new.env()
source('stan_utilities.R', local=util)
util$check_all_diagnostics(fit_2pl)

If this were a more serious application, I would also check the pairs plot and the trace plots, as sketched below.
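
For example, using rstan's built-in diagnostics (the particular parameters shown are arbitrary choices on my part):

pairs(fit_2pl, pars = c("mu_beta", "beta[1]", "gamma[1]")) # joint geometry of a few parameters
traceplot(fit_2pl, pars = c("beta[1]", "gamma[1]"))        # chain mixing for selected parameters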

Inferences

First we can look at our question difficulties. If an item has a higher \(\beta\), then it requires a higher level of ability to answer it "correctly."

print(fit_2pl, pars = "beta")
## Inference for Stan model: stan_2pl.
## 2 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=2000.
## 
##           mean se_mean  sd   2.5%  25%   50%   75% 97.5% n_eff Rhat
## beta[1]  -0.79    0.16 2.3  -6.15 -2.0 -0.51  0.62 3.14   214    1
## beta[2]  -1.46    0.14 2.5  -7.14 -2.7 -1.11  0.17 2.65   305    1
## beta[3]  -1.49    0.15 2.6  -7.73 -2.9 -1.16  0.35 2.72   323    1
## beta[4]  -4.75    0.15 3.2 -11.48 -6.7 -4.55 -2.49 0.66   455    1
## beta[5]   3.25    0.11 2.2  -0.44  1.8  2.97  4.36 8.37   397    1
## beta[6]   3.01    0.14 2.5  -1.52  1.4  2.76  4.47 8.51   325    1
## beta[7]   0.49    0.17 3.5  -7.96 -1.3  0.63  2.61 6.74   431    1
## beta[8]   3.98    0.13 2.6  -0.47  2.3  3.81  5.51 9.64   416    1
## beta[9]  -4.00    0.17 3.2 -11.22 -5.8 -3.59 -1.77 0.95   337    1
## beta[10]  0.09    0.14 1.8  -3.38 -1.1  0.17  1.25 3.50   175    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Feb 26 22:00:53 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
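
Note that the simulation used a slope-intercept form, invlogit(beta + alpha * theta), while the model fits gamma * (alpha - (beta + mu_beta)), so the fitted item locations correspond to -beta/alpha from the simulation rather than to the simulated beta directly. A rough recovery check (my addition, under that mapping):

post <- extract(fit_2pl)
tibble(
  implied_difficulty   = -beta / alpha,                           # from the simulation draws
  recovered_difficulty = colMeans(post$beta) + mean(post$mu_beta) # posterior means
)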

Similarly, we can look at our item \(\gamma\)s to examine how well each item discriminates between ability levels.

print(fit_2pl, pars = "gamma")
## Inference for Stan model: stan_2pl.
## 2 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=2000.
## 
##            mean se_mean    sd 2.5%  25%  50%   75% 97.5% n_eff Rhat
## gamma[1]   2.11    0.19  3.86 0.16 0.60 1.18  2.18 10.22   413  1.0
## gamma[2]   6.61    0.89 12.51 0.29 1.04 2.28  5.88 52.83   196  1.0
## gamma[3]   1.28    0.54  4.92 0.07 0.25 0.49  0.93  3.74    84  1.0
## gamma[4]   0.31    0.01  0.21 0.07 0.17 0.26  0.38  0.86   616  1.0
## gamma[5]   2.02    0.43  6.20 0.06 0.32 0.68  1.38 14.24   208  1.0
## gamma[6]   2.87    1.03  8.06 0.02 0.25 0.67  1.60 32.59    61  1.1
## gamma[7]   0.28    0.04  0.42 0.01 0.06 0.13  0.32  1.43   129  1.0
## gamma[8]   0.45    0.02  0.41 0.03 0.17 0.33  0.59  1.50   614  1.0
## gamma[9]   0.49    0.02  0.36 0.11 0.25 0.39  0.60  1.51   275  1.0
## gamma[10] 12.16    0.92 17.52 0.39 2.78 5.71 13.98 61.90   364  1.0
## 
## Samples were drawn using NUTS(diag_e) at Tue Feb 26 22:00:53 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
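
Likewise, the model's \(\gamma\) plays the role of the simulation's alpha (the slopes), so we can compare them directly (again my addition; given the wide posteriors above, recovery is rough at best):

tibble(
  simulated_slope          = alpha,                  # slopes used to generate the data
  recovered_discrimination = colMeans(post$gamma)    # posterior means of gamma (post from above)
)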

Summarising with ICC

Now we can pull out some of our values and plot the item characteristic curves (ICCs) for the different items.

library(tidybayes)
## NOTE: As of tidybayes version 1.0, several functions, arguments, and output column names
##       have undergone significant name changes in order to adopt a unified naming scheme.
##       See help('tidybayes-deprecated') for more information.
difficulties <- extract(fit_2pl)["beta"][[1]] %>% 
  as_tibble() %>% 
  colMeans()

discrimination <- extract(fit_2pl)["gamma"][[1]] %>% 
  as_tibble() %>% 
  colMeans()
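
As an aside, since tidybayes is loaded, the same posterior means could also be computed with its spread_draws() (a sketch of an alternative, not what the code above does):

fit_2pl %>% 
  spread_draws(beta[item]) %>% 
  group_by(item) %>% 
  summarise(difficulty = mean(beta))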

instrument <- tibble(difficulties = difficulties, discrimination = discrimination) %>% 
  mutate(item_id = sprintf("Item %s", 1:length(difficulties)))

ability_range <- seq(-4, 4, by = 0.05)

probs <- crossing(instrument, ability_range) %>% 
  rowwise() %>% 
  mutate(prob = arm::invlogit(discrimination*(ability_range-difficulties)))

And now we can look at our ICCs.

probs %>% 
  ggplot(aes(ability_range, prob, group = item_id, color = item_id)) +
  geom_line() +
  theme_minimal()

We could also take the time to look at our student abilities.

extract(fit_2pl)["alpha"][[1]] %>% 
  as_tibble() %>% 
  colMeans() %>% 
  hist(breaks = 30, main = "Histogram of Student Abilities")
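
And because we simulated the data ourselves, we can also check ability recovery against the true values (my addition):

recovered_ability <- colMeans(post$alpha) # posterior mean ability per student (post from above)
plot(theta, recovered_ability,
     xlab = "Simulated ability (theta)", ylab = "Recovered ability (alpha)")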


