################################################## ##### Adapt-size Test Simulation for Multi- ##### ##### -variate Graded Response Model(MGRM) ##### ##### with D-optimal Item and One-Phase ##### ##### Constraint-Weighted Item Selection ##### ##### and compound Stopping criterion ##### ##### Please cite the paper when using the R code. The citation is: ##### Wang, C., Weiss, D., & Shang, Z. (2019). Variable-length stopping ##### rules for multidimensional computerized adaptive testing. Psychometrika. ##### ##### Created: 04/26/2017 ##### ##### Updated: 11/15/2017 ##### ################################################## ################################################## ##### Function: MGRM_stoprulessimu ##### ################################################## # Inputs: # RESPmat: a N by J numerical matrix # thetas: a N by p numerical matrix # true value of theta in simulation study # not a input # theta_cov: a p by p numerical matrix # prior information matrix of theta # usually provided by population parameter # estimation with reduced precision # itembank: a list of : # J: numerical value of the itembank size # ncat is the response categories, a numeric vector with length J # p, numerical value of number of latent dimension # D, numerical value of scaling parameter # params, numerical matrix of item parameters, first p columns are discriminating # parameters and the rest columns are intercepts, number of columns should be equal to # p + max(ncat) -1 # est_method: estimation method, single character, takes value "map", "mle" and "eap" # constr: vector of length 2, upper and lower bound of estimation # select_method: selection method takes value "d" and "a" # "d" for D-optimal Selection method # "a" for A-optimal Selection method # aweights: numeric vector of length p # the weights of weighted A-optimal # cw: logical arguments, if true, use the constraint-weights # dm_constr: 2 by p numerical matrix of domain constraint # first row is upper bound and second is lower bound # Lmax: numerical value representing maximal test length allowed # need to be integer, though type is double # stop_method : vector of stopping method, first element is primary # takes values "det" or "se" as primary # and second is secondary, # takes "theta" or "se" as secondary # det_thd: numerical value of threshold on determinant # used in primary stopping "det" # se_thd: numerical vector of length K, # threshold on SE as primary stopping rule # used in primary stopping "se" # theta_thd : numerical value of threshold on theta convergence # used in secondary stopping "theta" # se_stop_thd : number, threshold on SE as secondary stopping rule # used in secondary stopping "se" # int_mat: numerical matrix of size N by p, # serves as initial values of optimization ################################################# ##### Outputs a list of ##### All the input settings ##### thetas: true matrix of latent attributes ##### params: matrix of item parameters ##### first p column is the discriminating parameter ##### p+1 to p+ncat[j]-1 is the level intercept ##### if ncat[j] < max(ncat), the rest is 1e8 ################################################## ##### Notes: The item bank contains only ##### ##### simple testing items that requires ##### ##### one attribute per item ##### MGRM_test <- function(RESPmat, thetas = NULL, theta_cov, itembank, est_method = "map", constr = c(-4,4), select_method = "d", aweights = NULL, cw = FALSE, dm_constr = NULL, Lmax = 120, det_thd = NULL, theta_thd = 0.01, int_mat = NULL, stop_method = c("det","theta"), se_thd = NULL, se_stop_thd = 0.005) { ## Prepara itembank attach(itembank) N <- nrow(RESPmat) # get number of rows of response matrix J <- nrow(params) # get number of items from matrix "params" in the list "itembank" pr_mu <- rep(0, p) # vector of length p, all 0s params <- itembank$params # extract the item parameters params[, c(1:p)] = D * params[, c(1:p)] # multiply the discriminating parameters by scaling parameter Q <- t(sign(abs(params[, c(1:p)]))) #p X J indicator matrix ## Testing if input arguments are valid if ( ! est_method %in% c("mle", "map", "eap")) { stop("Estimation Method not available") } # check if the input estimation method is not one of our three options if (is.null(dm_constr)) { dm_constr <- matrix(rep(rowSums(Q), 2), nrow=2, byrow=T) dm_constr[2, ] <- 0 } # default domain constaints are to use number of items in each domain in the itembank # as upper bound and zero as lower bound if (is.null(det_thd)) { nu <- 2 * pi ^ (p / 2) * (qchisq(0.95, p)) ^ (p / 2) / (p * gamma(p / 2)) vs <- 4 / 3 * pi * 1 ^ 3 det_thd <- nu ^ 2 / vs ^ 2 } # default value of det_thd is 477.2452 (it is a function of dimension p, in the above computation, # qchisq is the quantile function of chi-squared distribution and gamma is the gamma function) if (is.null(aweights)) { aweights <- rep(1, p) } if(is.null(se_thd) && stop_method[1] == "se"){ se_thd <- rep(0.4,p) } ## Load functions source("/home/shang063/Documents/MGRM/funcs/MGRM_est.R") # call MGRM_est, used for theta estimation source("/home/shang063/Documents/MGRM/funcs/MGRM_itemselect.R") # call MGRM_detfi, used for computation of determinant of Fisher Information ## Prepare to start the test du <- dm_constr[1, ] # upper bounds for each domain, a vector of length p dl <- dm_constr[2, ] # lower bounds for each domain, a vector of length p invcov <- solve(theta_cov) # compute the inverse of covariance matrix time1 <- Sys.time() # record the current system time test_items <- list() #test_items is a list of length N, each element is a vector of tested # item index of individual test-taker resp <- list() # resp is a list of length N, each element is a vector of tested # responses of individual test-taker thetahat_trace <- list() # thetahat_trace is a list of length N, each element is a matrix, # the jth column is estimated theta vector (of length p) after the jth item se_trace <- list() # se_trace is a list of length N, each element is a matrix, # the jth column is estimation standard error vector (of length p) after the jth item det_trace <- list() # se_trace is a list of length N, each element is a vector of the determinant of # Fisher Informaiton matrix on theta after the jth item test_length <- matrix(Lmax, N, 2) # test_length is N by 2 matrix, first columns is primary test length and # second column is secondary test length respi <- rep(0, Lmax) # respi is a vector that records the current response stop_crit <- replicate(N, matrix(NA, Lmax, 2), simplify = FALSE) # a list of length N, each element is a matrix of size Lmax by 2, first column is # whether the primary criterion satisfied through out the test, second column is # whether the second criterion is satisfied ## Beginning the Test for (i in 1:N) { # initializing time2 <- Sys.time() test_items[[i]] <- rep(0, J) det_trace[[i]] <- rep(0, Lmax) thetahat_trace[[i]] <- matrix(0, p, J) #trace of estimated theta se_trace[[i]] <- matrix(0, p, J) # trace of estimation SE thetahat_i <- pr_mu + runif(p, - 0.1, 0.1) # initial value of thetahat if ( ! is.null(int_mat)) { thetahat_i <- int_mat[i,] } # the weights wti <- rep(1, p) remain_item <- c(1:J) domain_i <- rep(0, p) domain_wt <- rep(1, p) # Begin test before minimal length reached j <- 1 # item index while (any(domain_i < dl)) { # compute constraint weights remain_item <- setdiff(remain_item, test_items[[i]]) remain_J <- length(remain_item) f1 <- (du - domain_i - 1)/du f2 <- (Lmax-dl - j + domain_i)/(Lmax-dl) domain_wt <- f1 * f2 if (cw) { item_wt <- colSums(Q[,remain_item] * domain_wt) } else { item_wt <- rep(1, remain_J) } # criterion vector crit_vec <- rep(0, remain_J) # compute item selection for (jj in 1:length(remain_item)) { item <- c(test_items[[i]][c(1:j-1)],remain_item[jj]) # the subset of tested as well as the new item crit_vec[jj] <- MGRM_detfi(xhat = thetahat_i, params = params[item, ,drop = FALSE], ncat = ncat[item], method = select_method, p = p, invcov = invcov, aweights = aweights) # compute determinant } if (select_method == "a") { crit_vec <- max(abs(crit_vec)) + 0.01 + crit_vec } # make sure all criterion to be positive select_crit <- item_wt * crit_vec idx <- which(select_crit == max(select_crit)) if (length(idx) > 1) { idx <- sample(idx, 1) } # record selection and estimate theta test_items[[i]][j] <- remain_item[idx] domain_i <- domain_i + Q[ ,test_items[[i]][j]] respi[j] <- RESPmat[i, test_items[[i]][j]] temp <- MGRM_est(resp = respi[1:j], params = params[test_items[[i]][c(1:j)], , drop=FALSE], ncat = ncat[test_items[[i]][c(1:j)]], method = est_method, p = p, pr_mu = pr_mu, invcov = invcov) det_trace[[i]][j] <- temp$detfi # determinant thetahat_trace[[i]][,j] <- thetahat_i <- temp$theta_hat se_trace[[i]][,j] <- temp$std_error # proceed j = j+1 #print(domain_i) } # Begin Stopping Rules stop_test <- FALSE second_length <- FALSE do_include <- rep(TRUE, p) print("Finished minimal test, begins stopping rules") while (j <= Lmax) { # primary stopping criterion if (stop_method[1] == "det") { stop_crit[[i]][j - 1, 1] <- det_trace[[i]][j - 1] >= det_thd #if true, criterion satisfied } else if (stop_method[1] == "se") { do_include <- (se_trace[[i]][,j - 1] - se_thd) >= 0 # if true include domain stop_crit[[i]][j - 1, 1] <- ! any(do_include) # if true, criterion satisfied } if (stop_method[2] == "theta" && j > (p + 1)) { # compute secondary rule past <- rep(1, p) for(k in 1:p){ past[k] <- max(abs(thetahat_trace[[i]][, j - k] - thetahat_trace[[i]][, j - k - 1])) } stop_crit[[i]][j - 1, 2] <- max(past) <= theta_thd # if true, theta convergence # record the first time for secondary stopping if(stop_crit[[i]][j - 1 , 2] && !second_length){ test_length[i, 2] <- j - 1 second_length <- TRUE print(paste("Patient", i)) print(paste("Stopped by Secondary-rule at Item", j - 1)) #print(domain_i) } } else if (stop_method[2]=="se" && j > (p + 1)) { # compute secondary rule past <- rep(1,p) for(k in 1:p){ past[k] <- max(se_trace[[i]][, j - k - 1 ] - se_trace[[i]][, j- k]) } stop_crit[[i]][j - 1, 2] <- max(past) <= se_stop_thd # if true, se convergence # record the first time for secondary stopping if(stop_crit[[i]][j - 1, 2] && !second_length){ test_length[i, 2] <- j second_length <- TRUE print(paste("Patient", i)) print(paste("Stopped by Secondary-rule at Item", j - 1)) #print(domain_i) } } stop_test <- any(stop_crit[[i]][j - 1, ]) if (stop_test) { print(paste("Patient", i)) print(paste("Stopped by Compound-rule at Item", j - 1)) test_length[i, 1] <- j - 1 #print(domain_i) break } # compute constraint weights remain_item <- setdiff(remain_item, test_items[[i]]) remain_J <- length(remain_item) f1 <- (du - domain_i - 1)/du f2 <- (Lmax-dl - j + 1 + domain_i)/(Lmax-dl) domain_wt <- f1 * f2 if(stop_method[1]=="se"){ domain_wt[which(!do_include)] <- -10000 one_wt <- rep(1, p) one_wt[which(!do_include)] <- -10000 if (cw) { item_wt <- colSums(Q[, remain_item] * domain_wt) } else { item_wt <- colSums(Q[, remain_item] * one_wt) } } else { if (cw) { item_wt <- colSums(Q[,remain_item] * domain_wt) } else { item_wt <- rep(1, remain_J) } } # criterion vector crit_vec <- rep(0, remain_J) # compute item selection for (jj in 1:length(remain_item)) { item <- c(test_items[[i]][c(1:j-1)],remain_item[jj]) # the subset of tested as well as the new item crit_vec[jj] <- MGRM_detfi(xhat = thetahat_i, params = params[item, ,drop = FALSE], ncat = ncat[item], method = select_method, p = p, invcov = invcov, aweights = aweights) # compute determinant } if (select_method == "a") { crit_vec <- max(abs(crit_vec)) + 0.01 + crit_vec } # make sure all criterion to be positive select_crit <- item_wt * crit_vec idx <- which(select_crit == max(select_crit)) if (length(idx) > 1) { idx <- sample(idx, 1) } # record selection and estimate theta test_items[[i]][j] <- remain_item[idx] domain_i <- domain_i + Q[ ,test_items[[i]][j]] respi[j] <- RESPmat[i, test_items[[i]][j]] temp <- MGRM_est(resp = respi[1:j], params = params[test_items[[i]][c(1:j)], , drop=FALSE], ncat = ncat[test_items[[i]][c(1:j)]], method = est_method, p = p, pr_mu = pr_mu, invcov = invcov) det_trace[[i]][j] <- temp$detfi # determinant thetahat_trace[[i]][,j] <- thetahat_i <- temp$theta_hat se_trace[[i]][,j] <- temp$std_error # proceed j = j+1 #print(domain_i) } ## put the remainder of the tests into matrix of ## theta_trace and se_trace test_items[[i]] <- test_items[[i]][test_items[[i]]!=0] temp <- length(test_items[[i]]) det_trace[[i]] <- det_trace[[i]][1:temp] thetahat_trace[[i]] <- thetahat_trace[[i]][, c(1:temp)] se_trace[[i]] <- se_trace[[i]][, c(1:temp)] stop_crit[[i]] <- stop_crit[[i]][c(1:temp), ] resp[[i]] <- respi[1:temp] runtimei <- difftime(Sys.time(),time2,unit="sec") runtime <- difftime(Sys.time(),time1,unit="sec") print(paste("Subject",i,"Runtime:",runtimei)) print(paste("Test Length",length(test_items[[i]]))) print(paste("Domain",domain_i)) print(paste("SE",se_trace[[i]][,ncol(se_trace[[i]])])) print(paste("Total time:",runtime)) print("#########################") } #theta_hat <- matrix(0,N,p) #for(i in 1:N){ # theta_hat[i,] <- xhat_trace[[i]][,L] #} #sqrt(mean((theta_hat-thetas)^2)) ##output results to txt files return(list(resp=resp, se_trace=se_trace, det_trace = det_trace, thetahat_trace=thetahat_trace, test_items= test_items, test_length = test_length, stop_crit=stop_crit, cw=cw, runtime=runtime)) } ################################################## ##### Latent Attribute Estimation for ##### ##### Multivariate Graded Response Model ##### ##### Please cite the paper when using the R code. The citation is: ##### Wang, C., Weiss, D., & Shang, Z. (2019). Variable-length stopping ##### rules for multidimensional computerized adaptive testing. Psychometrika. ##### ##### Created: 03/01/2017 ##### ##### Updated: 11/15/2017 ##### ################################################## ##### Notes: There are two functions loaded, the ##### MGRM_est is used when there are at least two ##### items tested, while MGRM_est_single is used ##### for estimation based on one single item ################################################## ################################################## ##### Function: MGRM_est ##### ################################################## ##### Inputs ##### resp: response vector of length L, L > 1 ##### params: matrix of item parameters tested, L rows ##### ncat: number of response categories, vector of length L ##### p: latent attribute dimension, scalar ##### method: estimation method, takes values ##### of "mle", "eap" or "map" ##### pr_mu: prior mean vector of length p ##### invov: inverse of prior covariance matrix, p by p ##### m: eap sampling size, should be made omitable when using eap and map ##### constr: constraint vector of optimization of length 2 ##### the box constraint would be ##### [-constr, +constr]. Should be omitable ##### when use eap ################################################## ##### Outputs: ##### theta_hat: estimate of theta, vector of length p ##### std_error: standard error of the estimator, vector of length p ##### detfi: determinant of observed Fisher Information, scalar ################################################## MGRM_est <- function(resp, params, ncat, method = "map", m=200, p, pr_mu, invcov=NULL, constr=c(-4,4)){ # Preparations L <- length(resp) #get the current test length if (length(ncat) != L) { stop("Incorrect response categories") } # check response categories #if(ncol(params)!=p+max(ncat)-1){ #stop("Incorrect Item Parameters") #} # check item parameters a <- params[, c(1:p), drop=FALSE] #extract first p rows of the matrix params, # the discriminating parameters, a is a matrix of size L by p b <- params[,c((p + 1):ncol(params)), drop=FALSE] # extract the difficulty parameter matrix b <- cbind(b, rep(1e8, L)) # add one column to the right of b (to vectorize the likelihood computation) b <- cbind(rep(- 1e8, L), b) # add one column to the left of b (to vectorize the likelihood computation) ## EAP Estimation # if(method=="eap"){ # require(mvtnorm) # call R pacakge "mvtnorm" # pr_cov = solve(invcov) # compute the original covariance matrix # # (the EAP is not being used in the current study, should be made a required input # # for the EAP instead of inverse the matrix again) # samp_x <- rmvnorm(m,pr_mu,pr_cov) # Monte Carlo Sampling from multinormal distribution # # a matrix of size m by p # idx1 = cbind(c(1:L),resp+1) # indexing of the observed parameters, a matrix of two columns, # # first column is 1 to L, the second is the observed response plus 1 # idx2 = idx1 # indexing of the observed parameters # idx2[,2] = idx2[,2]+1 #indexing of the observed parameters plus 1, same setting as idx1, a # # matrix with 2 columns # b1 = matrix(rep(b[idx1],p),L,p) #extract the required parameter matrix of size L by p # b2 = matrix(rep(b[idx2],p),L,p) #extract the required parameter matrix of size L by p # Poster <- function(x){ # define the porsterior function # temp = -0.5*crossprod(x-pr_mu,invcov)%*%(x-pr_mu) # for(j in 1:L){ # temp=temp+log(1/(1+exp(-a[j,]%*%(x-b1[j,])))- # 1/(1+exp(-a[j,]%*%(x-b2[j,])))) # } # return(exp(temp)) # } # wt <- apply(samp_x,1,Poster) #compute posterior function for all samplings # wt <- wt/sum(wt) # compute the weights # theta_hat = colSums(samp_x*wt) # compute the estimate as weighted sum of # # all the samples # } ## MLE Estimation if(method=="mle"){ #optimization idx1 = cbind(c(1:L),resp+1) # indexing of the observed parameters, a matrix of two columns, # first column is 1 to L, the second is the observed response plus 1 idx2 = idx1 # indexing of the observed parameters idx2[,2] = idx2[,2]+1 #indexing of the observed parameters plus 1, same setting as idx1, a # matrix with 2 columns b1 = matrix(rep(b[idx1],p),L,p) #extract the required parameter matrix of size L by p b2 = matrix(rep(b[idx2],p),L,p) #extract the required parameter matrix of size L by p obj <- function(x){ # define the objective function temp = 0 # no prior information for(j in 1:L){ temp=temp+log(1/(1+exp(-a[j,]%*%(x-b1[j,])))- 1/(1+exp(-a[j,]%*%(x-b2[j,])))) # sum up loglikelihood function for all the items } return(-temp) } temp = optim(rep(0,p),obj, lower=rep(constr[1],p),upper=rep(constr[2],p), method="L-BFGS-B",hessian=TRUE) #solve optimization problem using R function optim # the output is a list theta_hat = temp$par #output the parameter estimation , a vector of length p fimat = solve(temp$hessian) # a matrix, the inverse of hessian (observed fisher information matrix) std_error = sqrt(diag(fimat)) # compute standard errors as a vector of the squared-root # of the diagonal terms of fisher information matrix detfi = det(fimat) # compute the determinant of fisher information matrix } ## MAP Estimation ## this is the same to MLE except the prior information is not 0 (the first step in computing ## the objective function) if (method == "map") { idx1 <- cbind(c(1:L), resp + 1) idx2 <- idx1 idx2[, 2] <- idx2[, 2] + 1 b1 <- matrix(rep(b[idx1], p), L, p) b2 <- matrix(rep(b[idx2], p), L, p) obj <- function(x){ temp <- -0.5 * crossprod(x - pr_mu, invcov) %*% (x - pr_mu) for(j in 1:L){ temp <- temp + log(1 / (1 + exp(- a[j, ] %*% (x-b1[j, ]))) - 1 / (1 + exp(- a[j, ] %*% (x - b2[j, ])))) } return(- temp) } temp <- optim(rep(0, p), obj, lower = rep(constr[1], p), upper = rep(constr[2], p), method = "L-BFGS-B", hessian = TRUE) theta_hat <- temp$par fimat <- solve(temp$hessian) std_error <- sqrt(diag(fimat)) detfi <- det(temp$hessian) } return(list(theta_hat=theta_hat,std_error = std_error, detfi=detfi)) } ################################################## ##### Determinant of Fisher Information ##### ##### Given New Item ##### ##### Please cite the paper when using the R code. The citation is: ##### Wang, C., Weiss, D., & Shang, Z. (2019). Variable-length stopping ##### rules for multidimensional computerized adaptive testing. Psychometrika. ##### ##### Created: 03/28/2017 ##### ##### Updated: 11/15/2017 ##### ################################################## ##### Notes: ##### ################################################## ################################################## ##### Function: MGRM_detfi ##### ################################################## ##### Inputs ##### xhat: Current Estimate of theta, a vector of length p ##### params: item parameters, a matrix of length L+1 ##### ncat: No. of response Catogories, a matrix of length L+1 ##### invcov: the inverse of Fisher Information matrix, p by p ################################################# ##### Outputs a list of ##### All the input settings ##### thetas: matrix of latent attributes ##### params: matrix of item parameters ##### invcov: inverse of covariance matrix ##### first p column is the discriminating parameter ##### p+1 to p+ncat[j]-1 is the level intercept ##### if ncat[j] < max(ncat), the rest is 1e8 ##### resp is the response matrix ##### method is a character, takes values in "d", "a", "e" ################################################## MGRM_detfi <- function(xhat, params, ncat, method, p, invcov, aweights = rep(1,p)) { if( ! method %in% c("d","a")){ stop("Item Selection Methods Not Found") } # Preparations FI <- invcov # starts to compute fisher information matrix a <- params[,c(1:p), drop = FALSE] # a matrix of discriminating parameters, L+1 by p b <- params[,c((p+1):ncol(params)), drop = FALSE] # matrix of intercepts b <- cbind(b,rep(1e8,nrow(params))) b <- cbind(rep(-1e8,nrow(params)),b) # Subjects simulation for (j in 1:nrow(params)) { # define probability ws <- rep(0, ncat[j]) # probability of getting l-1 vector of length L-1 # compute first cat ws[1] <- 1- 1 / (1 + exp( - a[j, ] %*% (xhat - b[j, 1]))) # compute middle cats if (ncat[j] > 2) { for(l in 1:(ncat[j]-2)){ ws[l + 1] <- 1 / (1 + exp( - a[j,] %*% (xhat - b[j, l]))) - 1 / (1 + exp( - a[j, ] %*% (xhat - b[j, l + 1]))) } } # compute last cat ws[ncat[j]] <- 1 / (1 + exp( - a[j, ] %*% (xhat - b[j, ncat[j] - 1]))) # compute fisher information xis <- rep(0, ncat[j] - 1) for(l in 1:(ncat[j] - 1)){ xis[l] <- (ws[l] + ws[l + 1]) * exp( - a[j, ] %*% (xhat - b[j, l])) / (1 + exp( - a[j, ] %*% (xhat - b[j, l]))) ^ 2 } FI <- FI + sum(xis) * tcrossprod(a[j, ], a[j, ]) } if (method == "d") { return(det(FI)) } else { temp <- solve(FI) return( - sum(diag(temp) * aweights)) } }