Article

Mizumoto, A., Sasao, Y., & Webb, S. (in press). Developing and evaluating a computerized adaptive testing version of the Word Part Levels Test. Language Testing. Retrieved from http://www.mizumot.com/files/cat-wplt.pdf


Data

CAT-WPLT data

Fixed-item data (Form)

Fixed-item data (Meaning)

Fixed-item data (Use)


Importing the data to R

# Read the CAT results and the three fixed-item result files, then build one
# long data frame per section (df1 = Form, df2 = Meaning, df3 = Use) holding
# the standard errors (SE) of both formats plus a Group label for plotting.
# x1-x3 / y1-y3 keep the raw SE vectors (CAT / fixed-item) for later tests.

# Form section
dat <- read.csv("http://www.mizumot.com/files/CAT-WPLT-ALL.csv", header = TRUE)
x1 <- dat$Form_se
CAT.Form_SE <- data.frame(SE = x1)

SasaoForm <- read.csv("http://www.mizumot.com/files/Form.p.est.csv", header = TRUE)
y1 <- SasaoForm[, 3]  # third column is used as the fixed-item SE
Pilot.Form_SE <- data.frame(SE = y1)

df1 <- rbind(CAT.Form_SE, Pilot.Form_SE)
df1$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Form_SE), nrow(Pilot.Form_SE)))

# Meaning section
x2 <- dat$Meaning_se
CAT.Meaning_SE <- data.frame(SE = x2)

SasaoMeaning <- read.csv("http://www.mizumot.com/files/Meaning.p.est.csv", header = TRUE)
y2 <- SasaoMeaning[, 3]
Pilot.Meaning_SE <- data.frame(SE = y2)

df2 <- rbind(CAT.Meaning_SE, Pilot.Meaning_SE)
# Label rows from this section's own frames (not the Form frames), so the
# labels stay correct even if the sections ever differ in size.
df2$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Meaning_SE), nrow(Pilot.Meaning_SE)))

# Use section
x3 <- dat$Use_se
CAT.Use_SE <- data.frame(SE = x3)

# This file is read with an explicit CP932 encoding, unlike the other two.
SasaoUse <- read.csv("http://www.mizumot.com/files/Use.p.est.csv",
                     header = TRUE, fileEncoding = "CP932")
y3 <- SasaoUse[, 3]
Pilot.Use_SE <- data.frame(SE = y3)

df3 <- rbind(CAT.Use_SE, Pilot.Use_SE)
df3$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Use_SE), nrow(Pilot.Use_SE)))

Box plots of the standard errors of the CAT-WPLT and the fixed-item WPLT (Figure 5)

# Figure 5: one box plot panel per section, CAT vs. fixed-item standard errors.

# Draw a single panel.
#   se_data     data frame with columns SE (numeric) and Group (label)
#   title       panel title
#   item_counts labels written at y = 0 under each group, in group order
#   boxwex      box width passed to boxplot()
plot_se_boxplot <- function(se_data, title, item_counts, boxwex = 0.5) {
  # xlab is set explicitly: recent R versions otherwise label the x axis of a
  # formula boxplot with the deparsed grouping term.
  boxplot(SE ~ Group, data = se_data, boxwex = boxwex, ylim = c(0, 0.7),
          las = 1, main = title, col = c("pink", "lightcyan"),
          xlab = "", ylab = "Standard Error",
          cex.main = 1.5, cex.axis = 1.5, yaxt = "n", cex.lab = 1.5)
  # The y axis is suppressed above (yaxt = "n") and drawn here instead.
  axis(2, cex.axis = 1.5, las = 1)
  # Overlay the individual observations as jittered, semi-transparent points.
  stripchart(SE ~ Group, data = se_data, method = "jitter", pch = 1,
             col = rgb(0, 0, 0, alpha = 0.3), vertical = TRUE, add = TRUE)
  # Horizontal reference line at SE = 0.33.
  abline(h = 0.33, lwd = 0.5)
  text(seq_along(item_counts), 0, item_counts, cex = 1.2)
  invisible(NULL)
}

par(mfrow = c(1, 3))
par(mar = c(5, 6, 5, 3))

# The Form panel uses a slightly wider box than the other two panels.
plot_se_boxplot(df1, "Form", c("20 items", "115 items"), boxwex = 0.6)
plot_se_boxplot(df2, "Meaning", c("15 items", "73 items"))
plot_se_boxplot(df3, "Use", c("10 items", "56 items"))


The correspondence between the ability estimate and standard error in the two test formats (Figure 6)

# Figure 6: standard error plotted against ability estimate, one panel per
# section, with the fixed-item results as small dots and the CAT results as
# diamonds drawn on top.

# Draw a single panel.
#   fixed         fixed-item results; column 2 is plotted on x, column 3 on y
#   cat_est       CAT results; column 1 is plotted on x, column 2 on y
#   title         panel title
#   legend_labels legend entries, CAT first, fixed-item second
plot_se_by_ability <- function(fixed, cat_est, title, legend_labels) {
  plot(fixed[, 2], fixed[, 3], xlim = c(-3, 3), ylim = c(0.1, 0.7),
       xlab = "Ability Estimate", ylab = "Standard Error", main = title,
       pch = 20, cex = 0.25)
  # Add the CAT points to the existing panel instead of overlaying a second
  # plot() with par(new = TRUE).
  points(cat_est[, 1], cat_est[, 2], pch = 23, cex = 1)
  legend("topleft", legend = legend_labels, pch = c(23, 20))
  # Horizontal reference line at SE = 0.33.
  abline(h = 0.33, lwd = 0.5)
  invisible(NULL)
}

par(mfrow = c(1, 3))

# (1) Form
CAT.Form <- data.frame(dat$Form_theta, dat$Form_se)
plot_se_by_ability(SasaoForm, CAT.Form, "Form",
                   c("CAT (20 items)", "Fixed-item (115 items)"))

# (2) Meaning
CAT.Meaning <- data.frame(dat$Meaning_theta, dat$Meaning_se)
plot_se_by_ability(SasaoMeaning, CAT.Meaning, "Meaning",
                   c("CAT (15 items)", "Fixed-item (73 items)"))

# (3) Use
CAT.Use <- data.frame(dat$Use_theta, dat$Use_se)
plot_se_by_ability(SasaoUse, CAT.Use, "Use",
                   c("CAT (10 items)", "Fixed-item (56 items)"))


Bayesian estimation of the test of proportions (Table 3)

# Install package (library) if not installed, then load it.
# library() is used for loading so that a failed install stops the script
# here instead of failing later at the first bayes.prop.test() call.
if (!requireNamespace("BayesianFirstAid", quietly = TRUE)) {
  devtools::install_github("rasmusab/bayesian_first_aid")
}
library(BayesianFirstAid)
# *Bayesian First Aid*
# http://www.sumsar.net/blog/2014/06/bayesian-first-aid-prop-test/

# For each section, compare the proportion of test takers whose standard
# error is below 0.33 between the CAT (Group 1) and the fixed-item format
# (Group 2). underN holds the counts below the cutoff, overN the group sizes.
# sum() is used for the counts because table(...)[2] returns NA when every
# value falls on the same side of the cutoff.

# (1) Form
under1 <- c(sum(x1 < 0.33, na.rm = TRUE), sum(y1 < 0.33, na.rm = TRUE))
over1 <- c(length(x1), length(y1))
fit1 <- bayes.prop.test(under1, over1)
fit1
## 
##  Bayesian First Aid proportion test
## 
## data: under1 out of over1
## number of successes:   537,  990
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.71 [0.67, 0.74]
##   Group 2: 0.73 [0.71, 0.76]
## Estimated group difference (Group 1 - Group 2):
##   -0.03 [-0.069, 0.011]
## The relative frequency of success is larger for Group 1 by a probability
## of 0.085 and larger for Group 2 by a probability of 0.915 .
plot(fit1)

s1 <- as.data.frame(fit1)
# Share of rows in s1 where the absolute difference between theta1 and
# theta2 is smaller than 0.05 (a proportion of samples, not a p-value).
mean(abs(s1$theta1 - s1$theta2) < 0.05)
## [1] 0.8564
# (2) Meaning
under2 <- c(sum(x2 < 0.33, na.rm = TRUE), sum(y2 < 0.33, na.rm = TRUE))
over2 <- c(length(x2), length(y2))
fit2 <- bayes.prop.test(under2, over2)
fit2
## 
##  Bayesian First Aid proportion test
## 
## data: under2 out of over2
## number of successes:   529,  734
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.70 [0.66, 0.73]
##   Group 2: 0.54 [0.52, 0.57]
## Estimated group difference (Group 1 - Group 2):
##   0.15 [0.11, 0.19]
## The relative frequency of success is larger for Group 1 by a probability
## of >0.999 and larger for Group 2 by a probability of <0.001 .
plot(fit2)

s2 <- as.data.frame(fit2)
mean(abs(s2$theta1 - s2$theta2) < 0.05)
## [1] 0
# (3) Use
under3 <- c(sum(x3 < 0.33, na.rm = TRUE), sum(y3 < 0.33, na.rm = TRUE))
over3 <- c(length(x3), length(y3))
fit3 <- bayes.prop.test(under3, over3)
fit3
## 
##  Bayesian First Aid proportion test
## 
## data: under3 out of over3
## number of successes:   565,  991
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.74 [0.71, 0.77]
##   Group 2: 0.73 [0.71, 0.76]
## Estimated group difference (Group 1 - Group 2):
##   0.01 [-0.031, 0.047]
## The relative frequency of success is larger for Group 1 by a probability
## of 0.659 and larger for Group 2 by a probability of 0.341 .
plot(fit3)

s3 <- as.data.frame(fit3)
mean(abs(s3$theta1 - s3$theta2) < 0.05)
## [1] 0.9820667

Contact

Atsushi MIZUMOTO, Ph.D.
Professor of Applied Linguistics
Faculty of Foreign Language Studies
Graduate School of Foreign Language Education and Research
Kansai University, Osaka, Japan