Data and R codes (CAT-WPLT, Language Testing)

Article
Data
Importing the data to R
Box plots of the standard errors of the CAT-WPLT and the fixed-item WPLT (Figure 5)
The correspondence between the ability estimate and standard error in the two test formats (Figure 6)
Bayesian estimation of the test of proportions (Table 3)
Contact

Article

Mizumoto, A., Sasao, Y., & Webb, S. (in press). Developing and evaluating a computerized adaptive testing version of the Word Part Levels Test. Language Testing. Retrieved from http://www.mizumot.com/files/cat-wplt.pdf

Data

CAT-WPLT data

Fixed-item data (Form)

Fixed-item data (Meaning)

Fixed-item data (Use)

Importing the data to R

# Form section
# CAT-WPLT data; the Form_se column supplies the standard errors (SE) of the
# CAT version for this section.
dat <- read.csv("http://www.mizumot.com/files/CAT-WPLT-ALL.csv", header = TRUE)
x1 <- dat$Form_se
CAT.Form_SE <- data.frame(SE = x1)

# Fixed-item data; NOTE: the SE is picked by position (third column), so the
# column order of the CSV file matters.
SasaoForm <- read.csv("http://www.mizumot.com/files/Form.p.est.csv", header = TRUE)
y1 <- SasaoForm[, 3]
Pilot.Form_SE <- data.frame(SE = y1)

# Long-format data frame: all SE values stacked (CAT rows first), plus a
# Group label identifying the test format of each row.
df1 <- rbind(CAT.Form_SE, Pilot.Form_SE)
df1$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Form_SE), nrow(Pilot.Form_SE)))

# Meaning section
# SE values of the CAT version, taken from the Meaning_se column of `dat`
# (the CAT-WPLT data read in the Form section).
x2 <- dat$Meaning_se
CAT.Meaning_SE <- data.frame(SE = x2)

# Fixed-item data; NOTE: the SE is picked by position (third column), so the
# column order of the CSV file matters.
SasaoMeaning <- read.csv("http://www.mizumot.com/files/Meaning.p.est.csv", header = TRUE)
y2 <- SasaoMeaning[, 3]
Pilot.Meaning_SE <- data.frame(SE = y2)

# Long-format data frame: all SE values stacked (CAT rows first), plus a
# Group label. The label counts come from the Meaning frames themselves, so
# the labels stay aligned even if the Form and Meaning files differ in size.
df2 <- rbind(CAT.Meaning_SE, Pilot.Meaning_SE)
df2$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Meaning_SE), nrow(Pilot.Meaning_SE)))

# Use section
# SE values of the CAT version, taken from the Use_se column of `dat`
# (the CAT-WPLT data read in the Form section).
x3 <- dat$Use_se
CAT.Use_SE <- data.frame(SE = x3)

# Fixed-item data; this file is read with the CP932 encoding.
# NOTE: the SE is picked by position (third column), so the column order of
# the CSV file matters.
SasaoUse <- read.csv("http://www.mizumot.com/files/Use.p.est.csv",
                     header = TRUE, fileEncoding = "CP932")
y3 <- SasaoUse[, 3]
Pilot.Use_SE <- data.frame(SE = y3)

# Long-format data frame: all SE values stacked (CAT rows first), plus a
# Group label. The label counts come from the Use frames themselves, so the
# labels stay aligned even if the Form and Use files differ in size.
df3 <- rbind(CAT.Use_SE, Pilot.Use_SE)
df3$Group <- rep(c("CAT", "Fixed-item"),
                 times = c(nrow(CAT.Use_SE), nrow(Pilot.Use_SE)))

Box plots of the standard errors of the CAT-WPLT and the fixed-item WPLT (Figure 5)

# Figure 5: one panel per section, comparing the SE distributions of the
# CAT and the fixed-item test. Three panels side by side, with a wider left
# margin for the y-axis annotation.
par(mfrow = c(1, 3))
par(mar = c(5, 6, 5, 3))

# Draw a single panel: box plots of SE by Group, the raw values overlaid as
# jittered semi-transparent points, a horizontal reference line at
# SE = 0.33, and the number of items of each format printed at y = 0.
#   se_data   data frame with columns SE and Group
#   title     panel title
#   box_width value passed to boxplot() as boxwex
#   n_items   character vector of length 2: labels for group 1 and group 2
draw_se_panel <- function(se_data, title, box_width, n_items) {
  # xlab = "" keeps the x axis untitled: since R 3.6.0 the formula method
  # would otherwise label it automatically from the formula.
  boxplot(SE ~ Group, data = se_data, boxwex = box_width, ylim = c(0, 0.7),
          las = 1, main = title, col = c("pink", "lightcyan"),
          xlab = "", ylab = "Standard Error",
          cex.main = 1.5, cex.axis = 1.5, yaxt = "n", cex.lab = 1.5)
  # The y axis is suppressed above (yaxt = "n") and drawn here instead.
  axis(2, cex.axis = 1.5, las = 1)
  stripchart(SE ~ Group, data = se_data, method = "jitter", pch = 1,
             col = rgb(0, 0, 0, alpha = 0.3), vertical = TRUE, add = TRUE)
  abline(h = 0.33, lwd = 0.5)
  text(1:2, 0, n_items, cex = 1.2)
}

draw_se_panel(df1, "Form", 0.6, c("20 items", "115 items"))
draw_se_panel(df2, "Meaning", 0.5, c("15 items", "73 items"))
draw_se_panel(df3, "Use", 0.5, c("10 items", "56 items"))

The correspondence between the ability estimate and standard error in the two test formats (Figure 6)

# Figure 6: SE plotted against the ability estimate, one panel per section.
# Fixed-item results are drawn first as small dots (columns 2 and 3 of the
# fixed-item data); the CAT results are then added on top as diamonds with
# points(), so each panel's axes are drawn only once. The horizontal line
# marks SE = 0.33.
par(mfrow = c(1, 3))

# (1) Form
CAT.Form <- data.frame(dat$Form_theta, dat$Form_se)
plot(SasaoForm[, 2], SasaoForm[, 3], xlim = c(-3, 3), ylim = c(0.1, 0.7),
     xlab = "Ability Estimate", ylab = "Standard Error", main = "Form",
     pch = 20, cex = 0.25)
points(CAT.Form[, 1], CAT.Form[, 2], pch = 23, cex = 1)
labels <- c("CAT (20 items)", "Fixed-item (115 items)")
legend("topleft", legend = labels, pch = c(23, 20))
abline(h = 0.33, lwd = 0.5)

# (2) Meaning
CAT.Meaning <- data.frame(dat$Meaning_theta, dat$Meaning_se)
plot(SasaoMeaning[, 2], SasaoMeaning[, 3], xlim = c(-3, 3), ylim = c(0.1, 0.7),
     xlab = "Ability Estimate", ylab = "Standard Error", main = "Meaning",
     pch = 20, cex = 0.25)
points(CAT.Meaning[, 1], CAT.Meaning[, 2], pch = 23, cex = 1)
labels <- c("CAT (15 items)", "Fixed-item (73 items)")
legend("topleft", legend = labels, pch = c(23, 20))
abline(h = 0.33, lwd = 0.5)

# (3) Use
CAT.Use <- data.frame(dat$Use_theta, dat$Use_se)
plot(SasaoUse[, 2], SasaoUse[, 3], xlim = c(-3, 3), ylim = c(0.1, 0.7),
     xlab = "Ability Estimate", ylab = "Standard Error", main = "Use",
     pch = 20, cex = 0.25)
points(CAT.Use[, 1], CAT.Use[, 2], pch = 23, cex = 1)
labels <- c("CAT (10 items)", "Fixed-item (56 items)")
legend("topleft", legend = labels, pch = c(23, 20))
abline(h = 0.33, lwd = 0.5)

Bayesian estimation of the test of proportions (Table 3)

# Install the BayesianFirstAid package from GitHub if it is not available,
# then attach it. library() stops with an error if the package still cannot
# be loaded, instead of continuing silently the way require() does.
if (!requireNamespace("BayesianFirstAid", quietly = TRUE)) {
  devtools::install_github("rasmusab/bayesian_first_aid")
}
library(BayesianFirstAid)
# *Bayesian First Aid*
# http://www.sumsar.net/blog/2014/06/bayesian-first-aid-prop-test/

# (1) Form
# Successes: number of SE values below 0.33 in the CAT data (x1) and in the
# fixed-item data (y1). sum() on the logical vector returns 0 when no value
# is below the cut-off and the full count when all are, unlike indexing the
# second cell of table(). na.rm = TRUE ignores missing values, as table() did.
under1 <- c(sum(x1 < 0.33, na.rm = TRUE), sum(y1 < 0.33, na.rm = TRUE))
# Trials: total number of SE values in each data set.
over1 <- c(length(x1), length(y1))
fit1 <- bayes.prop.test(under1, over1)
fit1

## 
##  Bayesian First Aid proportion test
## 
## data: under1 out of over1
## number of successes:   537,  990
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.71 [0.67, 0.74]
##   Group 2: 0.73 [0.71, 0.76]
## Estimated group difference (Group 1 - Group 2):
##   -0.03 [-0.069, 0.011]
## The relative frequency of success is larger for Group 1 by a probability
## of 0.085 and larger for Group 2 by a probability of 0.915 .

plot(fit1)

# Share of the rows of s1 (presumably the posterior draws of the model; see
# the BayesianFirstAid documentation) in which the two estimated proportions
# differ by less than 0.05 in absolute value.
s1 <- as.data.frame(fit1)
with(s1, mean(abs(theta1 - theta2) < 0.05))

## [1] 0.8564

# (2) Meaning
# Successes: number of SE values below 0.33 in the CAT data (x2) and in the
# fixed-item data (y2). sum() on the logical vector returns 0 when no value
# is below the cut-off and the full count when all are, unlike indexing the
# second cell of table(). na.rm = TRUE ignores missing values, as table() did.
under2 <- c(sum(x2 < 0.33, na.rm = TRUE), sum(y2 < 0.33, na.rm = TRUE))
# Trials: total number of SE values in each data set.
over2 <- c(length(x2), length(y2))
fit2 <- bayes.prop.test(under2, over2)
fit2

## 
##  Bayesian First Aid proportion test
## 
## data: under2 out of over2
## number of successes:   529,  734
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.70 [0.66, 0.73]
##   Group 2: 0.54 [0.52, 0.57]
## Estimated group difference (Group 1 - Group 2):
##   0.15 [0.11, 0.19]
## The relative frequency of success is larger for Group 1 by a probability
## of >0.999 and larger for Group 2 by a probability of <0.001 .

plot(fit2)

# Share of the rows of s2 (presumably the posterior draws of the model; see
# the BayesianFirstAid documentation) in which the two estimated proportions
# differ by less than 0.05 in absolute value.
s2 <- as.data.frame(fit2)
with(s2, mean(abs(theta1 - theta2) < 0.05))

## [1] 0

# (3) Use
# Successes: number of SE values below 0.33 in the CAT data (x3) and in the
# fixed-item data (y3). sum() on the logical vector returns 0 when no value
# is below the cut-off and the full count when all are, unlike indexing the
# second cell of table(). na.rm = TRUE ignores missing values, as table() did.
under3 <- c(sum(x3 < 0.33, na.rm = TRUE), sum(y3 < 0.33, na.rm = TRUE))
# Trials: total number of SE values in each data set.
over3 <- c(length(x3), length(y3))
fit3 <- bayes.prop.test(under3, over3)
fit3

## 
##  Bayesian First Aid proportion test
## 
## data: under3 out of over3
## number of successes:   565,  991
## number of trials:      760, 1348
## Estimated relative frequency of success [95% credible interval]:
##   Group 1: 0.74 [0.71, 0.77]
##   Group 2: 0.73 [0.71, 0.76]
## Estimated group difference (Group 1 - Group 2):
##   0.01 [-0.031, 0.047]
## The relative frequency of success is larger for Group 1 by a probability
## of 0.659 and larger for Group 2 by a probability of 0.341 .

plot(fit3)

# Share of the rows of s3 (presumably the posterior draws of the model; see
# the BayesianFirstAid documentation) in which the two estimated proportions
# differ by less than 0.05 in absolute value.
s3 <- as.data.frame(fit3)
with(s3, mean(abs(theta1 - theta2) < 0.05))

## [1] 0.9820667

Contact

Atsushi MIZUMOTO, Ph.D.
Professor of Applied Linguistics
Faculty of Foreign Language Studies
Graduate School of Foreign Language Education and Research
Kansai University, Osaka, Japan