# NOTE: The "#" symbol indicates comments. All other lines are commands that can be copied into the R console.

# 1. Load a file that was exported from DataShop as a student-step rollup export
# file.choose() brings up a dialog; select the dsXX_student_step_XX.txt file you exported.
file <- file.choose()
file # Echo the chosen path. To illustrate, the file I used:
# [1] "/Ken/.../ds76_student_step_2014_0716_171821/ds76_student_step_All_Data_74_2014_0615_045213.txt"
# Read the export with a header row; comment.char = "" keeps any "#" inside a field as data.
ds <- read.delim(
  file,
  header = TRUE,
  quote = "\"",
  dec = ".",
  fill = TRUE,
  comment.char = ""
)

# 2. Inspect the file and do minimal necessary preprocessing
attach(ds) # Puts the columns of ds on the search path, e.g. Anon.Student.Id instead of ds$Anon.Student.Id
summary(ds) # Overview of every column in the file
L <- length(Anon.Student.Id) # Number of rows, measured on one column of ds
# Success is 1 on rows where First.Attempt is "correct" and 0 on all other rows.
# %in% never returns NA, so rows with a missing First.Attempt stay 0.
Success <- as.numeric(First.Attempt %in% "correct")

# 3. Run a simple version of the Additive Factors Model -- all variables are fixed effects.
# Terms: one per student, one per KC, and a KC-by-opportunity interaction.
model.glm <- glm(
  Success ~ Anon.Student.Id + KC..Original. + KC..Original.:Opportunity..Original.,
  family = binomial(), # family = binomial() makes this logistic regression
  data = ds
)

# 4. Inspect parameters & produce prediction fit metrics
summary(model.glm) # Allows you to inspect parameter estimates
length(coef(model.glm)) # Number of parameters. You should get Parameters = 88
# Success is coded 0/1, so -deviance/2 is the model's log-likelihood.
-summary(model.glm)$deviance/2 # Log-likelihood = -2479.298
summary(model.glm)$aic # AIC = 5134.595
# BIC = AIC + k * (log(n) - 2), with k parameters and n observations.
# nobs() supplies n from the fitted model, so no separate row-count variable is needed.
summary(model.glm)$aic + length(coef(model.glm)) * (log(nobs(model.glm)) - 2) # BIC = 5709.92

# 5. Try a different KC model
# Same model structure as before, using the Textbook_New_Decompose KC and opportunity columns.
model.glm <- glm(
  Success ~ Anon.Student.Id + KC..Textbook_New_Decompose. +
    KC..Textbook_New_Decompose.:Opportunity..Textbook_New_Decompose.,
  family = binomial(), # family = binomial() makes this logistic regression
  data = ds
)
length(coef(model.glm)) # Number of parameters. You should get Parameters = 80
# Success is coded 0/1, so -deviance/2 is the model's log-likelihood.
-summary(model.glm)$deviance/2 # Log-likelihood = -2461.867 - better despite fewer parameters
summary(model.glm)$aic # AIC = 5083.734 # also better
# BIC = AIC + k * (log(n) - 2), with k parameters and n observations.
# nobs() supplies n from the fitted model, so no separate row-count variable is needed.
summary(model.glm)$aic + length(coef(model.glm)) * (log(nobs(model.glm)) - 2) # BIC = 5606.756  # also better


# OTHER OPTIONAL EXAMPLES
# 6. Fixed learning rate (slope) across all KCs
# The opportunity column enters as a single main effect rather than interacting with KC.
model.glm <- glm(
  Success ~ Anon.Student.Id + KC..Textbook_New_Decompose. +
    Opportunity..Textbook_New_Decompose.,
  family = binomial(),
  data = ds
)
summary(model.glm)$aic # AIC = 5128.8 # worse than 5083.734 above

# 7. Different slopes for different students
# Adds a student-by-opportunity interaction on top of the KC-by-opportunity one.
model.glm <- glm(
  Success ~ Anon.Student.Id + KC..Textbook_New_Decompose. +
    KC..Textbook_New_Decompose.:Opportunity..Textbook_New_Decompose. +
    Anon.Student.Id:Opportunity..Textbook_New_Decompose.,
  family = binomial(),
  data = ds
)
summary(model.glm)$aic # AIC = 5112.847 # worse than 5083.734 above

# 8. To do a model with random effects load the lme4 package
# (run install.packages("lme4") once beforehand if it is not installed).
library(lme4) # Provides glmer(); without this the call below fails with "could not find function"
# This model, like the AFM model in DataShop, treats the Student (Anon.Student.Id) as a random effect.
# Note: this formula uses one Opportunity slope shared by all KCs; replace the last term with
# KC..Original.:Opportunity..Original. if you want a separate slope per KC.
model1.lmer <- glmer(
  Success ~ (1 | Anon.Student.Id) + KC..Original. + Opportunity..Original.,
  data = ds,
  family = binomial()
)

# Loading a different DataShop dataset
detach(ds) # Take the columns of the prior ds off the search path.
ds <- read.delim(
  file.choose(),
  header = TRUE,
  quote = "\"",
  dec = ".",
  fill = TRUE,
  comment.char = ""
)
# To illustrate, the file I used:
# [1] "/Ken/.../ds748_student_step_2014_0224_102531/ds748_student_step_All_Data_2133_2014_0221_202753.txt"
attach(ds)
summary(ds)
# Success is 1 on rows where First.Attempt is "correct" and 0 on all other rows.
# %in% never returns NA, so rows with a missing First.Attempt stay 0.
Success <- as.numeric(First.Attempt %in% "correct")
model.glm <- glm(
  Success ~ Anon.Student.Id + KC..all.shapes.merged. +
    KC..all.shapes.merged.:Opportunity..all.shapes.merged.,
  family = binomial(),
  data = ds
)

# If you need to save memory or want to create your own (smaller) data table:
# keep only the outcome plus the three columns that the model formula refers to.
sds <- data.frame(
  Success,
  Anon.Student.Id,
  KC..all.shapes.merged.,
  Opportunity..all.shapes.merged.
)
detach(ds)
rm(ds, Success) # Drop the full table and the standalone Success vector; sds holds copies of what is needed
attach(sds)

model.glm <- glm(
  Success ~ Anon.Student.Id + KC..all.shapes.merged. +
    KC..all.shapes.merged.:Opportunity..all.shapes.merged.,
  family = binomial(),
  data = sds
)





# EXTRA
# If you are getting memory allocation errors, the following function is useful to track down objects you might remove (using rm()).

# improved list of objects
# List the objects found at `pos`, with their type, size and dimensions.
#
# Arguments:
#   pos        Where to look; passed on to ls() and get(). Default 1.
#   pattern    Optional pattern passed on to ls() to filter object names.
#   order.by   Optional name of a result column to sort by
#              ("Type", "Size", "PrettySize", "Rows" or "Columns").
#   decreasing Sort in decreasing order when order.by is given.
#   head       If TRUE, keep only the first n rows of the result.
#   n          Number of rows kept when head is TRUE.
#
# Returns a data frame with one row per object (row names are the object
# names) and columns Type, Size, PrettySize, Rows and Columns. When no object
# is found, a zero-row data frame with the same columns is returned instead
# of failing.
.ls.objects <- function (pos = 1, pattern, order.by,
                        decreasing=FALSE, head=FALSE, n=5) {
    # vapply (not sapply) keeps the result type fixed for every lookup.
    napply <- function(nms, fn, template)
        vapply(nms, function(x) fn(get(x, pos = pos)), template)
    obj.names <- ls(pos = pos, pattern = pattern)
    if (length(obj.names) == 0L) {
        # Nothing to describe: return an empty table with the usual columns.
        return(data.frame(Type = character(0), Size = numeric(0),
                          PrettySize = character(0), Rows = numeric(0),
                          Columns = numeric(0)))
    }
    obj.class <- napply(obj.names, function(x) as.character(class(x))[1],
                        character(1))
    obj.mode <- napply(obj.names, mode, character(1))
    # Fall back to the mode when the class is missing.
    obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
    obj.prettysize <- napply(obj.names, function(x)
                             format(object.size(x), units = "auto"),
                             character(1))
    obj.size <- napply(obj.names, function(x) as.numeric(object.size(x)),
                       numeric(1))
    # First two dimensions of each object (NA where absent), one row per object.
    obj.dim <- t(napply(obj.names, function(x) as.numeric(dim(x))[1:2],
                        numeric(2)))
    # Objects without dimensions (other than functions) report their length as Rows.
    vec <- is.na(obj.dim)[, 1] & (obj.type != "function")
    obj.dim[vec, 1] <- napply(obj.names, function(x) as.numeric(length(x)),
                              numeric(1))[vec]
    out <- data.frame(obj.type, obj.size, obj.prettysize, obj.dim)
    names(out) <- c("Type", "Size", "PrettySize", "Rows", "Columns")
    if (!missing(order.by))
        out <- out[order(out[[order.by]], decreasing=decreasing), ]
    if (head)
        out <- head(out, n)
    out
}

# shorthand
# Report the n largest objects (10 by default): forwards ... to .ls.objects()
# with the result sorted by Size in decreasing order and cut to the first n rows.
lsos <- function(..., n = 10) {
    largest.first <- .ls.objects(
        ...,
        order.by = "Size",
        decreasing = TRUE,
        head = TRUE,
        n = n
    )
    largest.first
}

lsos() # With the defaults this lists up to ten objects at search position 1, ordered by Size (decreasing)