# Load following package first
library(party)
#
#-------------------------start of main function----------------------------------------------------
REEMctree <- function (formula, data, random, subset = NULL, initialRandomEffects = rep(0, 
    TotalObs), ErrorTolerance = 0.001, MaxIterations = 1000, 
    verbose = FALSE, lme.control = lmeControl(returnObject = TRUE), ctree.control = party::ctree_control(mincriterion = 0.95),
    method = "REML", correlation = NULL) 
 {
    TotalObs <- dim(data)[1]
    if (identical(subset, NULL)) {
        subs <- rep(TRUE, dim(data)[1])
    }
    else {
        subs <- subset
    }
    Predictors <- paste(attr(terms(formula), "term.labels"), 
        collapse = "+")
    TargetName <- formula[[2]]
    if (length(TargetName) > 1) 
        TargetName <- TargetName[3]
    if (verbose) 
        print(paste("Target variable: ", TargetName))
    Target <- data[, toString(TargetName)]
    ContinueCondition <- TRUE
    iterations <- 0
    AdjustedTarget <- Target - initialRandomEffects
    oldlik <- -Inf
    newdata <- data
    newdata[, "SubsetVector"] <- subs
    while (ContinueCondition) {
        newdata[, "AdjustedTarget"] <- AdjustedTarget
        iterations <- iterations + 1
		
        tree <- party::ctree(formula(paste(c("AdjustedTarget", Predictors), collapse = "~")), data = newdata, subset = subs, controls = ctree.control)  
        
        if (verbose) 
            print(tree)
        newdata[, "nodeInd"] <- 0
        newdata[subs, "nodeInd"] <- where(tree)
        if (min(where(tree)) == max(where(tree))) { #it doesn't split on root
            lmefit <- lme(formula(paste(c(toString(TargetName), 
                1), collapse = "~")), data = newdata, random = random, 
                subset = SubsetVector, method = method, control = lme.control, 
                correlation = correlation)
        }
        else {
            lmefit <- lme(formula(paste(c(toString(TargetName), 
                "as.factor(nodeInd)"), collapse = "~")), data = newdata, 
                random = random, subset = SubsetVector, method = method, 
                control = lme.control, correlation = correlation)
        }
        if (verbose) {
            print(lmefit)
            print(paste("Estimated Error Variance = ", lmefit$sigma))
            print("Estimated Random Effects Variance = ")
            print(as.matrix(lmefit$modelStruct$reStruct[[1]]) * 
                lmefit$sigma^2)
        }
        newlik <- logLik(lmefit)
        if (verbose) 
            print(paste("Log likelihood: ", newlik))
        ContinueCondition <- (newlik - oldlik > ErrorTolerance & 
            iterations < MaxIterations)
        oldlik <- newlik
        AllEffects <- lmefit$residuals[, 1] - lmefit$residuals[, 
            dim(lmefit$residuals)[2]]
        AdjustedTarget[subs] <- Target[subs] - AllEffects
    }
    residuals <- rep(NA, length = length(Target))
    residuals[subs] <- Target[subs] - predict(lmefit)
    attr(residuals, "label") <- NULL 
	
    result <- list(Tree = tree, EffectModel = lmefit, RandomEffects = ranef(lmefit), 
        BetweenMatrix = as.matrix(lmefit$modelStruct$reStruct[[1]]) * 
            lmefit$sigma^2, ErrorVariance = lmefit$sigma^2, data = data, 
        logLik = newlik, IterationsUsed = iterations, Formula = formula, 
        Random = random, Subset = subs, ErrorTolerance = ErrorTolerance, 
        correlation = correlation, residuals = residuals, method = method, 
        lme.control = lme.control, ctree.control = ctree.control)
    class(result) <- "REEMctree"
    return(result)
 }
#-------------------------end of main function----------------------------------------------------
#--------------------------------------------------------------------------------------------------
# Example to use unbiased RE-EM tree
library(REEMtree) # Only needed to access data set
library(party)
data(simpleREEMdata)
REEM.ctree.result<-REEMctree(Y~D+t+X, data=simpleREEMdata, random=~1|ID)
plot(REEM.ctree.result$Tree)

### Some functions in the original RE-EM tree may be lost or need different treatment such as using the predict() function
### to predict the fixed effect of testing data from a fitted tree. In particular, if a correlation structure other than
### independence is assumed for errors within individuals, the predicted response values at the terminal nodes of the tree
### will not be correct, and need to be obtained from the associated mixed model fit; that is,
###          unique(cbind(where(REEM.ctree.result$Tree), predict(REEM.ctree.result$Tree)))
### will NOT give the correct responses at the terminal nodes, but
###          unique(cbind(where(REEM.ctree.result$Tree), predict(REEM.ctree.result$EffectModel, level = 0)))
### will. Note that this means that the estimated responses at the terminal nodes when the tree is plotted will be (slightly)
### incorrect as well.