TABLE OF CONTENTS
initialization/makedatamat [ Functions ]
NAME
makedatamat --- make data matrix
FUNCTION
Convert an Andersen-Gill data frame for a single recurrent event process into the data matrix format used in the remainder of the algorithm.
SYNOPSIS
206 makedatamat <- function(agdata, as, alternating)
INPUTS
agdata       a data frame in Andersen-Gill format with columns i, j, k, r, start, stop, delta and covariates
as           a matrix of discretization breakpoints for each stratum (one row per stratum), generated by makeas
alternating  boolean, indicating whether the at-risk function alternates between events (episodic process)
OUTPUTS
datamat  a matrix with columns i, j, k, r, time, delta, smin, smax and covariates. The columns are:
    i      cluster
    j      subject
    k      event counter
    r      stratum
    time   length of time for each interval
    delta  event indicator
    smin   discretization interval corresponding to start time in input
    smax   discretization interval corresponding to stop time in input
SOURCE
#' Build the discretized data matrix for one recurrent event process
#'
#' Converts an Andersen-Gill style data frame into a matrix with columns
#' i, j, k, r, time, delta, smin, smax followed by the covariates.
#'
#' @param agdata Data frame. Columns 1-4 are copied by position and must
#'   include one named `r`; `i`, `j`, `start`, `stop` and `delta` are read by
#'   name; columns 8 onward are copied as covariates (there may be none).
#' @param as Matrix of discretization breakpoints, indexed as `as[r, ]` with
#'   `r` the stratum of the current row.
#' @param alternating Logical scalar. If `TRUE` the time of a row is
#'   `stop - start`; otherwise it is `stop` minus the start time of the
#'   current run of rows.
#' @return A matrix with one row per retained input row. Rows whose interval
#'   was merged into the following row are removed. The result is always a
#'   matrix, even when a single row (or no row) remains.
makedatamat <- function(agdata, as, alternating)
{
    n_row <- nrow(agdata)
    n_col <- ncol(agdata)
    if (n_col < 7L) {
        stop("agdata must contain the columns i, j, k, r, start, stop, delta",
             call. = FALSE)
    }
    # Positions of the covariate columns in the input (possibly none).
    cov_in <- seq_len(n_col - 7L) + 7L

    # Allocate space. Most data is copied directly from agdata. Building the
    # columns with rep() keeps this valid for zero-row input, and
    # drop = FALSE keeps the name of a single covariate column.
    datamat <- cbind(agdata[, 1:4, drop = FALSE],
                     time = rep(0, n_row),
                     delta = agdata$delta,
                     smin = rep(1, n_row),
                     smax = rep(0, n_row))
    if (length(cov_in) > 0L) {
        datamat <- cbind(datamat, agdata[, cov_in, drop = FALSE])
    }
    datamat <- as.matrix(datamat)
    # Positions of the covariate columns in the output (possibly none).
    cov_out <- seq_len(ncol(datamat) - 8L) + 8L

    # A row starts a new run when the (i, j) pair changes or when the previous
    # row ended in an event. i and j are compared separately: combining them
    # as i * 1000 + j confuses distinct pairs once j exceeds 999.
    new_unit <- c(TRUE, agdata$i[-1L] != agdata$i[-n_row] |
                      agdata$j[-1L] != agdata$j[-n_row])
    after_event <- c(TRUE, agdata$delta[-n_row] == 1)
    diffevent <- new_unit | after_event

    # Marks rows that were absorbed by the row after them.
    merged <- logical(n_row)

    # Loop to compute discretization intervals. The loop is order dependent:
    # lasteventtime and smax carry over from one row to the next.
    for (ind in seq_len(n_row)) {
        # Reset last event start time
        if (diffevent[ind]) {
            lasteventtime <- agdata$start[ind]
            smax <- 0
        }
        # Interevent time is given by stop - start from agdata.
        newtime <- if (alternating) {
            agdata$stop[ind] - agdata$start[ind]
        } else {
            agdata$stop[ind] - lasteventtime
        }
        r <- datamat[ind, "r"]
        datamat[ind, "time"] <- newtime
        # Find the discretization intervals that the times fall into
        smin <- smax + 1
        smax <- sum(as[r, ] < newtime)
        # Most of the time, smin <= smax, however, if both start and stop
        # times fall into the same interval, the previous row is merged into
        # this one and its covariates are averaged, weighted by the widths
        # of the intervals each row covers.
        # NOTE(review): if this triggers on the first row of a run, the
        # previous row belongs to another run (or does not exist); that case
        # is left exactly as before -- confirm it cannot occur in valid input.
        if (smin > smax) {
            prev <- ind - 1
            smin <- datamat[prev, "smin"]
            merged[prev] <- TRUE
            timediff0 <- as[r, datamat[prev, "smax"] + 1] -
                as[r, datamat[prev, "smin"]]
            timediff1 <- as[r, smax + 1] - as[r, smax]
            if (length(cov_out) > 0L) {
                datamat[ind, cov_out] <-
                    (timediff0 * datamat[prev, cov_out] +
                         timediff1 * datamat[ind, cov_out]) /
                    (timediff0 + timediff1)
            }
        }
        datamat[ind, "smin"] <- smin
        datamat[ind, "smax"] <- smax
    }

    # Remove merged rows; drop = FALSE keeps a matrix when one row remains.
    datamat[!merged, , drop = FALSE]
}