TABLE OF CONTENTS
initialization/makedatamat [ Functions ]
NAME
makedatamat --- make data matrix
FUNCTION
Convert an Andersen-Gill data frame for a single recurrent event process into the data matrix format used in the remainder of the algorithm.
SYNOPSIS
206 makedatamat <- function(agdata, as, alternating)
INPUTS
agdata a data frame in Andersen-Gill format with columns
i,j,k,r,start,stop,delta and covariates
as a matrix of discretization breakpoints, for each stratum
generated by makeas
alternating boolean, indicating whether the at-risk function
alternates between events (episodic process)
OUTPUTS
datamat a matrix with columns i,j,k,r,time,delta,smin,smax
and covariates. The columns are:
i cluster
j subject
k event counter
r stratum
time length of time for each interval
delta event indicator
smin discretization interval corresponding to
start time in input
smax discretization interval corresponding to
stop time in input
SOURCE
# Convert an Andersen-Gill data frame for a single recurrent event process
# into the data matrix format used in the remainder of the algorithm.
#
# Arguments:
#   agdata       data frame whose first four columns are copied verbatim
#                (documented as i, j, k, r) and which has columns named
#                i, j, start, stop and delta. Columns 8 onwards, if any, are
#                treated as covariates.
#   as           matrix of discretization breakpoints, read as as[r, ] for
#                the stratum r of each row.
#   alternating  logical. If TRUE, the time of a row is stop - start of that
#                row; otherwise it is stop minus the start time of the first
#                row of the current event.
#
# Returns:
#   A matrix with columns i, j, k, r, time, delta, smin, smax followed by the
#   covariates. Rows that were merged into their successor (because both fell
#   into the same discretization interval) are removed. The result is always
#   a matrix, even when it has zero or one rows.
makedatamat <- function(agdata, as, alternating) {
  n_rows <- nrow(agdata)
  # Covariate columns of the input; empty when agdata has only the seven
  # fixed columns (8:ncol(agdata) would count backwards in that case).
  cov_in <- seq_len(ncol(agdata))[-(1:7)]

  # Allocate space. Most data is copied directly from agdata. The fillers are
  # expanded to n_rows so that a zero-row input is accepted as well.
  datamat <- cbind(agdata[, 1:4, drop = FALSE],
                   time = rep(0, n_rows), delta = agdata$delta,
                   smin = rep(1, n_rows), smax = rep(0, n_rows))
  if (length(cov_in) > 0) {
    # drop = FALSE keeps the column name when there is a single covariate
    datamat <- cbind(datamat, agdata[, cov_in, drop = FALSE])
  }
  datamat <- as.matrix(datamat)
  cov_out <- seq_len(ncol(datamat))[-(1:8)]

  # Rows in which a new event has started: either the (i, j) pair changes or
  # the previous row ended in an event. i and j are compared separately so
  # that distinct pairs can never be mistaken for the same process.
  new_process <- c(TRUE, diff(agdata$i) != 0 | diff(agdata$j) != 0)
  after_event <- c(TRUE, agdata$delta[-n_rows] == 1)
  diffevent <- new_process | after_event

  # Flags rows that have been merged into the following row
  merged <- logical(n_rows)

  # Loop to compute discretization intervals
  for (ind in seq_len(n_rows)) {
    # Reset last event start time
    if (diffevent[ind]) {
      lasteventtime <- agdata$start[ind]
      smax <- 0
    }
    # Interevent time is given by stop - start from agdata.
    if (alternating) {
      newtime <- agdata$stop[ind] - agdata$start[ind]
    } else {
      newtime <- agdata$stop[ind] - lasteventtime
    }
    r <- datamat[ind, "r"]
    datamat[ind, "time"] <- newtime

    # Find the discretization intervals that the times fall into
    smin <- smax + 1
    smax <- sum(as[r, ] < newtime)

    # Most of the time, smin <= smax, however, if both start and stop times
    # fall into the same interval, this row is merged with the previous one.
    if (smin > smax) {
      if (smax < 1) {
        # No breakpoint lies below newtime, so there is no interval to merge
        # into; as[r, smax] would be empty and the merge cannot be computed.
        stop("row ", ind, " of agdata: time ", newtime,
             " does not exceed the first breakpoint of stratum ", r,
             call. = FALSE)
      }
      prev <- ind - 1
      smin <- datamat[prev, "smin"]
      merged[prev] <- TRUE
      # Covariates of the merged row are the average of both rows, weighted
      # by the breakpoint spans computed below.
      timediff0 <- as[r, datamat[prev, "smax"] + 1] -
        as[r, datamat[prev, "smin"]]
      timediff1 <- as[r, smax + 1] - as[r, smax]
      if (length(cov_out) > 0) {
        datamat[ind, cov_out] <- (timediff0 * datamat[prev, cov_out] +
          timediff1 * datamat[ind, cov_out]) / (timediff0 + timediff1)
      }
    }
    datamat[ind, "smin"] <- smin
    datamat[ind, "smax"] <- smax
  }

  # Remove merged entries; drop = FALSE keeps a one-row result a matrix
  if (any(merged)) {
    datamat <- datamat[!merged, , drop = FALSE]
  }

  datamat
}