TABLE OF CONTENTS
00main/bivrec.agdata [ Functions ]
NAME
bivrec.agdata --- fitter for bivariate models
FUNCTION
The main fitting function. Fits the model, given an Anderson-Gill data set and other parameters, iterating between updating frailty, dispersion and regression parameters until convergence. After initializing, fitting consists of iteratively calling fupdatefrailties4 to update frailty estimates, updatedisppears to update dispersion parameter estimtes, and updateregprof to update regression parameter estimates. The function fmkstderr computes standard errors at the end.
See also the call graph linked from 00main.
This function should not be called directly, rather, the interface provided by the bivrec.formula and unirec.formula methods should be used.
SYNOPSIS
2449 bivrec.agdata <- function(x, K1 = 10, K2 = 10, excludevars1 = NULL, 2450 excludevars2 = NULL, verbose = 1, alternating = FALSE, 2451 dispest = "pearson", correction = "none", computesd = TRUE, 2452 fullS = TRUE, fixzero = NULL, smooth = FALSE, maxiter = 200, 2453 convergence = 1e-3, initial = NULL)
INPUTS
x a data frame with the following columns: i cluster index j subject index k event counter r stratum index start interval start time stop interval stop time delta indicator for event 1 Delta indicator for event 2 ... columns of all covariates used in the model K1 if > 1, number of discretization breakpoints for event 1 if < 1, proportion relative to maximum can be of length > 1 for different strata K2 analogous for event 2 excludevars1 vector of strings giving covariate names that should not be included for process 1 excludevars2 analogous for process 2 verbose integer from 1-5 specifying the amount of output printed alternating boolean indicating whether the data are episodic dispest dispersion parameter estimators to use. Options are "pearson", "marginal" and "ohlsson" correction determines whether to use Ma's correction. Options are "none", "single" or "double" computesd boolean, determines whether to compute standard errors fullS boolean, determines whether to compute the full sensitivity matrix fixzero list of parameters that should be fixed at 0. For univariate models, this should contain "sigma2dhat", "nu2dhat" and "thetahat" smooth boolean, whether to smooth the baseline hazards maxiter maximum number of iterations permitted before failing convergence convergence criterion, absolute change in all params initial optionally a list containing initial values for betahat, betadhat, Uihat, Vihat, Uijhat, Vijhat and dispparams
OUTPUTS
an object of class bivrec, see the package documentation for details
SOURCE
2456 { 2457 # Read in the input and make basic adjustments 2458 agdata <- x 2459 K <- K1;Kd <- K2; 2460 if(is.null(excludevars1)) vars1 <- NULL else 2461 vars1 <- which(!colnames(x)[ - (1:8)]%in%excludevars1) 2462 if(is.null(excludevars2)) vars2 <- NULL else 2463 vars2 <- which(!colnames(x)[ - (1:8)]%in%excludevars2) 2464 maxiter.outer <- maxiter; thresh.outer <- convergence; 2465 fixzero <- fixzero[fixzero%in%c("clust1", "clust2", "subj1", "subj2", "cov")] 2466 fixzero[fixzero == "clust1"] <- "sigma2hat" 2467 fixzero[fixzero == "clust2"] <- "sigma2dhat" 2468 fixzero[fixzero == "subj1"] <- "nu2hat" 2469 fixzero[fixzero == "subj2"] <- "nu2dhat" 2470 fixzero[fixzero == "cov"] <- "thetahat" 2471 if(length(fixzero) == 0) fixzero <- NULL 2472 2473 # Check whether a univariate model is being fit 2474 if(all(c("sigma2dhat", "nu2dhat", "thetahat")%in%fixzero)) 2475 univariate <- TRUE else univariate <- FALSE 2476 2477 call <- match.call() 2478 # Get the values of m and Ji from the agdata matrix, and set 2479 # global variables with those values. 2480 m <- max(agdata$i) 2481 Jilocal <- rep(0, m) 2482 for(i in 1:m){ 2483 Jilocal[i] <- max(agdata$j[agdata$i == i]) 2484 } 2485 Ji <- Jilocal 2486 # If the provided K, Kd are of length 1, then assume all strata should 2487 # have the same number of breakpoints 2488 nr <- length(unique(agdata$r)) 2489 2490 ########################### 2491 #### Begin find initial values for frailties and coefficients 2492 2493 # Initial values are computed using coxph. 2494 # For recurrent events, the response time is the interevent time stop - start, 2495 # and event indicator delta. 2496 if(verbose >= 2) cat("Get initial values\n") 2497 if(dim(agdata)[2] > 9) if(sum(agdata[, 10] != 0) == 0) agdata <- agdata[, -10] 2498 2499 # create separate data frames for each process 2500 agdata1 <- agdata[, -8] 2501 agdata2 <- agdata[, -7];colnames(agdata2)[7] <- "delta" 2502 2503 # remove duplicate rows in each of the two data frames, and adjust the 2504 # times accordingly 2505 if(alternating){ 2506 atrisk1 <- c(TRUE,!duplicated(agdata[, 1:2])[ - 1]) | 2507 c(TRUE, agdata$Delta[ - length(agdata$Delta)] == 1) 2508 agdata1 <- agdata[atrisk1, ] 2509 agdata2 <- agdata[!atrisk1, ] 2510 agdata1 <- agdata1[, -8] 2511 agdata2 <- agdata2[, -7];colnames(agdata2)[7] <- "delta" 2512 }else{ 2513 # new method 2514 dup <- duplicated(agdata1[, -c(3, 5, 6, 7)]) 2515 dup <- c(dup[ - 1], FALSE) & agdata1$delta != 1 2516 agdata1 <- agdata1[!dup, ] 2517 agdata1$start[ - 1] <- agdata1$stop[ - length(agdata1$stop)] 2518 agdata1$start[!duplicated(agdata1[, 1:2])] <- 0 2519 2520 dup <- duplicated(agdata2[, -c(3, 5, 6, 7)]) 2521 dup <- c(dup[ - 1], FALSE) & agdata2$delta != 1 2522 agdata2 <- agdata2[!dup, ] 2523 agdata2$start[ - 1] <- agdata2$stop[ - length(agdata2$stop)] 2524 agdata2$start[!duplicated(agdata2[, 1:2])] <- 0 2525 } 2526 2527 # Remove excluded covariates 2528 if(!is.null(vars1)) agdata1 <- agdata1[, c(1:7, 7 + vars1)] 2529 if(!is.null(vars2)) agdata2 <- agdata2[, c(1:7, 7 + vars2)] 2530 2531 # Compute the number of discretization breakpoints, if K or Kd were specified 2532 # as a proportion of the maximum 2533 if(length(K) == 1 & K <= 1) { 2534 Kout <- rep(0, nr) 2535 for(r in 1:nr) Kout[r] <- round(length(unique((agdata1$stop - 2536 agdata1$start)[agdata1$delta == 1 & agdata1$r == r])) * K) 2537 K <- Kout 2538 } 2539 if(length(Kd) == 1 & Kd <= 1) { 2540 Kout <- rep(0, nr) 2541 for(r in 1:nr) Kout[r] <- round(length(unique((agdata2$stop - 2542 agdata2$start)[agdata2$delta == 1 & agdata2$r == r])) * Kd) 2543 Kd <- Kout 2544 } 2545 if(length(K) == 1) K = rep(K, nr) 2546 if(length(Kd) == 1) Kd = rep(Kd, nr) 2547 if(length(Kd) != nr | length(K) != nr) stop("K needs to be as long as the number of strata") 2548 if(univariate) Kd <- 1 2549 if(verbose >= 1) cat("K = ", K, "\t Kd = ", Kd, "\n") 2550 2551 # Covariate names are needed for Cox model fits and for output 2552 covariatenames1 <- paste(colnames(agdata1)[ - (1:7)], collapse = " + ") 2553 covariatenames2 <- paste(colnames(agdata2)[ - (1:7)], collapse = " + ") 2554 2555 if(is.null(initial)){ 2556 2557 # gamma frailties 2558 ftype <- "gamma" 2559 2560 # Models need to be fit with both subject - level frailties, and 2561 # cluster - level frailties, resulting in the four models fit below. 2562 # Compute Cox fits with cluster frailties 2563 if(m > 1){ 2564 cox.clust1 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~", 2565 covariatenames1, "+ frailty(i, distribution='", ftype, "')", 2566 sep = "")), agdata1)) 2567 if(!univariate) 2568 cox.clust2 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~", 2569 covariatenames2, "+ frailty(i, distribution='", ftype, "')", 2570 sep = "")), agdata2)) 2571 else 2572 cox.clust2 <- list(frail = rep(0, m), fvar = 0) 2573 }else{ 2574 cox.clust1 <- list(frail = 0, fvar = 0) 2575 cox.clust2 <- list(frail = 0, fvar = 0) 2576 } 2577 # Compute Cox fits with subject - level frailties 2578 cox.ind1 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~", 2579 covariatenames1, "+ frailty(i * 1000 + j, distribution='", ftype, "')", 2580 sep = "")), agdata1)) 2581 if(!univariate) 2582 cox.ind2 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~", 2583 covariatenames2, "+ frailty(i * 1000 + j, distribution='", ftype, "')", 2584 sep = "")), agdata2)) 2585 else 2586 cox.ind2 <- list(frail = rep(0, sum(Ji)), fvar = 0, 2587 coef = rep(0, dim(agdata2)[2] - 7)) 2588 2589 if(inherits(cox.clust1, "try - error") | inherits(cox.clust2, "try - error") | 2590 inherits(cox.ind1, "try - error") | inherits(cox.ind2, "try - error") ) 2591 browser() 2592 2593 ## Set initial values for frailties based on estimated values 2594 Uihat <- exp(cox.clust1$frail) 2595 Vihat <- exp(cox.clust2$frail) 2596 if(!alternating){ 2597 Uijhat <- exp(cox.ind1$frail) 2598 Vijhat <- exp(cox.ind2$frail) 2599 }else{ 2600 Uijhat <- exp(cox.ind1$frail) 2601 Vijhat <- rep(1, length(Uijhat)) 2602 Vijhat[unique(agdata1$i * 1000 + agdata1$j)%in%unique(agdata2$i * 1000 + 2603 agdata2$j)] <- exp(cox.ind2$frail) 2604 } 2605 2606 ## Set initial values for dispersion parameters 2607 sigma2hat <- mean(cox.clust1$fvar) 2608 sigma2dhat <- mean(cox.clust2$fvar) 2609 nu2hat <- max(mean(cox.ind1$fvar) - sigma2hat, .1) 2610 nu2dhat <- max(mean(cox.ind2$fvar) - sigma2hat, .1) 2611 thetahat <- cov(Uijhat, Vijhat) 2612 2613 ## Check that dispersion parameters are valid 2614 if(abs(thetahat)<.001) thetahat <- sign(thetahat)*.01 2615 if(abs(sigma2hat)<.001) sigma2hat <- sign(sigma2hat)*.01 2616 if(abs(sigma2dhat)<.001) sigma2dhat <- sign(sigma2dhat)*.01 2617 if(abs(nu2hat)<.001) nu2hat <- sign(nu2hat)*.01 2618 if(abs(nu2dhat)<.001) nu2dhat <- sign(nu2dhat)*.01 2619 2620 ## Extract initial regression parameters 2621 betahat <- cox.ind1$coef 2622 betadhat <- cox.ind2$coef 2623 2624 # Save initial dispersion parameters 2625 dispparams <- list(sigma2hat = sigma2hat, sigma2dhat = sigma2dhat, 2626 nu2hat = nu2hat, nu2dhat = nu2dhat, thetahat = thetahat) 2627 2628 # Fix something at zero if desired 2629 dispparams[fixzero] <- 0 2630 2631 if(sum(is.na(betahat)) + sum(is.na(betadhat)) > 0){ 2632 warning("Unable to find good starting values") 2633 return(0) 2634 } 2635 2636 ## Save all initial values for output 2637 initial <- list(betahat = betahat, betadhat = betadhat, 2638 dispparams = dispparams) 2639 if(verbose >= 1) cat("\t betahat =", betahat, ", betadhat = ", 2640 betadhat, "\n") 2641 if(verbose >= 1) cat("\t dispparams = ", dispparams$sigma2hat, 2642 dispparams$sigma2dhat, dispparams$nu2hat, dispparams$nu2dhat, 2643 dispparams$thetahat, "\n") 2644 2645 }else{ # if initial values are provided, use those 2646 dispparams <- initial$dispparams 2647 Uihat <- initial$Uihat 2648 Uijhat <- initial$Uijhat 2649 Vihat <- initial$Vihat 2650 Vijhat <- initial$Vijhat 2651 betahat <- initial$betahat 2652 betadhat <- initial$betadhat 2653 } 2654 2655 ########################## 2656 ### Convert Anderson - Gill data into matrix form and initialize matrices 2657 if(verbose >= 2) cat("Initialize matrices for estimation\n") 2658 2659 ## Compute breakpoints 2660 as <- makeas(agdata1, K, alternating) 2661 asd <- makeas(agdata2, Kd, alternating) 2662 2663 ## Write frailty estimates in the form of a matrix 2664 Uijhatframe <- makeUijframe(m, Ji, Uijhat, Vijhat) 2665 2666 ## Create data matrix 2667 datamat <- makedatamat(agdata1, as, alternating) 2668 datamatd <- makedatamat(agdata2, asd, alternating) 2669 2670 ## Compute initial estimates of baseline hazard parameters 2671 alphars <- fmakealphars2(m, Ji, datamat, betahat, as, Uijhatframe$Uijmat, smooth) 2672 alpharsd <- fmakealphars2(m, Ji, datamatd, betadhat, asd, 2673 Uijhatframe$Vijmat, smooth) 2674 2675 ## Cleanup: Remove Anderson - Gill data 2676 varnames1 <- colnames(agdata1)[ - (1:7)] 2677 varnames2 <- colnames(agdata2)[ - (1:7)] 2678 rm(agdata) 2679 2680 ########################## 2681 ### Begin iterative estimation procedure 2682 2683 if(verbose >= 2) cat("Iterative estimation procedure\n") 2684 2685 ## Initialize convergence criterion 2686 convergence <- 1000 2687 iter <- 0 2688 2689 # Compute ad - hoc correction value 2690 if(is.character(correction)){ 2691 if(correction == "none") corrval <- 1 2692 if(correction == "single") corrval <- max(1, m / (m - length(varnames1))) 2693 if(correction == "double") corrval <- max(1, m / (m - 2 * length(varnames1))) 2694 } else corrval <- correction 2695 2696 ## Loop until convergence 2697 while((convergence > thresh.outer) & (iter <= maxiter.outer)){ 2698 2699 # Starting convergence criterion and iteration counter 2700 conv.inner <- 1000 2701 iter.inner <- 0 2702 2703 ######## Step 1: Update Frailty estimates 2704 if(verbose >= 2) cat("\tUpdating frailty estimates\n") 2705 frailtyoutput <- fupdatefrailties4(m, Ji, datamat, datamatd, 2706 alphars, alpharsd, as, asd, betahat, betadhat, dispparams) 2707 2708 ######## Step 2: Update Dispersion coefficient estimates 2709 if(dispest == "ohlsson"){ 2710 if(verbose >= 2) cat("\tUpdating dispersion parameters (Ohlsson)\n") 2711 # Use Ohlsson estimators 2712 dispersionoutput <- updatedispohls(m, Ji, datamat, datamatd, 2713 alphars, alpharsd, as, asd, betahat, betadhat, fixzero) 2714 if(is.null(names(dispersionoutput))){return(0)} 2715 } 2716 if(dispest == "pearson"){ 2717 if(verbose >= 2) cat("\tUpdating dispersion parameters (Pearson)\n") 2718 # Use Pearson estimation code 2719 dispersionoutput <- updatedisppears(m, Ji, frailtyoutput, 2720 dispparams, corrval) 2721 dispersionoutput[fixzero] <- 0 2722 } 2723 if(dispest == "marginal"){ 2724 if(verbose >= 2) cat("\tUpdating dispersion parameters (Marginal)\n") 2725 # Use marginal estimation code 2726 dispersionoutput <- updatedispmarg2(m, Ji, datamat, datamatd, 2727 alphars, alpharsd, as, asd, betahat, betadhat, fixzero, univariate) 2728 } 2729 if(length(dispest) == 5){ 2730 # it's possible to compute some parameters via Pearson and others by 2731 # marginal estimators. This was for testing only, there's no reason 2732 # to ever do this! 2733 dispersionoutput.marg <- unlist(updatedispmarg2(m, Ji, datamat, datamatd, 2734 alphars, alpharsd, as, asd, betahat, betadhat, fixzero)) 2735 dispersionoutput.pears <- unlist(updatedisppears(m, Ji, frailtyoutput, 2736 dispparams, corrval)) 2737 dispersionoutput <- dispersionoutput.marg 2738 dispersionoutput[dispest == "p"] <- dispersionoutput.pears[dispest == "p"] 2739 dispersionoutput <- as.list(dispersionoutput) 2740 } 2741 2742 iter.inner <- iter.inner + 1 2743 2744 ######## Step 3: Update Regression parameter estimates 2745 if(verbose >= 2) cat("\tUpdating regression parameter estimates ...\n") 2746 regressionoutput <- updateregprof(m, Ji, datamat, datamatd, 2747 alphars, alpharsd, as, asd, betahat, betadhat, K, Kd, 2748 frailtyoutput, dispersionoutput, smooth) 2749 if(is.null(names(regressionoutput))){ 2750 return(0) 2751 } 2752 alpharsnew <- regressionoutput$alphars; 2753 alpharsdnew <- regressionoutput$alpharsd; 2754 betahatnew <- regressionoutput$betahat; 2755 betadhatnew <- regressionoutput$betadhat; 2756 2757 # Compute new value of convergence criterion 2758 newconvergence <- sum(abs(betahatnew - betahat)) + 2759 sum(abs(betadhatnew - betadhat)) + 2760 abs(dispparams$sigma2hat - dispersionoutput$sigma2hat) + 2761 abs(dispparams$sigma2dhat - dispersionoutput$sigma2dhat) + 2762 abs(dispparams$nu2hat - dispersionoutput$nu2hat) + 2763 abs(dispparams$nu2dhat - dispersionoutput$nu2dhat) + 2764 abs(dispparams$thetahat - dispersionoutput$thetahat) 2765 if(verbose >= 1) cat("\tConvergence criterion: ", newconvergence, "\n") 2766 2767 # DEBUG: Output state at this iteration 2768 if(verbose >= 2) cat("\t betahat =", betahatnew, ", betadhat = ", 2769 betadhatnew, "\n") 2770 if(verbose >= 2) cat("\t dispparams = ", format(unlist(dispersionoutput), 2771 digits = 4), "\n") 2772 2773 if(is.nan(newconvergence) | iter == maxiter.outer | 2774 (newconvergence - convergence) > 10) { 2775 warning("Outer loop did not converge") 2776 return(0) 2777 } 2778 2779 # Replace parameter values with new parameters 2780 alphars <- alpharsnew 2781 alpharsd <- alpharsdnew 2782 betahat <- betahatnew 2783 betadhat <- betadhatnew 2784 convergence <- newconvergence 2785 dispparams <- dispersionoutput 2786 iter <- iter + 1 2787 } 2788 2789 ## Format for output, compute standard errors, etc 2790 betahat <- regressionoutput$betahat 2791 betadhat <- regressionoutput$betadhat 2792 alphars <- as.vector(regressionoutput$alphars) 2793 alpharsd <- as.vector(regressionoutput$alpharsd) 2794 if(computesd){ 2795 # Compute standard errors either using full sensitivity matrix, or only 2796 # the sensitivity of the regression coefficients 2797 if(fullS){ 2798 np <- length(betahat) + sum(K);npd <- length(betadhat) + sum(Kd) 2799 b <- matrix(0, np + npd, length(betahat) + length(betadhat)) 2800 b[c(sum(K) + 1:length(betahat), sum(K) + sum(Kd) + length(betahat) + 2801 1:length(betadhat)), 1:dim(b)[2]] <- diag(1, dim(b)[2]) 2802 stderr <- fmkstderr(m, Ji, b, datamat, datamatd, regressionoutput$alphars, 2803 regressionoutput$alpharsd, betahat, betadhat, as, asd, 2804 frailtyoutput, dispersionoutput, K, Kd, univariate) 2805 }else{ 2806 S <- fmakeSens2(m, Ji, datamat, datamatd, regressionoutput$alphars, 2807 regressionoutput$alpharsd, betahat, betadhat, as, asd, 2808 frailtyoutput, dispersionoutput, K, Kd, fullS) 2809 if(univariate) S <- S[1:length(betahat), 1:length(betahat)] 2810 stderr <- sqrt(diag(-solve(S))) 2811 } 2812 std_betahat <- stderr[1:length(betahat)] 2813 std_betadhat <- stderr[length(betahat) + 1:length(betadhat)] 2814 }else{ 2815 stderr <- 0 2816 std_betahat <- 0 2817 std_betadhat <- 0 2818 } 2819 2820 # Compute p-values and summaries 2821 pval = pmin(pnorm(betahat / std_betahat), 1 - pnorm(betahat / std_betahat)) 2822 pval.d = pmin(pnorm(betadhat / std_betadhat), 1 - pnorm(betadhat / std_betadhat)) 2823 varnames <- unique(c(varnames1, varnames2)) 2824 summary.reg <- as.data.frame(matrix(NA, length(varnames), 6), row.names = varnames) 2825 colnames(summary.reg) <- c("coeff.1", "se.1", "pval.1", "coeff.2", "se.2", "pval.2") 2826 summary.reg[match(varnames1, varnames), 1:3] <- cbind(betahat, std_betahat, pval) 2827 summary.reg[match(varnames2, varnames), 3 + 1:3] <- 2828 cbind(betadhat, std_betadhat, pval.d) 2829 2830 rhohat <- dispparams$thetahat / sqrt((dispparams$sigma2hat + dispparams$nu2hat) * 2831 (dispparams$sigma2dhat + dispparams$nu2dhat)) 2832 2833 summary.disp <- c(dispparams$sigma2hat, dispparams$sigma2dhat, dispparams$nu2hat, 2834 dispparams$nu2dhat, dispparams$thetahat, rhohat) 2835 names(summary.disp) <- c("sigma2hat", "sigma2dhat", "nu2hat", 2836 "nu2dhat", "thetahat", "rhohat") 2837 2838 2839 return(list(call = call, 2840 summary.reg = summary.reg, 2841 summary.disp = summary.disp, 2842 regressionoutput = regressionoutput, 2843 frailtyoutput = frailtyoutput, 2844 dispparams = dispparams, 2845 initial = initial, 2846 stderr = stderr)) 2847 }