Source code for npDoseResponse.npDoseResponseDerivDR

# -*- coding: utf-8 -*-

# Author: Yikun Zhang
# Last Editing: Dec 26, 2024

# Description: This script contains the implementations of the IPW and doubly 
# robust estimators of the derivative of a dose-response curve with and without 
# the positivity condition.

import numpy as np
from .rbf import KernelRetrieval
from .utils import CondDenEst, CondDenEstKDE, KDE
from sklearn.model_selection import KFold
from sklearn.base import BaseEstimator

import torch
import torch.nn as nn
import torch.optim as optim

#=======================================================================================#
# Implementations of the proposed estimators that assume the positivity condition

## Define the neural network
[docs] class NeurNet(nn.Module): def __init__(self, input_size): super(NeurNet, self).__init__() self.fc1 = nn.Linear(input_size, 100) # First layer self.silu = nn.SiLU() self.fc2 = nn.Linear(100, 50) # Second layer self.fc3 = nn.Linear(50, 1) # Apply Kaiming initialization to each layer nn.init.kaiming_normal_(self.fc1.weight, nonlinearity='relu') nn.init.kaiming_normal_(self.fc2.weight, nonlinearity='relu') nn.init.kaiming_normal_(self.fc3.weight, nonlinearity='linear') self.double() def forward(self, x): x = self.silu(self.fc1(x)) x = self.silu(self.fc2(x)) x = self.fc3(x) return x
[docs] def train(mod, X_train, Y_train, lr=0.01, n_epochs=10, momentum=0.7, weight_decay=0, print_loss=True): ''' Utility function for training the PyTorch neural network model via stochastic gradient descent. Parameters ---------- mod: python class The neural network class defined by PyTorch. X_train: (n,d+1)-torch.Tensor The first column of "X_train" is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. Y_train: (n,)-torch.Tensor The outcome variables of n observations. lr: float The learning rate (Default: lr=0.01.) n_epochs: int The number of training epochs. (Default: n_epochs=10.) momentum: float The momentum factor (Default: momentum=0.7.) weight_decay: float The weight decay (L2 penalty) (Default: weight_decay=0.) print_loss: boolean An indicator of whether the training loss will be printed to the console. Return ---------- model: python object The fitted model instance of a neural network class defined by PyTorch. ''' # Initialize the model, loss function, and optimizer model = mod(input_size=X_train.shape[1]) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device) criterion = nn.MSELoss() optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) for epoch in range(n_epochs): model.train() # Forward pass outputs = model(X_train) loss = criterion(outputs, Y_train) # Backward pass and optimization optimizer.zero_grad() # Zero the gradients loss.backward() # Backpropagate optimizer.step() # Update weights if print_loss: print(f'Epoch [{epoch + 1}/{n_epochs}], Loss: {loss.item():.4f}') return model
[docs] def RADRDeriv(Y, X, t_eval, mu, L=1, n_iter=1000, lr=0.1, multi_boot=False, B=1000): ''' Estimating the derivative of a dose-response curve through the regression adjustment (or G-computation) form by a PyTorch neural network model under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: a neural network class defined by PyTorch The conditional mean outcome (or regression) model of Y given X. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) multi_boot: boolean An indicator of whether the multiplier bootstrap will be run. (Default: multi_boot=False.) B: int The number of bootstrapping times. (Default: B=1000.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". mu_boot: (B,m)-array The estimated derivatives of the dose-response curves on bootstrapping data evaluated at points "t_eval". (Only return this quantity when "multi_boot=True".) ''' n = X.shape[0] ## Number of data points if L <= 1: # No cross-fittings: fit the regression model on the entire data X_tensor = torch.from_numpy(X) Y_tensor = torch.from_numpy(Y.reshape(-1,1)) NN_fit = train(mu, X_tensor, Y_tensor, lr=lr, n_epochs=n_iter) beta_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) X_eval_tensor = torch.from_numpy(X_eval) for j in range(X_eval.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_est[j,i] = x_grad[0].item() # theta_est = np.mean(beta_est, axis=0) else: # Conduct L-fold cross-fittings: fit the regression model on the training fold data # and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) beta_est = np.zeros((n, t_eval.shape[0])) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] X_tr_tensor = torch.from_numpy(X_tr) Y_tr_tensor = torch.from_numpy(Y_tr.reshape(-1,1)) NN_fit = train(mu, X_tr_tensor, Y_tr_tensor, lr=lr, n_epochs=n_iter) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) X_eval_te_tensor = torch.from_numpy(X_eval_te) beta_hat = np.zeros((X_eval_te.shape[0],)) for j in range(X_eval_te.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_te_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j] = x_grad[0].item() beta_est[te_ind,i] = beta_hat # theta_est = np.mean(beta_est, axis=0) if multi_boot: theta_boot = np.zeros((B, t_eval.shape[0])) for b in range(B): Z = np.random.randn(n, t_eval.shape[0]) + 1 theta_boot[b,:] = np.mean(Z * beta_est, axis=0) theta_est = np.mean(beta_est, axis=0) return theta_est, theta_boot else: theta_est = np.mean(beta_est, axis=0) return theta_est
[docs] def RADRDerivSKLearn(Y, X, t_eval, mu, L=1, delta=0.01): ''' Estimating the derivative of a dose-response curve through the regression adjustment (or G-computation) form under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: scikit-learn model or any python model that can use ".fit()" and ".predict()" The conditional mean outcome (or regression) model of Y given X. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) delta: float The step value for computing the finite differences (or numerical partial differentiation) of the fitted regression model. Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". ''' n = X.shape[0] ## Number of data points if L <= 1: # No cross-fittings: fit the regression model on the entire data mu_fit = mu.fit(X, Y) t_new = np.linspace(np.min(t_eval)-delta, np.max(t_eval)+delta, t_eval.shape[0]+1) beta_est = np.zeros((n, t_new.shape[0])) for i in range(t_new.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval = np.column_stack([t_new[i]*np.ones(n), X[:,1:]]) beta_est[:,i] = mu_fit.predict(X_eval) theta_est = np.diff(np.mean(beta_est, axis=0))/np.diff(t_new) else: # Conduct L-fold cross-fittings: fit the regression model on the training fold data # and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) t_new = np.linspace(np.min(t_eval)-delta, np.max(t_eval)+delta, t_eval.shape[0]+1) beta_est = np.zeros((n, t_new.shape[0])) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] mu_fit = mu.fit(X_tr, Y_tr) for i in range(t_new.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval_te = np.column_stack([t_new[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) beta_est[te_ind,i] = mu_fit.predict(X_eval_te) theta_est = np.diff(np.mean(beta_est, axis=0))/np.diff(t_new) return theta_est
[docs] def IPWDRDeriv(Y, X, t_eval, condTS_type, condTS_mod, L, h, kern="epanechnikov", tau=0.001, b=None, self_norm=True): ''' Estimating the derivative of a dose-response curve through the inverse probability weighting (IPW) form under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. condTS_type: str Specifying the model type for estimating the conditional density of the treatment variable T given the covariate vector S. condTS_mod: cikit-learn model or any python model that can use ".fit()" and ".predict()" The regression model for estimating the conditional density of T given S. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter. kern: str The name of the kernel function. (Default: kern="epanechnikov".) tau: float The threshold value that lower bounds the estimated conditional density values. (Default: tau=0.001.) b: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". ''' kern_type = kern kern, sigmaK_sq, K_sq = KernelRetrieval(kern) n = X.shape[0] ## Number of data points if L <= 1: # No cross-fittings: fit the conditional density model on the entire data if condTS_type == 'true': condTS_est = condTS_mod elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau beta_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for i in range(t_eval.shape[0]): # Self-normalizing weights norm_w[i] = np.sum(kern((t_eval[i] - X[:,0])/h) / condTS_est) / h beta_hat[:,i] = ((X[:,0] - t_eval[i])/h) * kern((t_eval[i] - X[:,0])/h) * Y / (h**2 * sigmaK_sq * condTS_est) if self_norm: theta_est = np.sum(beta_hat, axis=0) / norm_w else: theta_est = np.mean(beta_hat, axis=0) else: # Conduct L-fold cross-fittings: fit the conditional density model on the training fold # data and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) beta_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] X_te = X[te_ind,:] Y_te = Y[te_ind] if condTS_type == 'true': condTS_est = condTS_mod[te_ind] elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau for i in range(t_eval.shape[0]): # Self-normalizing weights w = np.sum(kern((t_eval[i] - X[te_ind,0])/h) / condTS_est) / h if ~np.isnan(w) and w != np.inf: norm_w[i] = norm_w[i] + w beta_hat[te_ind,i] = ((X[te_ind,0] - t_eval[i])/h) * kern((t_eval[i] - X[te_ind,0])/h) * Y_te / (h**2 * sigmaK_sq * condTS_est) if self_norm: norm_w[norm_w == 0] = 1 theta_est = np.sum(beta_hat, axis=0) / norm_w else: theta_est = np.mean(beta_hat, axis=0) return theta_est
[docs] def DRDRDeriv(Y, X, t_eval, mu, condTS_type, condTS_mod, L, h, kern="epanechnikov", n_iter=1000, lr=0.01, tau=0.001, b=None, self_norm=True): ''' Estimating the derivative of a dose-response curve through the doubly robust (DR) form by a PyTorch neural network model under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: a neural network class defined by PyTorch The conditional mean outcome (or regression) model of Y given X. condTS_type: str Specifying the model type for estimating the conditional density of the treatment variable T given the covariate vector S. condTS_mod: cikit-learn model or any python model that can use ".fit()" and ".predict()" The regression model for estimating the conditional density of T given S. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter. kern: str The name of the kernel function. (Default: kern="epanechnikov".) n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) tau: float The threshold value that lower bounds the estimated conditional density values. (Default: tau=0.001.) b: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". sd_est: (m,)-array The estimated asymptotic stdndard deviation of the DR derivative estimator evaluated at points "t_eval". ''' kern_type = kern kern, sigmaK_sq, K_sq = KernelRetrieval(kern) n = X.shape[0] ## Number of data points if L <= 1: # No cross-fittings: fit the conditional density model and the regression model on the entire data if condTS_type == 'true': condTS_est = condTS_mod elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau X_tensor = torch.from_numpy(X) Y_tensor = torch.from_numpy(Y.reshape(-1,1)) NN_fit = train(mu, X_tensor, Y_tensor, lr=lr, n_epochs=n_iter) mu_hat = np.zeros((n, t_eval.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) beta_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) X_eval_tensor = torch.from_numpy(X_eval) for j in range(X_eval.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j,i] = x_grad[0].item() NN_fit.eval() mu_pred = NN_fit(X_eval_tensor) mu_hat[:,i] = mu_pred.detach().numpy()[:,0] IPW_hat[:,i] = ((X[:,0] - t_eval[i])/h) * kern((t_eval[i] - X[:,0])/h) * (Y - mu_hat[:,i] - (X[:,0] - t_eval[i])*beta_hat[:,i]) / (h**2 * sigmaK_sq * condTS_est) # Self-normalizing weights norm_w[i] = np.sum(kern((t_eval[i] - X[:,0])/h) / condTS_est) / (n * h) if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (beta_hat[:,i] - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) else: # Conduct L-fold cross-fittings: fit the reciprocal of the conditional model # and the regression model on the training fold data and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) mu_hat = np.zeros((n, t_eval.shape[0])) beta_hat = np.zeros((n, t_eval.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) cond_est_full = np.zeros((n,)) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] Y_te = Y[te_ind] if condTS_type == 'true': condTS_est = condTS_mod[te_ind] elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau cond_est_full[te_ind] = condTS_est X_tr_tensor = torch.from_numpy(X_tr) Y_tr_tensor = torch.from_numpy(Y_tr.reshape(-1,1)) NN_fit = train(mu, X_tr_tensor, Y_tr_tensor, lr=lr, n_epochs=n_iter) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) X_eval_te_tensor = torch.from_numpy(X_eval_te) for j in range(X_eval_te.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_te_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j,i] = x_grad[0].item() NN_fit.eval() mu_pred = NN_fit(X_eval_te_tensor) mu_hat[te_ind,i] = mu_pred.detach().numpy()[:,0] IPW_hat[te_ind,i] = ((X[te_ind,0] - t_eval[i])/h) * kern((t_eval[i] - X[te_ind,0])/h) * (Y_te - mu_hat[te_ind,i] - (X[te_ind,0] - t_eval[i])*beta_hat[te_ind,i]) / (h**2 * sigmaK_sq * condTS_est) # Self-normalizing weights w = np.sum(kern((t_eval[i] - X[te_ind,0])/h) / condTS_est) / (n * h) norm_w[i] = norm_w[i] + w if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (beta_hat[:,i] - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) return theta_est, sd_est
[docs] def DRDRDerivSKLearn(Y, X, t_eval, mu, condTS_type, condTS_mod, L, h, kern="epanechnikov", tau=0.001, b=None, delta=0.01, self_norm=True): ''' Estimating the derivative of a dose-response curve through the doubly robust (DR) form under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: scikit-learn model or any python model that can use ".fit()" and ".predict()" The conditional mean outcome (or regression) model of Y given X. condTS_type: str Specifying the model type for estimating the conditional density of the treatment variable T given the covariate vector S. condTS_mod: cikit-learn model or any python model that can use ".fit()" and ".predict()" The regression model for estimating the conditional density of T given S. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter. kern: str The name of the kernel function. (Default: kern="epanechnikov".) n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) tau: float The threshold value that lower bounds the estimated conditional density values. (Default: tau=0.001.) b: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) delta: float The step value for computing the finite differences (or numerical partial differentiation) of the fitted regression model. self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". sd_est: (m,)-array The estimated asymptotic stdndard deviation of the DR derivative estimator evaluated at points "t_eval". ''' kern_type = kern kern, sigmaK_sq, K_sq = KernelRetrieval(kern) n = X.shape[0] ## Number of data points if L <= 1: # No cross-fittings: fit the conditional density model and the regression model on the entire data if condTS_type == 'true': condTS_est = condTS_mod elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X[:,0], X[:,1:], reg_mod=condTS_mod, y_eval=X[:,0], x_eval=X[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau mu_fit = mu.fit(X, Y) mu_hat = np.zeros((n, t_eval.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) t_new = np.linspace(np.min(t_eval)-delta, np.max(t_eval)+delta, t_eval.shape[0]+1) beta_hat = np.zeros((n, t_new.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for i in range(t_new.shape[0]): X_new = np.column_stack([t_new[i]*np.ones(n), X[:,1:]]) beta_hat[:,i] = mu_fit.predict(X_new) beta_hat = np.diff(beta_hat, axis=1) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) mu_hat[:,i] = mu_fit.predict(X_eval) IPW_hat[:,i] = ((X[:,0] - t_eval[i])/h) * kern((t_eval[i] - X[:,0])/h) * (Y - mu_hat[:,i] - (X[:,0] - t_eval[i])*beta_hat[:,i]) / (h**2 * sigmaK_sq * condTS_est) # Self-normalizing weights norm_w[i] = np.sum(kern((t_eval[i] - X[:,0])/h) / condTS_est) / (n * h) if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (beta_hat[:,i] - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) else: # Conduct L-fold cross-fittings: fit the reciprocal of the conditional model # and the regression model on the training fold data and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) mu_hat = np.zeros((n, t_eval.shape[0])) beta_hat = np.zeros((n, t_eval.shape[0])) t_new = np.linspace(np.min(t_eval)-delta, np.max(t_eval)+delta, t_eval.shape[0]+1) beta_can = np.zeros((n, t_new.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) cond_est_full = np.zeros((n,)) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] Y_te = Y[te_ind] if condTS_type == 'true': condTS_est = condTS_mod[te_ind] elif condTS_type == 'kde': condTS_est = CondDenEstKDE(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern=kern_type, b=b) else: condTS_est = CondDenEst(X_tr[:,0], X_tr[:,1:], reg_mod=condTS_mod, y_eval=X_te[:,0], x_eval=X_te[:,1:], kern='gaussian', b=b) condTS_est[condTS_est < tau] = tau cond_est_full[te_ind] = condTS_est mu_fit = mu.fit(X_tr, Y_tr) for i in range(t_new.shape[0]): X_new_te = np.column_stack([t_new[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) beta_can[te_ind,i] = mu_fit.predict(X_new_te) beta_hat[te_ind,:] = np.diff(beta_can[te_ind,:], axis=1) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) mu_hat[te_ind,i] = mu_fit.predict(X_eval_te) IPW_hat[te_ind,i] = ((X[te_ind,0] - t_eval[i])/h) * kern((t_eval[i] - X[te_ind,0])/h) * (Y_te - mu_hat[te_ind,i] - (X[te_ind,0] - t_eval[i])*beta_hat[te_ind,i]) / (h**2 * sigmaK_sq * condTS_est) # Self-normalizing weights w = np.sum(kern((t_eval[i] - X[te_ind,0])/h) / condTS_est) / (n * h) norm_w[i] = norm_w[i] + w if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (beta_hat[:,i] - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) return theta_est, sd_est
[docs] def DRDerivCurve(Y, X, t_eval=None, est="RA", beta_mod=None, n_iter=1000, lr=0.01, condTS_type=None, condTS_mod=None, L=1, h=None, kern="epanechnikov", tau=0.001, h_cond=None, delta=0.01, self_norm=True, print_bw=True): ''' Dose-response curve derivative estimation under the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. (Default: t_eval=None. Then, t_eval=X[:,0], which consists of the observed treatment variables.) est: str The type of the dose-response curve estimator. (Default: est="RA". Other choices include "IPW" and "DR".) beta_mod: PyTorch neural network class or scikit-learn model or any python model that can use ".fit()" and ".predict()" The conditional mean outcome (or regression) model of Y given X. n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) condTS_type: str Specifying the model type for estimating the conditional density of the treatment variable T given the covariate vector S. condTS_mod: cikit-learn model or any python model that can use ".fit()" and ".predict()" The regression model for estimating the conditional density of T given S. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter for the IPW/DR estimator. (Default: h=None. Then the Silverman's rule of thumb is applied; see Chen et al.(2016) for details.) tau: float The threshold value that lower bounds the estimated conditional density values. (Default: tau=0.001.) h_cond: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) print_bw: boolean The indicator of whether the current bandwidth parameters should be printed to the console. (Default: print_bw=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". sd_est: (m,)-array (if est="DR") The estimated asymptotic stdndard deviation of the DR derivative estimator evaluated at points "t_eval". ''' if t_eval is None: t_eval = X[:,0].copy() n = X.shape[0] ## Number of data points if h is None: # Apply Silverman's rule of thumb to select the bandwidth parameter h = (4/3)**(1/5)*(n**(-1/5))*np.std(X[:,0]) if print_bw: print("The current bandwidth for the "+str(est)+" estimator is "+ str(h) + ".\n") if est == "RA": if isinstance(beta_mod, BaseEstimator): theta_est = RADRDerivSKLearn(Y, X, t_eval, beta_mod, L, delta) else: theta_est = RADRDeriv(Y, X, t_eval, beta_mod, L, n_iter, lr) elif est == "IPW": theta_est = IPWDRDeriv(Y, X, t_eval, condTS_type, condTS_mod, L, h, kern, tau, h_cond, self_norm) elif isinstance(beta_mod, BaseEstimator): theta_est = DRDRDerivSKLearn(Y, X, t_eval, beta_mod, condTS_type, condTS_mod, L, h, kern, tau, h_cond, delta, self_norm) else: theta_est = DRDRDeriv(Y, X, t_eval, beta_mod, condTS_type, condTS_mod, L, h, kern, n_iter, lr, tau, h_cond, self_norm) return theta_est
#=======================================================================================# # Implementations of the proposed estimators without assuming the positivity # condition (work for additive confounding models)
[docs] def RADRDerivBC(Y, X, t_eval, mu, L=1, n_iter=1000, lr=0.01, h_bar=None, kernT_bar="gaussian", print_bw=False): ''' Estimating the derivative of a dose-response curve through the regression adjustment (or G-computation) form by a PyTorch neural network model without assuming the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: a neural network class defined by PyTorch The conditional mean outcome (or regression) model of Y given X. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) h_bar: float The bandwidth parameters for the Nadaraya-Watson conditional CDF estimator. (Default: h_bar=None. Then, the Silverman's rule of thumb is applied. See Chen et al.(2016) for details.) kernT_bar: str The name of the kernel function for Nadaraya-Watson conditional CDF estimator. (Default: "gaussian".) print_bw: boolean The indicator of whether the current bandwidth parameters should be printed to the console. (Default: print_bw=False.) Return ---------- theta_C: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". ''' n = X.shape[0] ## Number of data points if h_bar is None: # Apply the Silverman's rule of thumb bandwidth in Chen et al. (2016) h_bar = (4/3)**(1/5)*(n**(-1/5))*np.std(X[:,0]) if print_bw: print("The current bandwidth for the conditional CDF estimator is "+ str(h_bar) + ".\n") # Compute the weight matrix for NW conditional CDF estimator kernT_bar, sigmaK_sq, K_sq = KernelRetrieval(kernT_bar) weight_mat = kernT_bar((t_eval - X[:,0].reshape(-1,1)) / h_bar) weight_mat = weight_mat / np.sum(weight_mat, axis=0) weight_mat[np.isnan(weight_mat)] = 0 if L <= 1: # No cross-fittings: fit the regression model on the entire data X_tensor = torch.from_numpy(X) Y_tensor = torch.from_numpy(Y.reshape(-1,1)) NN_fit = train(mu, X_tensor, Y_tensor, lr=lr, n_epochs=n_iter) beta_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) X_eval_tensor = torch.from_numpy(X_eval) for j in range(X_eval.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_est[j,i] = x_grad[0].item() theta_C = np.sum(beta_est * weight_mat, axis=0) else: # Conduct L-fold cross-fittings: fit the regression model on the training fold data # and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) beta_est = np.zeros((n, t_eval.shape[0])) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] X_tr_tensor = torch.from_numpy(X_tr) Y_tr_tensor = torch.from_numpy(Y_tr.reshape(-1,1)) NN_fit = train(mu, X_tr_tensor, Y_tr_tensor, lr=lr, n_epochs=n_iter) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the fitted regression model X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) X_eval_te_tensor = torch.from_numpy(X_eval_te) beta_hat = np.zeros((X_eval_te.shape[0],)) for j in range(X_eval_te.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_te_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j] = x_grad[0].item() beta_est[te_ind,i] = beta_hat theta_C = np.sum(beta_est * weight_mat, axis=0) return theta_C
[docs] def IPWDRDerivBC(Y, X, t_eval, L=1, h=None, kern="epanechnikov", b=None, thres_val=0.75, self_norm=True): ''' Estimating the derivative of a dose-response curve through the inverse probability weighting (IPW) form without assuming the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter. (Default: h=None. Then, the Silverman's rule of thumb is applied; see Chen et al.(2016) for details.) kern: str The name of the kernel function. (Default: kern="epanechnikov".) b: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) thres_val: float The threshold factor that is multiplied to the maximum conditional density values of S given T evaluated at the sample points. (Default: thres_val=0.75.) self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". ''' kern_type = kern kern, sigmaK_sq, K_sq = KernelRetrieval(kern) n = X.shape[0] ## Number of data points if h is None: # Apply Silverman's rule of thumb to select the bandwidth parameter h = (4/3)**(1/5)*(n**(-1/5))*np.std(X[:,0]) print("The current bandwidth is "+ str(h) + ".\n") if L <= 1: # No cross-fittings: fit the density models on the entire data kde_joint = KDE(X, data=X, kern='gaussian', h=b) beta_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) condST_full = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the interior densities X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) new_joint = KDE(X_eval, data=X, kern=kern_type, h=b) t_den = KDE(t_eval[i], data=X[:,0], kern=kern_type, h=b)[0] condST_den = new_joint / t_den condST_full[:,i] = condST_den # condST_den = condST_den * (condST_den >= np.quantile(condST_den, thres_val)) / (1 - thres_val) trunc_perc = np.mean(condST_den >= thres_val * np.max(condST_den)) condST_den = condST_den * (condST_den >= thres_val * np.max(condST_den)) / (1 - trunc_perc) den_fac = condST_den / kde_joint # Self-normalizing weights norm_w[i] = np.sum(kern((t_eval[i] - X[:,0])/h) * den_fac) / h beta_hat[:,i] = ((X[:,0] - t_eval[i])/h) * kern((t_eval[i] - X[:,0])/h) * Y * den_fac/ (h**2 * sigmaK_sq) if self_norm: theta_est = np.sum(beta_hat, axis=0) / norm_w else: theta_est = np.mean(beta_hat, axis=0) else: # Conduct L-fold cross-fittings: fit the conditional density model on the training fold # data and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) beta_hat = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) condST_full = np.zeros((n, t_eval.shape[0])) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] X_te = X[te_ind,:] Y_te = Y[te_ind] kde_joint = KDE(X_te, data=X_tr, kern='gaussian', h=b) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the interior densities X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) new_joint = KDE(X_eval_te, data=X_tr, kern='gaussian', h=b) t_den = KDE(t_eval[i], data=X_tr[:,0], kern='gaussian', h=b)[0] condST_den = new_joint / t_den condST_full[te_ind,i] = condST_den # condST_den = condST_den * (condST_den >= np.quantile(condST_den, thres_val)) / (1 - thres_val) trunc_perc = np.mean(condST_den >= thres_val * np.max(condST_den)) condST_den = condST_den * (condST_den >= thres_val * np.max(condST_den)) / (1 - trunc_perc) den_fac = condST_den / kde_joint # Self-normalizing weights w = np.sum(kern((t_eval[i] - X_te[:,0])/h) * den_fac) / h if ~np.isnan(w) and w != np.inf: norm_w[i] = norm_w[i] + w beta_hat[te_ind,i] = ((X_te[:,0] - t_eval[i])/h) * kern((t_eval[i] - X_te[:,0])/h) * Y_te * den_fac / (h**2 * sigmaK_sq) if self_norm: norm_w[norm_w == 0] = 1 theta_est = np.sum(beta_hat, axis=0) / norm_w else: theta_est = np.mean(beta_hat, axis=0) return theta_est, condST_full
[docs] def DRDRDerivBC(Y, X, t_eval, mu, L=1, h=None, kern="epanechnikov", n_iter=1000, lr=0.01, b=None, thres_val=0.75, self_norm=True): ''' Estimating the derivative of a dose-response curve through the doubly robust (DR) form by a PyTorch neural network model without assuming the positivity condition. Parameters ---------- Y: (n,)-array The outcome variables of n observations. X: (n,d+1)-array The first column of X is the treatment/exposure variable, while the other d columns are the confounding variables of n observations. t_eval: (m,)-array The coordinates of the m evaluation points. mu: a neural network class defined by PyTorch The conditional mean outcome (or regression) model of Y given X. L: int The number of data folds for cross-fitting. When L<= 1, no cross-fittings are applied and the regression model is fitted on the entire dataset. (Default: L=1.) h: float The bandwidth parameter. (Default: h=None. Then, the Silverman's rule of thumb is applied; see Chen et al.(2016) for details.) kern: str The name of the kernel function. (Default: kern="epanechnikov".) n_iter: int The number of iterations or training epochs of the neural network model. (Default: n_iter=1000.) lr: float The learning rate (Default: lr=0.01.) b: float The bandwidth parameter for the kernel-smoothed conditional density estimation methods. (Default: b=None.) thres_val: float The threshold factor that is multiplied to the maximum conditional density values of S given T evaluated at the sample points. (Default: thres_val=0.75.) self_norm: boolean An indicator of whether the self-normalized version is implemented. (Default: self_norm=True.) Return ---------- theta_est: (m,)-array The estimated derivative of the dose-response curve evaluated at points "t_eval". sd_est: (m,)-array The estimated asymptotic stdndard deviation of the DR derivative estimator evaluated at points "t_eval". ''' kern_type = kern kern, sigmaK_sq, K_sq = KernelRetrieval(kern) n = X.shape[0] ## Number of data points if h is None: # Apply Silverman's rule of thumb to select the bandwidth parameter h = (4/3)**(1/5)*(n**(-1/5))*np.std(X[:,0]) print("The current bandwidth is "+ str(h) + ".\n") if L <= 1: # No cross-fittings: fit the density and regression model on the entire data kde_joint = KDE(X, data=X, kern='gaussian', h=b) X_tensor = torch.from_numpy(X) Y_tensor = torch.from_numpy(Y.reshape(-1,1)) NN_fit = train(mu, X_tensor, Y_tensor, lr=lr, n_epochs=n_iter) mu_hat = np.zeros((n, t_eval.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) beta_hat = np.zeros((n, t_eval.shape[0])) condST_int = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the interior densities and fitted regression model X_eval = np.column_stack([t_eval[i]*np.ones(n), X[:,1:]]) new_joint = KDE(X_eval, data=X, kern=kern_type, h=b) t_den = KDE(t_eval[i], data=X[:,0], kern=kern_type, h=b)[0] condST_den = new_joint / t_den # condST_den = condST_den * (condST_den >= np.quantile(condST_den, thres_val)) / (1 - thres_val) trunc_perc = np.mean(condST_den >= thres_val * np.max(condST_den)) condST_den = condST_den * (condST_den >= thres_val * np.max(condST_den)) / (1 - trunc_perc) den_fac = condST_den / kde_joint X_eval_tensor = torch.from_numpy(X_eval) for j in range(X_eval.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j,i] = x_grad[0].item() NN_fit.eval() mu_pred = NN_fit(X_eval_tensor) mu_hat[:,i] = mu_pred.detach().numpy()[:,0] IPW_hat[:,i] = ((X[:,0] - t_eval[i])/h) * kern((t_eval[i] - X[:,0])/h) * (Y - mu_hat[:,i] - (X[:,0] - t_eval[i])*beta_hat[:,i]) * den_fac/ (h**2 * sigmaK_sq) # Self-normalizing weights norm_w[i] = np.sum(kern((t_eval[i] - X[:,0])/h) * den_fac) / (n * h) condST_int[:,i] = condST_den if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat * condST_int theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (np.mean(beta_hat[:,i] * condST_int[:,i]) - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) else: # Conduct L-fold cross-fittings: fit the reciprocal of the conditional model # and the regression model on the training fold data and evaluate it on the test fold data kf = KFold(n_splits=L, shuffle=True, random_state=0) mu_hat = np.zeros((n, t_eval.shape[0])) beta_hat = np.zeros((n, t_eval.shape[0])) IPW_hat = np.zeros((n, t_eval.shape[0])) condST_int = np.zeros((n, t_eval.shape[0])) norm_w = np.zeros((t_eval.shape[0],)) for tr_ind, te_ind in kf.split(X): X_tr = X[tr_ind,:] Y_tr = Y[tr_ind] X_te = X[te_ind,:] Y_te = Y[te_ind] kde_joint = KDE(X_te, data=X_tr, kern='gaussian', h=b) X_tr_tensor = torch.from_numpy(X_tr) Y_tr_tensor = torch.from_numpy(Y_tr.reshape(-1,1)) NN_fit = train(mu, X_tr_tensor, Y_tr_tensor, lr=lr, n_epochs=n_iter) for i in range(t_eval.shape[0]): # Define the data matrix for evaluating the interior densities and fitted regression model X_eval_te = np.column_stack([t_eval[i]*np.ones(X_te.shape[0]), X_te[:,1:]]) new_joint = KDE(X_eval_te, data=X_tr, kern='gaussian', h=b) t_den = KDE(t_eval[i], data=X_tr[:,0], kern='gaussian', h=b)[0] condST_den = new_joint / t_den # condST_den = condST_den * (condST_den >= np.quantile(condST_den, thres_val)) / (1 - thres_val) trunc_perc = np.mean(condST_den >= thres_val * np.max(condST_den)) condST_den = condST_den * (condST_den >= thres_val * np.max(condST_den)) / (1 - trunc_perc) den_fac = condST_den / kde_joint X_eval_te_tensor = torch.from_numpy(X_eval_te) for j in range(X_eval_te.shape[0]): # Compute the gradient of the fitted regression model with respect to the first coordinate x = X_eval_te_tensor[j,:] x = x.clone().detach().requires_grad_(True) y = NN_fit(x) y_scalar = y[0] y_scalar.backward() x_grad = x.grad beta_hat[j,i] = x_grad[0].item() NN_fit.eval() mu_pred = NN_fit(X_eval_te_tensor) mu_hat[te_ind,i] = mu_pred.detach().numpy()[:,0] IPW_hat[te_ind,i] = ((X[te_ind,0] - t_eval[i])/h) * kern((t_eval[i] - X[te_ind,0])/h) * (Y_te - mu_hat[te_ind,i] - (X[te_ind,0] - t_eval[i])*beta_hat[te_ind,i]) * den_fac / (h**2 * sigmaK_sq) # Self-normalizing weights w = np.sum(kern((t_eval[i] - X[te_ind,0])/h) * den_fac) / (n * h) norm_w[i] = norm_w[i] + w condST_int[te_ind,i] = condST_den if self_norm: IPW_hat = IPW_hat / norm_w # Add up the IPW and RA components theta_hat = IPW_hat + beta_hat * condST_int theta_est = np.mean(theta_hat, axis=0, where=~np.isnan(theta_hat)) # Estimate the variance of theta(t) using the square of the influence function var_est = np.zeros((n, t_eval.shape[0])) for i in range(t_eval.shape[0]): var_est[:,i] = (IPW_hat[:,i] + (np.mean(beta_hat[:,i] * condST_int[:,i]) - theta_est[i]))**2 * (h**3) sd_est = np.sqrt(np.mean(var_est, axis=0)/(n*(h**3))) return theta_est, sd_est