Closed aminadibi closed 7 years ago
In order to make the change, 8 regression formulas in input.R
must be updated:
This is the corresponding piece of code:
##COPD
input$COPD$logit_p_COPD_betas_by_sex<-cbind(
male=c(intercept=-6,age=log(1.96)/10,age2=0,pack_years=log(1.20)/10,current_smoking=0,year=0,asthma=0),
female=c(intercept=-6.2,age=log(1.94)/10,age2=0,pack_years=log(1.20)/10,current_smoking=0,year=0,asthma=0))
input_help$COPD$logit_p_COPD_betas_by_sex<-"Logit of the probability of having COPD (FEV1/FVC<0.7) at time of creation (separately by sex)"
input$COPD$ln_h_COPD_betas_by_sex<-cbind(
male =c(Intercept =-6.65,age = 0.046 ,age2 = 0,pack_years = 0.016, smoking_status = 0,year = 0,asthma = 0)
,female =c(Intercept =-6.92 ,age = 0.05, age2 =0,pack_years = 0.016, smoking_status = 0 ,year = 0,asthma = 0))
input_help$COPD$ln_h_COPD_betas_by_sex<-"Log-hazard of developing COPD (FEV1/FVC<0.7) for those who did not have COPD at creation time (separately by sex)"
##Lung function
input$lung_function$fev1_0_prev_betas_by_sex<-cbind(
male=c(intercept=-1.969651,age=-0.027212,height=3.786599,pack_years=-0.005458,current_smoker=0,sgrq=0),
female=c(intercept=-0.8339577 ,age=-0.0252077 ,height=2.7785297,pack_years=-0.0063040,current_smoker=0,sgrq=0))
input_help$lung_function$fev1_0_prev_betas_by_sex<-"Regression (OLS) coefficients for mean of FEV1 at time of creation for those with COPD (separately by sex)"
input$lung_function$fev1_0_prev_sd_by_sex<-c(male=0.6148,female=0.4242)
input_help$lung_function$fev1_0_prev_sd_by_sex<-"SD of FEV1 at time of creation for those with COPD (separately by sex)"
input$lung_function$fev1_0_inc_betas_by_sex<-cbind(
male=c(intercept=-5.9429280+3.2719928*0.7,age=-0.0253224,height=4.7959326,pack_years=-0.0033031,current_smoker=0,sgrq=0),
female=c(intercept=-3.1143794+2.1388758*0.7,age=-0.0234219,height=3.2741914,pack_years=-0.0032205,current_smoker=0,sgrq=0))
input_help$lung_function$fev1_0_inc_betas_by_sex<-"Regression (OLS) coefficients for mean of FEV1 at time of development of COPD(separately by sex)"
input$lung_function$fev1_0_inc_sd_by_sex<-c(male=0.54,female=0.36)
input_help$lung_function$fev1_0_inc_sd_by_sex<-"SD of FEV1 at time of development of COPD (separately by sex)"
The definition of COPD is already embedded in how the coefficients in these equations are calculated, the 0.7 threshold is never explicitly mentioned in the code.
These equations come at play under the EVENT_COPD
heading in model.WIP.cpp
:
void event_COPD_process(agent *ag)
{
(*ag).fev1=input.lung_function.fev1_0_inc_betas_by_sex[0][(*ag).sex]
+input.lung_function.fev1_0_inc_betas_by_sex[1][(*ag).sex]*((*ag).age_at_creation+(*ag).local_time)
+input.lung_function.fev1_0_inc_betas_by_sex[2][(*ag).sex]*(*ag).height
+input.lung_function.fev1_0_inc_betas_by_sex[3][(*ag).sex]*(*ag).pack_years
+rand_norm()*input.lung_function.fev1_0_inc_sd_by_sex[(*ag).sex];
double pred_fev1=CALC_PRED_FEV1(ag);
(*ag)._pred_fev1=pred_fev1;
if ((*ag).fev1/pred_fev1<0.3) (*ag).gold=4;
else
if ((*ag).fev1/pred_fev1<0.5) (*ag).gold=3;
else
if ((*ag).fev1/pred_fev1<0.8) (*ag).gold=2;
else (*ag).gold=1;
double temp[2];
rbvnorm(input.lung_function.dfev1_re_rho,temp);
(*ag).fev1_slope=input.lung_function.dfev1_betas[0]
+temp[0]*input.lung_function.dfev1_re_sds[0]
+input.lung_function.dfev1_betas[1]*(*ag).sex
+input.lung_function.dfev1_betas[2]*((*ag).age_at_creation+(*ag).local_time)
+input.lung_function.dfev1_betas[3]*(*ag).fev1
+input.lung_function.dfev1_betas[4]*(*ag).smoking_status;
(*ag).fev1_slope_t=input.lung_function.dfev1_betas[5]+temp[1]*input.lung_function.dfev1_re_sds[1];
(*ag).lung_function_LPT=(*ag).local_time;
#if OUTPUT_EX>1
output_ex.cumul_non_COPD_time+=(*ag).local_time;
#endif
#if (OUTPUT_EX & OUTPUT_EX_COPD) > 0
output_ex.n_inc_COPD_by_ctime_age[(int)floor((*ag).time_at_creation+(*ag).local_time)][(int)floor((*ag).age_at_creation+(*ag).local_time)]+=1;
#endif
}
@shahzadg I need 8 LLN-fixed equations from you!
Thanks!
These equations may change in the future:
COPD prevalence from CanCOLD :
ww= (sex== FEMALE ) ? -4.377285 + (0.031689)double(integer_age) +(0.025183)PY + (0.636127)smoking_s :
-5.337997 + 0.044918 double (integer_age) + 0.018734PY + 1.093586 smoking_s ;Initial_COPD_Probability = 1/(1+exp(-ww))
COPD incidence from Amin's equation :
log_Hazard_copd =( sex== MALE ) ?
-7.53034878 + double (integer_age) 0.05169510 + 0.01436641PY: ( sex== FEMALE ) ? -7.75310133 + double (integer_age) 0.05380265 + 0.01466302PY:
FEV1 Prevalence(baseline) :
FEV1= (sex== FEMALE ) ? 1.329805 + -0.031296 double (integer_age)+ -0.006925 PY+ 1.012579(gHeight_m)(gHeight_m) : 1.329805 + -0.031296 double (integer_age)+ -0.006925 PY+ 1.012579(gHeight_m)(gHeight_m) + 0.212974 + 0.003382;
FEV1 Incidence (baseline): FEV1 regression equation from CANCOLD based on LLN from NHANCES, the last term in the equation in the condition that FEV1/FVC is less than LLN of this ratio
FEV1= (sex== FEMALE ) ? 1.5737-2.75e-02double(integer_age)+(-4.65e-03)PY+ (0.97)(gHeight_m)(gHeight_m)+(-0.425)(has_copd_incidence==TRUE) ? 1.4895-3.22e-02double(integer_age)+(-4.28e-03)PY+ (1.27)(gHeight_m)(gHeight_m)+(-0.6831)(has_copd_incidence==TRUE);
@shahzadg Sweet! Using these equations, I recalibrated COPD incidence with the iterative approach. See below for detailed results of each run:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
male female
Intercept -7.903433669 -6.97644682
age 0.049834824 0.03586682
pack_years 0.008978898 0.01823664
smoking_status 1.481478755 0.93397776
Running with 100,000 patients again, to reduce the error and will let you know the results.
First Run:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
male female
Intercept -7.986607467 -6.94609704
age 0.050658975 0.03532266
pack_years 0.009039704 0.01793856
smoking_status 1.467749546 0.94517283
Second Run:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
male female
Intercept -7.995015219 -6.95702191
age 0.050740331 0.03546536
pack_years 0.009078453 0.01793198
smoking_status 1.470198842 0.93574235
Third Run:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
male female
Intercept -7.99955532 -6.94219957
age 0.05078785 0.03518884
pack_years 0.00909231 0.01805253
smoking_status 1.47333598 0.93346005
Average of the three runs:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
Male Female
Intercept -7.993726002 -6.948439507
Age 0.050729052 0.03532562
Pack_Years 0.009070156 0.017974357
Smoking Status 1.470428123 0.938125077
Which results in the following output in validate_COPD()
:
$p_copd_at_creation
[1] 0.1237301
$inc_copd
[1] 0.01525168
$inc_copd_by_sex
[1] 0.01525168
$p_copd_at_creation_by_sex
[1] 0.1146135 0.1328794
$p_copd_at_creation_by_age
40-50 50-60 60-70 70-80 80-111
0.06982552 0.12202486 0.17605307 0.24379671 0.34556754
$p_copd_at_creation_by_pack_years
0-25 25-50 50-75 75-Inf
0.06769634 0.14480957 0.38196978 0.64393939
$calib_prev_copd_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.391246656 0.045394828 0.018360888 1.082309256 0.003023303
$calib_prev_copd_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.414605846 0.031452242 0.025585949 0.643596274 0.003693911
$calib_prev_gold2p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.415187278 0.045521666 0.018518708 1.085318128 0.003365276
$calib_prev_gold2p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.459228427 0.031797929 0.025839680 0.643839335 0.004092516
$calib_prev_gold3p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-6.061249075 0.051633564 0.019579413 1.074169069 0.007117395
$calib_prev_gold3p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-5.600937396 0.043842322 0.028555077 0.620753190 0.005230727
New set of equations for FEV1 prevalence
**Female:**
Coefficients:
Estimate
(Intercept) 1.4564221
age -0.0300643
pkyr -0.0071115
Height2_SI 0.9368746
**Male:**
Coefficients:
Estimate
(Intercept) 1.365940
age -0.032715
pkyr -0.003434
Height2_SI 1.098201
Amin: @shahzadg
This is how COPD incidence will look like based on the above values;
male female
Intercept -7.936003284 -6.70509390
age 0.050620788 0.03106581
pack_years 0.007761126 0.02033920
smoking_status 1.535318307 0.82152890
And here is the output of validate_COPD()
based on that:
$p_copd_at_creation
[1] 0.1232717
$inc_copd
[1] 0.01537453
$inc_copd_by_sex
[1] 0.01537453
$p_copd_at_creation_by_sex
[1] 0.1133195 0.1333243
$p_copd_at_creation_by_age
40-50 50-60 60-70 70-80 80-111
0.0703872 0.1207083 0.1733778 0.2454930 0.3452893
$p_copd_at_creation_by_pack_years
0-25 25-50 50-75 75-Inf
0.06794505 0.14369668 0.38039421 0.62500000
$calib_prev_copd_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.367879784 0.045252104 0.017839056 1.133512105 0.003368152
$calib_prev_copd_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.239224473 0.028748918 0.026674057 0.542716849 0.004807521
$calib_prev_gold2p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.381277144 0.045326321 0.017860215 1.129844308 0.003664168
$calib_prev_gold2p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.298497123 0.029243861 0.026985654 0.543600768 0.005342133
$calib_prev_gold3p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-6.060734417 0.052322228 0.018283254 1.105102799 0.006577326
$calib_prev_gold3p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-5.523640298 0.041397695 0.030253643 0.532689101 0.009042111
@shahzadg is Height2_SI the same as (gHeight_m)? Or is the first one height squared and the second one just simple height?
Also, I have recalculated COPD incidence regression with 100,000 patients. See above for results.
epicR v0.2.0 is now released with these equations:
Average of the three runs:
nIterations: 1000
nPatients: 10,000
time_horizon = 20
Male Female
Intercept -7.993726002 -6.948439507
Age 0.050729052 0.03532562
Pack_Years 0.009070156 0.017974357
Smoking Status 1.470428123 0.938125077
validate_COPD ()
now shows year
coefficients <0.01 for males and females overall and within gold stages:
> validate_COPD()
Initializing the session
working...
Loading required package: tcltk
Terminating the session
$p_copd_at_creation
[1] 0.1233817
$inc_copd
[1] 0.0151412
$inc_copd_by_sex
[1] 0.0151412
$p_copd_at_creation_by_sex
[1] 0.1158011 0.1309154
$p_copd_at_creation_by_age
40-50 50-60 60-70 70-80 80-111
0.07000574 0.12251603 0.17493595 0.24547999 0.33979902
$p_copd_at_creation_by_pack_years
0-25 25-50 50-75 75-Inf
0.06699846 0.14636750 0.38039481 0.64957265
$calib_prev_copd_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.381403024 0.045154411 0.018399010 1.140008420 0.004052329
$calib_prev_copd_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.400657495 0.031119787 0.025215774 0.621563609 0.004300421
$calib_prev_gold2p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-5.398984871 0.045246649 0.018537617 1.139637529 0.004232997
$calib_prev_gold2p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-4.436814908 0.031427110 0.025395388 0.621184960 0.004526553
$calib_prev_gold3p_reg_coeffs_male
(Intercept) age pack_years smoking_status year
-6.059049685 0.051280414 0.019945602 1.113019731 0.008884526
$calib_prev_gold3p_reg_coeffs_female
(Intercept) age pack_years smoking_status year
-5.52739057 0.04239654 0.02828973 0.60033735 0.00725957
epicR v0.3.0 is released with corrections of terms in FEV1 prevalence and FEV1 incidence.
The incidence of COPD had to be re-calibrated to account for changes. The new COPD incidence is as below:
male female
Intercept -8.341626527 -7.40741695
age 0.053539987 0.03751128
pack_years 0.004274558 0.01995802
smoking_status 1.523356867 0.92879076
@shahzadg
The definition of COPD should be upgraded to LLN, instead of the fixed 0.7 ratio threshold for FEV1/FVC, as per Hankinson JL, Odencrantz JR, Fedan KB. Spirometric reference values from a sample of the general US population. American journal of respiratory and critical care medicine. 1999 Jan 1;159(1):179-87.
The change has already taken place in POHEM model.