* Read the fixed-column text extract; the numbers after each variable name are
* the column ranges it occupies in the raw file.
* NOTE(review): the path is specific to the original author's machine -- point
* it at your own copy of cps695dat.txt before running.
infix hspnon 9-10 race 20-21 sex 31-32 numkids 41-43 ageint 52-54 ///
      age1st 63-65 age2nd 74-76 using "D:\Martin_UI\STATA users group\cps695dat.txt"

* Check every raw variable against its expected coding before building anything.
* variable description: hspnon = 1 if hispanic, 2 if not
tab hspnon, missing
* race = 1 if white, 2 if black, 3 if native descent, 4 if asian, 5 if unknown
tab race, missing
* sex = 2 (females only)
tab sex, missing
* number of kids should be 1 or more (mothers only)
tab numkids, missing
* age at interview should be 306 to 366 (1965-1970 birth cohort)
tab ageint, missing
* age at first birth should be a value
tab age1st, missing
* age at second birth may be missing
tab age2nd, missing
* there is no case id, so create one
* fill(1 2) continues the pattern 1, 2, 3, ... down the rows in current sort order
egen id = fill(1 2)
* create a dummy variable for whether a second birth occurred
* Start every case at 9 so anything matched by neither rule below stays visible
* in the tabulation instead of silently becoming missing.
generate birth2 = 9
* Missing sorts above every number in Stata, so the upper bound (<600) is what
* keeps missing age2nd out of the "had a second birth" group.
replace birth2 = 1 if age2nd>100 & age2nd<600
replace birth2 = 0 if age2nd==.
tab birth2, missing
* create dummy variables for the race and ethnic categories
* Each dummy starts at 9 so cases that match neither the 1-rule nor the 0-rule
* show up as 9 in the tab that follows.

* hispanic:
generate hispanic = 9
replace hispanic = 1 if hspnon==1
replace hispanic = 0 if hspnon==2
tab hispanic

* non-hispanic black:
generate nhblack = 9
replace nhblack = 1 if race==2 & hspnon==2
replace nhblack = 0 if hspnon==1 | race==1 | race==3 | race==4 | race==5
tab nhblack

* non-hispanic other (race codes 3 and above):
generate nhother = 9
replace nhother = 1 if race>=3 & hspnon==2
replace nhother = 0 if hspnon==1 | race<3
tab nhother

* cross-check the new dummies against the source variables
* (the by prefix requires the data to be sorted on the by-variable first)
sort race
by race: tab nhblack nhother
by race: tab hispanic
sort hspnon
by hspnon: tab hispanic
* create a variable for interval from first birth to second or interview
* 999 is a placeholder: it survives only for cases where birth2 is neither 0 nor 1
generate dur = 999
* second birth observed: interval runs from first birth to second birth
replace dur = age2nd - age1st if birth2==1
* no second birth: interval runs from first birth to the interview (censored)
replace dur = ageint - age1st if birth2==0
tab dur, missing
* create categories for age at first birth le19,ge20
* -9 is a placeholder for cases whose age1st falls outside both ranges below
* NOTE(review): the 240 cut-off implies ages are stored in months (240 = 20 years) -- confirm
generate age1teen = -9
replace age1teen = 1 if age1st < 240 & age1st >= 0
replace age1teen = 0 if age1st >= 240 & age1st <= 888
* first, tell STATA which is the duration, which is the event, and which is the id
* dur = analysis time, birth2 = event indicator, id = subject identifier
* NOTE(review): failure(varname) counts any nonzero, nonmissing value as an event,
* so a leftover placeholder birth2==9 would be treated as a birth -- check tab birth2
stset dur, fail(birth2) id(id)
* life table of second birth intervals
* restricted to positive durations; interval() gives the cut points of the table rows
ltable dur birth2 if dur>0, interval(0,9,21,33,69,129)

* separate estimates by age at first birth
ltable dur birth2 if dur>0, by(age1teen) interval(0,9,21,33,69,129)

* add hazard intervals
* (hazard reports the hazard table in place of the default survival table)
ltable dur birth2 if dur>0, by(age1teen) hazard interval(0,9,21,33,69,129)
* graph results
* one survivor curve per age1teen group, using the stset declaration made earlier
sts graph, by(age1teen)
* calculate the event rate for the overall sample and by age groups at first birth
strate
strate age1teen

* now, we are ready for a regression-style model
* use a cox model to automatically control for duration
stcox age1teen hispanic nhblack nhother
* if you are interested in a particular duration, you must make your own
* duration variables and interactions

* split each record into episodes at durations 9, 27 and 73;
* durcat records which episode a row belongs to
stsplit durcat, at(9 27 73)

* renumber the episodes 1-4 so the dummies below do not depend on the cut values
egen durgroup = group(durcat)
gen dur0008 = durgroup==1
gen dur0926 = durgroup==2
gen dur2772 = durgroup==3
gen dur73p = durgroup==4

* teen-first-birth x duration-episode interactions
generate teen0008 = age1teen*dur0008
generate teen0926 = age1teen*dur0926
generate teen2772 = age1teen*dur2772
generate teen73p = age1teen*dur73p
* then STATA allows you to control your own duration variables and interactions
* All three models use the exponential distribution.

* model 1: covariates only
streg age1teen hispanic nhblack nhother, dist(exp)

* model 2: add the duration dummies (dur2772 is left out as the reference episode)
streg age1teen hispanic nhblack nhother dur0008 dur0926 dur73p, dist(exp)

* model 3: add the teen x duration interactions (teen2772 left out to match)
streg age1teen hispanic nhblack nhother dur0008 dur0926 dur73p teen0008 teen0926 teen73p, dist(exp)
Time Series/Event History Models
Please use all code samples responsibly - these are samples and likely require adjustments to work correctly for your specific needs. Read through the documentation and comments to understand any caveats or limitations of the code and/or data and follow-up with the code author or Code Library admins if you have questions on how to adapt the sample to your specific use case.
Purpose: Many of the projects at Urban involve repeated measurements of the same individuals (or sites or whatever) over time. With such data, we often model problems where we are interested in both the timing and the types of outcomes. This code has commands to set up the data to create suitable outcome variables for time series models and commands to run the models for time series data (event history models, hazard models).
This code comes from the Stata users group training series; for more information, see the materials for that series.
Data: This code uses a fake dataset of 10 cases from an RCT of an abstinence training program at age 14; however, it is applicable to any dataset you are trying to use for time series models.