* Start from an empty session: drops any data and stored results in memory.
clear all
* Request 800M of memory before loading the data.
* NOTE(review): newer Stata releases manage memory automatically - confirm whether this line is still needed.
set memory 800M

*cd "C:\Dropbox\Australia stimulus\Stata Folder"
*or just set the working folder via the Stata menu

* --- ATO distribution dates for cheque payments ---------------------------
* Load ATO2.csv, keep only rows that carry a postcode, give the schedule
* columns a CH suffix, and store the result as ato.dta (sorted by postcode)
* so it can be merged onto the main data by postcode.
insheet using ATO2.csv, clear
keep if !missing(postcode)

* The source columns are spelled "distribtion..."; the day column is renamed
* to the corrected spelling, the day-code column keeps the original spelling.
rename number numberCH
rename distribtiondaycode distribtiondaycodeCH
rename distribtionday distributiondayCH

sort postcode
save ato, replace

* --- ATO distribution dates for EFT (electronic funds transfer) payments ---
* Same steps as for the cheque schedule: load ATOeft2.csv, keep rows with a
* postcode, give the schedule columns an eft suffix, and store as atoeft.dta.
insheet using ATOeft2.csv, clear
keep if !missing(postcode)

rename number numbereft
rename distribtiondaycode distribtiondaycodeeft
rename distribtionday distributiondayeft

sort postcode
save atoeft, replace

* --- Main data -------------------------------------------------------------
* Read the main data through the dictionary file, then attach the cheque and
* EFT schedules by postcode (many households per postcode, one schedule row
* per postcode).
infile using Dictionary, clear

* nogenerate suppresses the _merge indicator, so nothing has to be dropped
* between the two merges.
merge m:1 postcode using ato, nogenerate
merge m:1 postcode using atoeft, nogenerate

* Rows without a household id come only from the schedule files
* (postcodes with no household in the main data); discard them.
keep if id != .
//Nielsen sample covers 55% of postcodes.



* --- Time variables for the main data ---------------------------------------
* weekstr holds the observation date as a year-month-day string; convert it
* to a Stata daily date and derive the calendar components from it.
generate day = date(weekstr, "YMD")

* week   : week number within the year
* weekly : Stata weekly date, used as the panel time index below
generate week = week(day)
generate weekly = wofd(day)
generate month = month(day)
generate year = year(day)

label variable week "Week of Year"
label variable weekly "Week ID"



* --- Pay dates from the ATO schedules ---------------------------------------
* Convert the year-month-day strings of both schedules to Stata daily dates.
generate payday = date(distributiondayCH, "YMD")
generate paydayEFT = date(distributiondayeft, "YMD")

* Day of the week on which the EFT stimulus payment was made (0 = Sunday).
generate dow = dow(paydayEFT)
label variable dow "Day of week paid"

* --- EFT payweek -------------------------------------------------------------
* Map each EFT pay date to the weekly id (same scale as the variable weekly)
* in which the household is taken to receive and notice the transfer.
* The mapping is written out by hand for transparency.
* Stated rule: a payweek is the week whose window runs from Friday of the
* previous week to Thursday of this week, so paydays on or after a Friday
* belong to the following week.
* NOTE(review): the codes below put Fri 17 Apr and Fri 24 Apr in the same
* payweek as the Mon-Thu before them, which does not match the stated rule -
* confirm which one is intended.
* payweek stays 0 for every pay date not listed (including missing dates).
gen payweek = 0

* Week 14: Friday 3 April 2009 - only disaster-area payouts on this date
replace payweek = 2561 if paydayEFT == td(03apr2009)

* Week 15: Monday 6 April - Thursday 9 April 2009
replace payweek = 2562 if inrange(paydayEFT, td(06apr2009), td(09apr2009))

* Week 16: Tuesday 14 April - Friday 17 April 2009
replace payweek = 2563 if inrange(paydayEFT, td(14apr2009), td(17apr2009))

* Week 17: Monday 20 April - Friday 24 April 2009
replace payweek = 2564 if inrange(paydayEFT, td(20apr2009), td(24apr2009))

* Week 18: Monday 27 April - Wednesday 29 April 2009
replace payweek = 2565 if inrange(paydayEFT, td(27apr2009), td(29apr2009))



* --- Cheque payweek ----------------------------------------------------------
* Same hand-written mapping as for EFT, applied to the cheque pay date; the
* cheque schedule runs one week longer (through Thursday 7 May 2009).
* chequeweek stays 0 for every pay date not listed (including missing dates).
gen chequeweek = 0

* Week 14: Friday 3 April 2009 - only disaster-area payouts on this date
replace chequeweek = 2561 if payday == td(03apr2009)

* Week 15: Monday 6 April - Thursday 9 April 2009
replace chequeweek = 2562 if inrange(payday, td(06apr2009), td(09apr2009))

* Week 16: Tuesday 14 April - Friday 17 April 2009
replace chequeweek = 2563 if inrange(payday, td(14apr2009), td(17apr2009))

* Week 17: Monday 20 April - Friday 24 April 2009
replace chequeweek = 2564 if inrange(payday, td(20apr2009), td(24apr2009))

* Week 18: Monday 27 April - Friday 1 May 2009
replace chequeweek = 2565 if inrange(payday, td(27apr2009), td(01may2009))

* Week 19: Monday 4 May - Thursday 7 May 2009
replace chequeweek = 2566 if inrange(payday, td(04may2009), td(07may2009))


* Single-income households that receive no bonus: set both payweek variables
* to 0 so that all pay dummies derived from them are 0.
* NOTE(review): a missing hhincome also satisfies hhincome>100 in Stata -
* confirm that single adults with unknown income should be zeroed as well.
rename adults adult
foreach wk of varlist payweek chequeweek {
	replace `wk' = 0 if adult==1 & hhincome>100
}

* Declare the household-by-week panel.
sort id weekly
xtset id weekly

* paid: 1 in the week that equals the household's EFT payweek, 0 otherwise.
gen paid = (payweek==weekly)
label variable paid "Payweek Dummy"

//57% of IDs have matches

* cpaid: 1 in the week that equals the household's cheque payweek, 0 otherwise.
gen cpaid = (chequeweek==weekly)
label variable cpaid "Payweek Dummy for Cheques"

* Paid dummies split by payweek: pweek1 ... pweek5 mark the household's EFT
* payweek when it is week id 2562 ... 2566 respectively.
* Note: the EFT mapping in this file never assigns payweek 2566, so pweek5
* is 0 for every observation as things stand.
forvalues k = 1/5 {
	gen pweek`k' = (weekly==payweek & payweek==2561+`k')
}



* Household-independent dummy: 1 in any of the five payout weeks
* (week ids 2562 through 2566), 0 otherwise.
gen payoutweeks = inrange(weekly, 2562, 2566)


* --- Recode household characteristics into analysis-friendly dummies --------

* disaster: missing is treated as "not a natural-disaster area" (0).
rename naturaldisasterareas disaster
replace disaster = 0 if missing(disaster)

* sex: recode 2 to 0, so that 1 = shopper is male.
recode sex (2=0)

* fulltime: 1 if the shopper is employed full time; 0 for part time or no
* response (e.g. no employment).
gen fulltime = (employ==1)

* Possibility to make use of LifeStage ... will wait for feedback.

* single: 1 if single household (life stage 5 or household size 1).
gen single = (life==5 | hhsize==1)

* homeowner: 1 if tenure is coded 1.
gen homeowner = (tenure==1)

* Expenditure in dollar terms: divide the raw value by 100
* (presumably recorded in cents - confirm against the data dictionary).
* Fix: the original line had no command word, which is a syntax error.
replace expenditure = expenditure/100

* Interactions of "any payment this week" with household characteristics.
* The anypaid dummy itself is only created further down in this file, so the
* same condition (EFT or cheque payweek dummy equals 1) is written out here
* instead of referencing a variable that does not exist yet.
gen anypaid_home = (paid==1 | cpaid==1) * homeowner
gen anypaid_hhincome = (paid==1 | cpaid==1) * hhincome
gen anypaid_age = (paid==1 | cpaid==1) * age

* --- Occupation and country-of-birth dummies ---------------------------------

* retired: 1 if occupation code is 17, 0 otherwise.
gen retired = (occupation==17)

* homestay: 1 if home-stay shopper (occupation code 14), 0 otherwise.
gen homestay = (occupation==14)

* foreign: 1 if the shopper's country code is anything other than 1.
* Set to missing when the country is unknown: code 0 or a missing value.
* Fix: a missing country code also satisfies "!=1" in Stata, so it was
* previously classified as foreign born.
gen foreign = (shoppercountry!=1)
replace foreign = . if shoppercountry==0 | missing(shoppercountry)

* foreignhhh: same definition for the household head.
gen foreignhhh = (hhhcountry!=1)
replace foreignhhh = . if hhhcountry==0 | missing(hhhcountry)

* longdistance: 1 if the household head (who may or may not be the main
* shopper) has a different country of birth from the shopper. Only counted
* when both countries are known (neither code 0 nor missing); 0 otherwise.
gen longdistance = (hhhcountry!=shoppercountry & hhhcountry!=0 & shoppercountry!=0 & !missing(hhhcountry, shoppercountry))
 
* Other variable clean-up.
* We have 10615 HH IDs               // quietly tab id ; gen n=r(r)
* of which only 149 have observations for all weeks of the two years
* (104 weeks)                        // tab len, or xtdescribe

* --- State dummies -----------------------------------------------------------
* encode numbers the state strings in alphabetical order and tabulate then
* creates one indicator per code (state1, state2, ...). The indicators are
* copied to named variables in this order:
*   state1 ACT, state2 NSW, state3 NT, state4 QLD,
*   state5 SA,  state6 TAS, state7 VIC, state8 WA
* NOTE(review): this relies on exactly these eight states being present in
* the data - confirm with "tab states".
rename stateterritory state
encode state, gen(states)
tab states, gen(state)

local i = 0
foreach abbr in ACT NSW NT QLD SA TAS VIC WA {
	local ++i
	gen `abbr' = state`i'
}
drop state1 state2 state3 state4 state5 state6 state7 state8

* Log expenditure (missing wherever expenditure is zero or missing).
gen lexp = log(expenditure)

* Sample flag instead of dropping observations: single adults with
* hhincome>100 (about 1% of the data) get sample = 0; their payment dummies
* are already set to zero above.
* Previously: drop if adult==1 & hhincome>100
* Fix: assignment needs "=", not "==" (the original lines were syntax errors).
* NOTE(review): a missing hhincome also satisfies hhincome>100 - confirm
* that single adults with unknown income should be flagged out as well.
gen sample = 1
replace sample = 0 if adult==1 & hhincome>100

* Year, month and year-by-month interaction dummies, prefixed with YM.
xi, prefix(YM) i.year*i.month
 
* Declare the panel structure (household id by week id).
tsset id weekly

* One indicator per week id (weekly_d*) and per calendar month (month_d*).
foreach tvar in weekly month {
	tabulate `tvar', generate(`tvar'_d)
}

* length: number of weekly observations available for the household id.
bysort id (day): gen length = _N
label variable length "Number of weeks we observe HH in sample"

* anypaid: 1 in a week in which the household is paid by either route
* (EFT payweek dummy or cheque payweek dummy equals 1), 0 otherwise.
gen anypaid = (paid==1 | cpaid==1)

* Store the finished panel.
save panel, replace

******************************************************************************************************************
******** End of data-generating and merging ******************************************************************************************************************
* --- Summary statistics ------------------------------------------------------
* Interactive alternative:
*summarize  expenditure quant hhsize hhincome age sex children retired homestay  fulltime foreign length

* Write a LaTeX summary table (2 digits, variable labels, min/max) to the
* file sumstat_collaps, replacing any existing copy.
local sumvars expenditure quant hhsize hhincome age sex children retired homestay fulltime foreign length
sutex `sumvars', dig(2) lab min file(sumstat_collaps) replace


* --- Exploratory graphs --------------------------------------------------------
* Fix: collapse replaces the data in memory, so the second and third collapse
* used to run on already-collapsed data in which the variables they need no
* longer exist. Each section now works on a preserved copy and restores the
* full panel afterwards.

* (1) Quick eyeballing in an event-aligned graph: mean expenditure by the
*     number of weeks relative to the EFT payweek, within a +/-20 week window.
*     --- nothing promising to see here.
preserve
gen window = 1 if weekly-payweek<=20 & weekly-payweek>=-20
gen countdown = .
replace countdown = weekly-payweek if window==1
collapse expenditure, by(countdown)
line expenditure countdown
restore

* (2) A few scatter plots of household means to get a feeling for household
*     characteristics and expenditure.
preserve
collapse expenditure hhsize hhincome age, by(id)
graph twoway (lfit expenditure hhincome) (scatter expenditure hhincome)
graph twoway (lfit expenditure hhsize) (scatter expenditure hhsize)
scatter expenditure age
restore

* (3) More looks at the demographics of the expenditure data, again on
*     household means: expenditure per item and per household member.
preserve
collapse quant children schoolage expenditure hhsize hhincome age retired homestay foreign longdistance, by(id)
gen expitem = expenditure/quant
gen expcapita = expenditure/hhsize

graph twoway (lfit expitem hhincome) (scatter expitem hhincome)
bys foreign: sum expenditure
bys homestay: sum expenditure
bys retired: sum expenditure
scatter expitem age if expitem<1000
scatter expcapita age
restore
