*****************************************************************
*						Descrizione dei dati
* 							SHARE DATA
*						Easy SHARE dataset
*
*****************************************************************
 * Move to the course working folder, then load the file easySHARE_rel8-0-0.dta,
 * replacing whatever dataset is currently in memory.
 cd "G:\Il mio Drive\Multifonte\AA 2025_26"
 use "easySHARE_rel8-0-0.dta", clear

 
 
 ***************************************************************
 *			CHI VIENE INTERVISTATO:
 * intervistati di 50 anni e più e i loro partner
 ***************************************************************
 * Who is interviewed: age distribution overall and by sex.
 * The first summary is left unfiltered, so any negative missing-value codes
 * show up in its minimum.
 sum age

 * Negative ages are missing-value codes (the tabstat below drops them with
 * age>-1), so they are excluded from the mean and from the under-50 count too.
 mean age if age>-1, over(female)
 tabstat age if age>-1, by(female) st(min max mean p25 p50 p75 n)
 tab female if age>-1 & age<50

 * Labelled copy of the sex indicator, used as the by() variable in the graphs.
 recode female (0=0 "Maschi") (1=1 "Femmine"), gen(sesso)

 * Percent histogram of age (respondents older than 29) by sex, with a normal
 * density overlaid. The title colour is spelled "black": "balck" is not a
 * valid colour name.
 hist age if age>29, percent normal                                                      ///
     by(sesso, graphregion(c(white)) note("Fonte: Easy SHARE", size(small))              ///
        legend(off)                                                                      ///
        title("Distribuzione degli intervistati secondo l'età e sesso", color(black)))   ///
     fcolor(gs10) lc(black)                                                              ///
     xlabel(30 40 50 60 70 80 90 100 110, format(%9.0f))                                 ///
     xtitle("Età") ytitle("%") ylabel(0(1)6)
 
*donne leggermente più giovani (di circa 0.5 anni in media)

 
 ****************************************************************
 ****************************************************************
 
 * 					CONTENUTI DEI DATI: SALUTE
 *
 *						salute auto-percepita
 ****************************************************************
 * Self-perceived health (sphus): raw frequencies first.
 tabulate sphus

 * Discrete percent histogram of the positive sphus codes, one panel per sex.
 histogram sphus if sphus>0, discrete percent                                    ///
     fcolor(gs10) lcolor(black)                                                  ///
     xlabel(1 "excellent" 2 "very good" 3 "good" 4 "fair" 5 "poor", angle(45))   ///
     by(sesso, graphregion(color(white)) title("salute percepita"))

 * Binary indicator self_h: codes 1, 2, 3 become 0 "good", codes 4, 5 become
 * 1 "not good", and codes of 0 or below become missing.
 recode sphus (min/0=.) (1 2 3=0 "good") (4 5=1 "not good"), generate(self_h)

 * Mean of the 0/1 indicator (the share "not good"), first by country sorted
 * on that mean, then by wave.
 graph hbar (mean) self_h, over(country, sort(1)) bar(1, fcolor(gs9)) graphregion(color(white))
 graph hbar (mean) self_h, over(wave) bar(1, fcolor(gs9)) graphregion(color(white))

 
 
 *****************************************************************
 * 						depressione (eurod)
 *					       multiple items
 *****************************************************************
* Depression (eurod): one-way frequency tables for every variable whose name
* starts with "euro".
tab1 euro*

* Cleaned copies euro_1 ... euro_12 of the twelve items: negative codes
* become missing.
foreach x of numlist 1/12 {
    recode euro`x' (min/-1=.), generate(euro_`x')
}

* Cronbach's alpha over the twelve cleaned items, with item-level statistics
* and variable labels.
alpha euro_*, item label

* Discrete percent histogram of the eurod score (non-negative values only),
* one panel per sex, with a normal density overlaid.
histogram eurod if eurod>-1, discrete percent normal   ///
    fcolor(gs10) lcolor(black)                         ///
    by(sesso, graphregion(color(white)) title("EUROD depressione"))

 
 *****************************************************************
 * 					numero di chronic diseases 
 *					       multiple items
 *****************************************************************
* Number of chronic diseases (chronic_mod): raw frequencies.
tab chronic_mod

* Discrete percent histogram of the non-negative values, one panel per sex,
* with a normal density overlaid.
hist chronic_mod if chronic_mod>-1, dis percent normal   ///
    fcolor(gs10) lcolor(black)                           ///
    by(sesso, graphregion(c(white)) title("N of chronic diseases"))

* Mean number of chronic diseases at each age, computed separately for each
* sex: the scatter below is split by(female), and a mean taken over age alone
* would plot the same pooled values in both panels.
* The variable name is written in full so the line does not depend on
* variable-name abbreviation being switched on.
bysort female age: egen mean_chronic = mean(chronic_mod) if chronic_mod>-1
scatter mean_chronic age if age>45, by(female, graphregion(c(white))) symbol(Oh) mc(gs9) jitter(5)
 
 
 
  *****************************************************************
 * 						qualità della vita (casp)
 *					       multiple items
 *****************************************************************
* Quality of life (casp): codes of 0 or below become missing in casp2.
recode casp (min/0=.), gen(casp2)

* Discrete percent histogram of casp2 by sex, with a normal density overlaid.
hist casp2, dis percent normal                         ///
    fcolor(gs10) lcolor(black)                         ///
    by(female, legend(off) graphregion(c(white)) title("quality of life (CASP)"))

* Marital status in four groups. Every original code is listed explicitly:
* recode leaves unlisted values unchanged, so an unlisted code 3 would keep
* the value 3 and end up under the label "alone".
* NOTE(review): grouping 1 with 2 and 3 with 5 assumes the easySHARE coding
* 1 = married living with spouse, 2 = registered partnership,
* 3 = married living separated, 5 = divorced. Confirm against the release guide.
recode mar_stat (min/0=.) (1 2=1 "with partner") (3 5=2 "separated")   ///
    (4=3 "alone") (6=4 "widowed"), gen(marital)

* Mean quality of life per marital group, bars sorted on that mean.
graph hbar (mean) casp2, over(marital, sort(1)) bar(1,fc(gs9)) graphregion(c(white))

* Same comparison as a table, including the standard error of the mean.
tabstat casp2, by(marital) st(min max mean p25 p50 p75 n semean)

  
 ****************************************************************
 * 				misure di salute, tra cui:
 *				Forza della presa della mano
 ****************************************************************
 * Percent histogram of maximum grip strength (values strictly between 0 and
 * 90) by sex, with a normal density overlaid. The title colour is spelled
 * "black": "balck" is not a valid colour name.
 hist maxgrip if maxgrip>0 & maxgrip<90, percent normal                                      ///
     by(sesso, graphregion(c(white)) note("Fonte: Easy SHARE", size(small))                  ///
        legend(off)                                                                          ///
        title("Distribuzione degli intervistati secondo il test fisico", color(black)))      ///
     fcolor(gs10) lc(black)                                                                  ///
     xlabel(0 10 20 30 40 50 60 70 80, format(%9.0f))                                        ///
     xtitle("Forza di presa della mano") ytitle("%") ylabel(0(1)13)

 * Summary statistics of grip strength by sex, excluding negative codes.
 tabstat maxgrip if maxgrip>-1, by(female) st(min max mean p25 p50 p75 n)

 
 

 ******************************************************************
 *			RELAZIONE TRA ETà E FORZA DELLA PRESA DELLA MANO
 *								******
 * ATTENZIONE: l'età varia tra individui ma anche per lo stesso 
 *             individuo nel tempo
 ******************************************************************
 * Grip strength against age (ages between 25 and 110, non-negative grip
 * values), one panel per sex: jittered scatter plus a dashed median spline.
 * The title colour is spelled "black": "balck" is not a valid colour name.
 two (scatter maxgrip age if maxgrip>-1 & age>25 & age<110, by(sesso) symbol(Oh) mc(gs9) jitter(5))   ///
     (mspline maxgrip age if maxgrip>-1 & age>25 & age<110,                                           ///
         by(sesso, graphregion(c(white)) legend(off) note("Fonte: Easy SHARE", size(small))           ///
            title("Relazione tra età e test fisico", color(black)))                                   ///
         color(black) lp(dash) xlabel(30 40 50 60 70 80 90 100, format(%9.0f))                        ///
         ytitle("Forza di presa della mano") xtitle("Età") ylabel(0(10)100))
 
 
 * Indicator of a missing grip measurement, coded so that it matches its name:
 * 1 = negative (missing-value) code, 0 = a value in the range 0-99.
 recode maxgrip (min/-1=1 "missing") (0/99=0 "measured"), gen(missing_grip)

 * Row percentages: share of missing grip measurements within each country.
 tab country missing_grip, row
 
 
 
 *****************************************************************
 * INTERVISTATI CHE VIVONO IN DIVERSI PAESI EUROPEI (+ ISRAELE)
 *							******
 *proporzioni dei paesi non rispecchiano quelli reali
 *****************************************************************
 * Number of observations per country.
 tabulate country

 * Different countries, different interview timing:
 * country by wave, then country by interview year.
 tabulate country wave
 tabulate country int_year

 
 ************************************************************
 * quando gli individui entrano nel campione? (re-sampling)
 *************************************************************
* When do respondents enter the sample? With the rows ordered by person and
* wave, baseline is 1 on the first row of each mergeid and missing elsewhere.
sort mergeid wave
generate baseline = cond(mergeid != mergeid[_n-1], 1, .)

* Entry rows only: country by wave of entry.
tabulate country wave if baseline == 1
 
 
****************************************************************** 
 *****************************************************************
 * 						STRUTTURA DEI DATI
 *						******************
 * 1- paesi
 * 2- famiglie
 * 3- individui
 * 4- osservazioni (individui nel tempo)
 ******************************************************************
 * Order the rows by household, person and wave, then open the data browser
 * on the identifier and timing columns to inspect how the levels nest.
 sort hhid mergeid wave
 browse mergeid hhid coupleid wave int_year country
 
 
 
 *****************************************************************
 *					STRUTTURA LONGITUDINALE-PANEL
 *****************************************************************
 *					costruire ID dell'individuo
 *****************************************************************
 * Numeric person identifier: with the rows ordered by person and wave, xx
 * holds the observation number on the first row of each mergeid, and ID
 * copies that number to every row of the same person.
 sort mergeid wave
 generate xx = _n if mergeid != mergeid[_n-1]
 by mergeid: generate ID = xx[1]
 drop xx

 * Check the new identifier next to the original ones.
 browse mergeid ID hhid coupleid wave int_year country

 * Declare the panel (person ID by wave) and describe its participation pattern.
 xtset ID wave
 xtdescribe
 
 
 
 *******************************************************************
 *******************************************************************
 
 *						UN ESEMPIO CON EASYSHARE
 
 *******************************************************************
 * Keep only the identifiers and the variables used in the worked example.
 keep ID mergeid hhid country wave int_year age ep005_ maxgrip

 * Two layouts depending on the analysis. Wide: one row per ID, with one
 * copy of each listed variable per wave.
 reshape wide int_year ep005_ maxgrip age mergeid, i(ID) j(wave)
 browse

 * Back to long: one row per ID and wave. Rows with a missing int_year are
 * dropped.
 reshape long
 drop if int_year == .
 browse

 *****************************************************************
 * per semplificare escludiamo i missing
******************************************************************
* Turn the codes -1 ... -15 into system missing for the three analysis variables.
mvdecode maxgrip ep005_ age, mv(-15/-1)

* Retired is 1 when ep005_ equals 1 and 0 for the codes 2 to 97.
recode ep005_ (1=1) (2/97=0), generate(Retired)

* Grip strength against retirement status and against age, each with a
* linear fit on top of the scatter.
twoway (scatter maxgrip Retired) (lfit maxgrip Retired)
twoway (scatter maxgrip age) (lfit maxgrip age)



******************************************************************
*
* 						MODELLI OLS
*		E' un modello corretto per i dati che abbiamo?
******************************************************************
* OLS models: scatterplot matrix of the three variables, then two bivariate fits.
graph matrix maxgrip Retired age
regress maxgrip Retired
regress maxgrip age

* Diagnostics on the last fit (maxgrip on age).
* Studentized residuals, to check for outliers.
predict r, rstudent

* Normality of residuals: kernel density of the raw residuals with a normal
* curve overlaid.
predict r3, residuals
kdensity r3, normal

* Linearity: refit the model, plot the wave-7 observations with a linear
* fit, then plot the residuals against age.
regress maxgrip age
twoway (scatter maxgrip age) (lfit maxgrip age) if wave==7

predict r4, residuals
scatter r4 age
 
 
 ************************************************************************
 *corretta specificazione del modello: ci sono altre variabili importanti?
 *variabili omesse -> errore di specificazione
 *_hat valori predetti (dovrebbe essere significativo)
 *_hatsq quadrato (non dovrebbe esserlo)
 ***********************************************************************
  /* regress maxgrip age Retired i.country if wave ==7
linktest
   ovtest
 */
 
 
 ***********************************************************************
 ***********************************************************************
 *
 *					stime e variabili di controllo
 *
 ***********************************************************************
   * Predicted grip strength by retirement status, first without and then
   * with age as a control. Both models are fitted on the same observations
   * (wave 7 with age observed), so the change in the Retired estimate
   * reflects only the added control and not a change of sample.
   regress maxgrip i.Retired if wave==7 & !missing(age)

   margins Retired
   marginsplot, xlabel(0 "work" 1 "retired") xscale(r(-0.5 0 1 1.5)) plotopt(connect(none))   ///
       title("valori predetti di Forza della mano") xtitle("") ytitle("")
   graph save 1.gph, replace

   * Same model with age added.
   regress maxgrip i.Retired age if wave==7
   margins Retired
   marginsplot, xlabel(0 "work" 1 "retired") xscale(r(-0.5 0 1 1.5)) plotopt(connect(none))   ///
       title("valori predetti di Forza della mano" "controllando per età") xtitle("") ytitle("")
   graph save 2.gph, replace

   * How the Retired estimate changes once age enters the model: the two
   * saved graphs side by side on common axes.
   graph combine 1.gph 2.gph, ycommon xcommon rows(1) title(Paradosso di Simpson)




*******************************************************************
 ******************************************************************
 
 *					STRUTTURA DEI DATI
 
  *****************************************************************
 *					STRUTTURA LONGITUDINALE-PANEL
 ******************************************************************
 *					costruire ID dell'individuo
 ******************************************************************
 * Numeric person identifier and panel declaration.
 * ID may already be in memory from an earlier section of this file, and
 * creating it again would stop with "variable ID already defined", so it is
 * built only when absent.
 capture confirm variable ID, exact
 if _rc {
     sort mergeid wave
     gen xx=_n if mergeid!=mergeid[_n-1]
     bys mergeid: egen ID=min(xx)
     drop xx
 }
 sort mergeid wave

 * Browse the identifier columns. Columns that are no longer in memory (for
 * example coupleid after an earlier keep) are skipped instead of stopping
 * with "variable not found". Run these lines together: the list lives in a
 * local macro.
 local idcols
 foreach v in mergeid ID hhid coupleid wave int_year country {
     capture confirm variable `v', exact
     if !_rc local idcols `idcols' `v'
 }
 br `idcols'

 * Declare the panel (person ID by wave) and describe its participation pattern.
 xtset ID wave
 xtdes
 
 

 
 
 *****************************************************************
 * per semplificare escludiamo i missing
******************************************************************
* Recode the codes -1 ... -15 to system missing. Repeating this step is
* harmless: values that are already missing are left alone.
mvdecode maxgrip ep005_ age, mv(-1/-15)

* Retirement dummy (1 when ep005_ equals 1, 0 for the codes 2 to 97).
* recode with gen() stops with an error when the target variable already
* exists, so Retired is created only if it is not in memory yet.
capture confirm variable Retired, exact
if _rc {
    recode ep005_ (2/97=0) (1=1), gen(Retired)
}

* OLS on the pooled observations. Is this the right model for these data?
regress maxgrip age Retired

* Think about the data structure: individual grip-strength lines over the
* panel time variable for the persons with ID below 500, overlaid in one plot.
xtline maxgrip if ID<500, overlay legend(off)



 *****************************************************************
 * per semplificare escludiamo i missing
******************************************************************
* Recode the codes -1 ... -15 to system missing. Repeating this step is
* harmless: values that are already missing are left alone.
mvdecode maxgrip ep005_ age, mv(-1/-15)

* The same recode appears earlier in this file, and recode with gen() stops
* with "variable Retired already defined" when the variable exists, so the
* dummy is created only if it is not in memory yet.
capture confirm variable Retired, exact
if _rc {
    recode ep005_ (2/97=0) (1=1), gen(Retired)
}



**********************************************************************
* CONSIDERIAMO CHE I DATI HANNO UNA STRUTTURA GERARCHICA:
*
* osservazioni nel tempo (7 wave)
* individui 
* famiglie (PR e il partner)
* paesi (EU + israele)
**********************************************************************
*  				MODELLI CON SOLO L'INTERCETTA/E
* la media campionaria è la parte fissa del modello e la differenza
* tra ogni osservazione e la media è la parte random o residuale
**********************************************************************
********************************************************************
********************************************************************
*
* MODELLI RANDOM INTERCEPT - the Generalized Least Squares Estimator
*
**********************************************************************
* in stata sono chiamati random effects models ma abbiamo una
* intercetta specifica per ogni individuo/caso
**********************************************************************
* 						ASSUMIAMO CHE: 
* il termine di errore è libero di variare tra gli individui e 
* non è correlato con i predittori inclusi nel modello
*********************************************************************
* scomponiamo la varianza in una componente between (tra gli individui)
* [Sigma_u = standard deviation between subjects] e 
* una within (dentro l'individuo, ossia nel tempo) [Sigma_e = standard 
* deviation within subjects]
* Rho (correlazione interclasse): quanta varianza è spiegata dalla differenza 
* tra unità 
**********************************************************************

* Random-effects model with Retired only.
xtreg maxgrip Retired, re

* Adding age on the observations with age above 49: random effects first,
* then the fixed-effects estimator on the same specification.
xtreg maxgrip Retired age if age > 49, re
xtreg maxgrip Retired age if age > 49, fe

/**********************************************************************
* MODELLI RANDOM INTERCEPT - the maximum-likelihood Estimator
*********************************************************************
xtreg maxgrip Retired, mle
xtreg maxgrip Retired age, mle

* oppure... 
xtmixed maxgrip age || ID:,mle
xtmixed maxgrip age Retired || ID:,mle
*/


************************
*Transizioni
* da qualsiasi posizione
*  a pensionato
************************
* Transition into retirement: with the rows ordered by person and wave, a row
* is flagged when the previous row belongs to the same mergeid, has
* Retired == 0, and the current row has Retired == 1.
sort mergeid wave
generate retired2 = cond(Retired[_n-1]==0 & Retired==1 & mergeid==mergeid[_n-1], 1, .)
browse mergeid ID wave retired2 Retired ep005_

* Every other row counts as "no transition".
replace retired2 = 0 if retired2 == .
tabulate retired2

* Grip strength on the transition flag and age (age above 49): pooled OLS,
* then xtreg with its default estimator.
regress maxgrip retired2 c.age if age>49
xtreg maxgrip retired2 c.age if age>49

		
************************************************************************
*stesso modello dove aggiungiamo un livello (e intercette) per paese
************************************************************************
* Intercept-only model with random intercepts for persons (ID) and for
* countries. The grouping levels must be listed from the highest to the
* lowest, so country comes first: listing ID first would declare countries
* as nested inside persons.
xtmixed maxgrip || country: || ID:




/**************************************************************************
*consideriamo un multilevel cross-sectional in cui abbiamo le osservazioni 
* in un punto nel tempo
***************************************************************************
xtmixed maxgrip age if wave==7 || country:, 

predict score_grip if e(sample), fitted
predict re1 if e(sample), reffect
twoway (scatter maxgrip age,  ) || line score_grip age , connect(ascending) title(Random Intercept per country)
   
   *** Best Linear Unbiased Prediction
   *** (BLUP) of Random Effects in the
   *** Normal Linear Mixed Effects Model
  gen sa1 = _b[_cons] + re1
  twoway (scatter sa1 sa1,  ) (function y=x, range(50 70)  )      ////
  , legend(off) ytitle(BLUP) xtitle(ML) title(Country Intercepts)
  
  *labels
   twoway (scatter sa1 sa1,  mlabel(country)) (function y=x, range(50 70)  )      ////
   , legend(off) ytitle(BLUP) xtitle(ML) title(Country Intercepts)

  */
  *************************************************************************
  *					INTERCETTE PER famiglia
  *************************************************************************
  * Random intercept per household (hhid), wave 7 only.
  xtmixed maxgrip age Retired if wave==7 || hhid:

  * Fitted values (fixed part plus random intercept) and the predicted
  * random effects, for the estimation sample only.
  predict score_grip1 if e(sample), fitted
  predict re11 if e(sample), reffects

  * Fitted values against age. The title names the household, which is the
  * grouping level of this model.
  line score_grip1 age, connect(ascending) title(Random Intercept per household)
   

   
   
   
   
   
   
   *************************************************************
   *************************************************************
   *
   *							ESERCIZIO 2
   *
   *************************************************************
   /* Partiamo dall'esercizio precedente dove avevamo selezionato
   il campione appropriato per studiare le differenze di salute
   tra chi ha figli e coloro che non hanno figli. Potete scegliere 
   l'outcome che preferite (eurod, casp, maxgrip.. ecc..).
   Cercate di trovare un modello che sia appropriato per la struttura
   gerarchica dei dati. Utilizzate solo 2 livelli (per semplicità).
   *************************************************************
   
   FATE ATTENZIONE A:
   - in quali wave l'outcome viene riportato dagli intervistati
   - quanti individui abbiamo per ogni famiglia
   - dovete forzatamente ridurre le analisi a sotto-gruppi per avere
   "solo" due livelli
   
   **************************************************************/
   * QUESITO: i coefficienti cambiano rispetto ai modelli che 
   * considerano solo un livello?
   **************************************************************