####Lab 1: Linear regression and Nonlinear regression####

###Libraries##################################
library(readxl)
library(lmtest) 
library(forecast)
library(DIMORA)
library(fpp2)
##############################################

##some plots for time series##################
plot(a10, ylab="million dollars", xlab="Year", main="Antidiabetic drugs")

tsdisplay(a10) ##a general view of the data

seasonplot(a10, ylab="million dollars", xlab="Year", main="Seasonal plot: Antidiabetic drugs", year.labels=T, year.labels.left=T, col=1:20, pch=19)


###Linear regression for time series 
#Facebook example

###read the data
facebook<- read_excel("facebook.xlsx")
str(facebook)
##create a variable 'time'
tt<- 1:NROW(facebook)

#create the variable 'fb'
fb <- facebook$fb

##make a plot
plot(tt, fb, xlab="Time", ylab="Facebook users")

##acf of variable "fb"

acf(fb)

##fit a linear regression model 
fit1 <- lm(fb~ tt)
summary(fit1)
anova(fit1)

##plot of the model
plot(tt, fb, xlab="Time", ylab="Facebook users")
abline(fit1, col=3)

##check the residuals? are they autocorrelated? Test of DW
dwtest(fit1)

##check the residuals
resfit1<- residuals(fit1)
plot(resfit1,xlab="Time", ylab="residuals" )


##let us do the same with a linear model for time series, so we transform the data into a 'ts' object
fb.ts <- ts(fb, frequency = 4)
ts.plot(fb.ts, type="o")

## we fit a linear model with the tslm function
fitts<- tslm(fb.ts~trend)

###obviously it gives the same results of the first model
summary(fitts)

dwtest(fitts)


#################################### 
##Linear regression for iMac

apple<- read_excel("apple.xlsx")
str(apple)
imac <- apple$iMac
#data visualization
plot(imac,type="l", xlab="quarter", ylab="iMac sales")

#variable tt for a linear model 
tt<- 1:NROW(apple)

# linear model
fit2 <- lm(imac~tt)
summary(fit2)

plot(imac,type="l", xlab="quarter", ylab="iMac sales")
abline(fit2, col=3)

dwtest(fit2)

###check the residuals
res2<- residuals(fit2)
plot(res2, xlab="quarter", ylab="residuals", type="l")

acf(res2)


#data transformed as time series
mac.ts<-ts(imac, frequency=4)

#Model with trend and seasonality
fit3 <- tslm(mac.ts~ trend+season)
summary(fit3)

#check the residuals
res3 <- residuals(fit3)

plot(res3, ylab="residuals")
dwtest(fit3)

###plot of the model
plot(mac.ts, ylab="iMac sales", xlab="Time")
lines(fitted(fit3), col=2)


#####
################################################
####Linear regression with trend and seasonality and forecasting exercise 

#####################################
###data on Australian beer production
#####################################

beer<- ausbeer
beer
plot(beer)
Acf(beer)


#take a portion of data and fit a linear model with tslm
beer1<- window(ausbeer, start=1992, end=2006 -.1)
beer1
plot(beer1)
m1<- tslm(beer1~ trend+ season)
summary(m1)
fit<- fitted(m1)

plot(beer1)
lines(fitted(m1), col=2)


fore <- forecast(m1)
plot(fore)
##forecasts from regression model for beer production, The dark shaded region shows 80% prediction intervals and the light shaded 95% prediction intervals (range of values the random variable could take with relatively high probability). 

#analysis of residuals
res<- residuals(m1) 
plot(res) 
#the form of residuals seems to indicate the presence of negative autocorrelation
Acf(res)


####################################################################################################
####Data on quarterly percentage change in US consumption, income, production, savings, unemployment
####################################################################################################

uschange<- uschange
str(uschange)
plot(uschange)
autoplot(uschange) 
pairs(uschange)
#different way of seeing the same series

cons<- uschange[,1]
inc<- uschange[,2]
prod<- uschange[,3]
sav<- uschange[,4]
unem<- uschange[,5]


####consider the series of consumption as dependent variable and study with the other explanatory variables in a multiple regression model
fit.cons<- tslm(cons~inc+prod+sav+unem)
summary(fit.cons)
AIC(fit.cons)

plot(cons)
lines(fitted(fit.cons), col=2)

res<- residuals(fit.cons)
plot(res)
acf(res)
Acf(res) #note the difference

##we remove the 'production' variable  
fit.cons1<- tslm(cons~inc+sav+unem)
summary(fit.cons1)


##Exercise with scenario hypotheses 
## fit the model again (by using the data in a different way)

fit.consBest <- tslm(
  Consumption ~ Income + Savings + Unemployment,
  data = uschange)
  
h <- 4 
##window for prediction

##we assume a constant increase of 1 and 0.5% for income and savings and no change for unemployment

newdata <- data.frame(
    Income = c(1, 1, 1, 1),
    Savings = c(0.5, 0.5, 0.5, 0.5),
    Unemployment = c(0, 0, 0, 0))

##forecasts
fcast.up <- forecast(fit.consBest, newdata = newdata)

####we assume a constant decrease of 1 and 0.5% for income and savings and no change for unemployment

newdata <- data.frame(
    Income = rep(-1, h),
    Savings = rep(-0.5, h),
    Unemployment = rep(0, h))
fcast.down <- forecast(fit.consBest, newdata = newdata)

##graphical comparison of these two scenarios

autoplot(uschange[, 1]) +
  ylab("% change in US consumption") +
  autolayer(fcast.up, PI = TRUE, series = "increase") +
  autolayer(fcast.down, PI = TRUE, series = "decrease") +
  guides(colour = guide_legend(title = "Scenario"))


#############################################################
#############################################################
####Nonlinear models for new product growth (diffusion models)
#############################################################

###Music data (RIAA)
music<- read_excel("music.xlsx")
str(music)

##create the variable cassette
cassette<- music$cassette[1:36]

###some simple plots
plot(cassette, type="b")
plot(cumsum(cassette), type="b")

###a better plot of the yearly time series
plot(cassette, type= "b",xlab="Year", ylab="Annual sales",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37), labels=music$year[c(1,10,19,28,37)])

###we estimate a simple Bass Model 
bm_cass<-BM(cassette,display = T)
summary(bm_cass)

###prediction (out-of-sample)
pred_bmcas<- predict(bm_cass, newx=c(1:50))
pred.instcas<- make.instantaneous(pred_bmcas)

###plot of fitted model 
plot(cassette, type= "b",xlab="Year", ylab="Annual sales",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37), labels=music$year[c(1,10,19,28,37)])
lines(pred.instcas, lwd=2, col=2)


###we estimate the model with 50% of the data

bm_cass50<-BM(cassette[1:18],display = T)
summary(bm_cass50)

pred_bmcas50<- predict(bm_cass50, newx=c(1:50))
pred.instcas50<- make.instantaneous(pred_bmcas50)

plot(cassette, type= "b",xlab="Year", ylab="Annual sales",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37), labels=music$year[c(1,10,19,28,37)])
lines(pred.instcas50, lwd=2, col=2)


###we estimate the model with 25% of the data
bm_cass75<-BM(cassette[1:9],display = T)
summary(bm_cass75)

pred_bmcas75<- predict(bm_cass75, newx=c(1:50))
pred.instcas75<- make.instantaneous(pred_bmcas75)


###Comparison between models (instantaneous)
###instantaneous
plot(cassette, type= "b",xlab="Year", ylab="Annual sales",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37), labels=music$year[c(1,10,19,28,37)])
lines(pred.instcas75, lwd=2, col=2)
lines(pred.instcas50, lwd=2, col=3)
lines(pred.instcas, lwd=2, col=4)


###Comparison between models (cumulative)
plot(cumsum(cassette), type= "b",xlab="Year", ylab="Annual sales",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37), labels=music$year[c(1,10,19,28,37)])
lines(pred_bmcas75, lwd=2, col=2)
lines(pred_bmcas50, lwd=2, col=3)
lines(pred_bmcas, lwd=2, col=4)


###exercise: try the same with the CD time series


###Twitter (revenues)

twitter<- read_excel("twitter.xlsx")
length(twitter$twitter)


plot(twitter$twitter, type= "b",xlab="Quarter", ylab="Quarterly revenues",  pch=16, lty=3, xaxt="n", cex=0.6)
axis(1, at=c(1,10,19,28,37,46), labels=twitter$quarter[c(1,10,19,28,37,46)])


###BM
tw<- (twitter$twitter)
Acf(tw)


bm_tw<-BM(tw,display = T)
summary(bm_tw)


pred_bmtw<- predict(bm_tw, newx=c(1:60))
pred.insttw<- make.instantaneous(pred_bmtw)

plot(cumsum(tw), type= "b",xlab="Quarter", ylab="Cumulative revenues",  pch=16, lty=3, cex=0.6, xlim=c(1,60), ylim=c(0,40000))
lines(pred_bmtw, lwd=2, col=2)

plot(tw, type= "b",xlab="Quarter", ylab="Quarterly revenues",  pch=16, lty=3, cex=0.6, xlim=c(1,60))
lines(pred.insttw, lwd=2, col=2)


###GBMr1
GBMr1tw<- GBM(tw,shock = "rett",nshock = 1,prelimestimates = c(4.463368e+04, 1.923560e-03, 9.142022e-02, 24,38,-0.1))


######GBMe1

GBMe1tw<- GBM(tw,shock = "exp",nshock = 1,prelimestimates = c(4.463368e+04, 1.923560e-03, 9.142022e-02, 12,-0.1,0.1))
summary(GBMe1tw)

pred_GBMe1tw<- predict(GBMe1tw, newx=c(1:60))
pred_GBMe1tw.inst<- make.instantaneous(pred_GBMe1tw)

plot(cumsum(tw), type= "b",xlab="Quarter", ylab="Cumulative revenues",  pch=16, lty=3, cex=0.6, xlim=c(1,60), ylim=c(0,50000))
lines(pred_GBMe1tw, lwd=2, col=2)


plot(tw, type= "b",xlab="Quarter", ylab="Quarterly revenues",  pch=16, lty=3, cex=0.6, xlim=c(1,60))
lines(pred_GBMe1tw.inst, lwd=2, col=2)