Project
Team Python: Don Bui, Ben Aboagyi, Candida Lubongo, Hassan Jamy
10/31/2020
# Point the session at the project folder. Forward slashes are used because a
# backslash inside an R string literal starts an escape sequence.
setwd("D:/Temp/UHD/STAT5307/Project")
datastck <- read.csv("AAPL.csv")
# Using the adjusted closing values, labelled as a monthly series running from
# Jan 2015 through Oct 2020.
# NOTE(review): ts() silently truncates or recycles the data to fit the
# start/end window; confirm AAPL.csv holds exactly one row per month.
stck <- ts(datastck$Adj.Close, start = c(2015, 1), end = c(2020, 10),
           frequency = 12)
# Plot ts objects
# autoplot(stck) + labs(x = "Year", y = "Closing Values", title = "stock")
# plot.ts(stck, main = "stock", ylab = "Closing Values", xlab = "Year")
plot(stck, main = "APPLE stock")
# smooth0 <- ksmooth(time(stck), stck, "normal", bandwidth = .5)
# lines(smooth0, col = "red")
# Lowess smoother: f = .1 lets 10% of the points influence each smoothed value.
smooth2 <- lowess(stck, f = .1)
lines(smooth2, col = "blue")
APPLE stock
Time
stck
2015 2016 2017 2018 2019 2020
24 25 26 27 28 29 30
library(ggfortify)
1
## Warning: package ‘ggfortify’ was built under R version 4.0.3
## Loading required package: ggplot2
# Hold out 2020 as the test set: the training series covers Jan 2015 through
# Dec 2019. window() subsets by time and keeps the monthly frequency of stck.
stck_training <- window(stck, start = c(2015, 1), end = c(2019, 12))
# autoplot(stck_training) + labs(x = "Year", y = "Closing Values", title = "stock")
# plot.ts(stck_training, main = "stock", ylab = "Closing Values", xlab = "Year")
plot(stck_training, main = "APPLE stock")
# Kernel smoother (normal kernel); the bandwidth is in the units of
# time(stck_training).
smooth0 <- ksmooth(time(stck_training), stck_training, "normal", bandwidth = .5)
lines(smooth0, col = "red")
# Lowess smoother: f = .1 lets 10% of the points influence each smoothed value.
smooth2 <- lowess(stck_training, f = .1)
lines(smooth2, col = "blue")
APPLE stock
Time
stck_training
2015 2016 2017 2018 2019 2020
24 25 26 27 28 29 30
# Label reused in plot titles.
ts_object_name <- "APPLE"
# STL decomposition of the training series with a periodic seasonal window,
# drawn through autoplot() and lightly themed.
autoplot(stl(stck_training, s.window = "periodic"),
         main = sprintf("Decomposition anatomy Plot of %s training data",
                        ts_object_name),
         ts.colour = "turquoise4") +
  theme(panel.background = element_rect(fill = "gray98"),
        axis.line.y = element_line(colour = "gray"),
        axis.line.x = element_line(colour = "gray"))
2
trend
seasonal
remainder
Data
2016 2018 2020
24
26
28
30
-1.0
-0.5
0.0
0.5
1.0
-0.3
-0.2
-0.1
0.0
0.1
0.2
25
26
27
28
29
Decomposition anatomy Plot of APPLE training data
# astsa is attached for acf2().
library(astsa)
# Plot and print the sample ACF and PACF of the (undifferenced) training series.
acf2(stck_training)
3
0 1 2 3 4
-0.4 0.2 0.6 1.0
Series: stck_training
LAG
ACF
0 1 2 3 4
-0.4 0.2 0.6 1.0
LAG
PACF
##      [,1]  [,2] [,3]  [,4] [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  0.95  0.89 0.84  0.80 0.76  0.72  0.67  0.61  0.55  0.49  0.41  0.34  0.29
## PACF 0.95 -0.08 0.07 -0.01 0.02 -0.01 -0.10 -0.09 -0.07 -0.13 -0.20  0.10  0.01
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF   0.24  0.18  0.12  0.05 -0.01 -0.05 -0.09 -0.14 -0.18 -0.22 -0.26  -0.3
## PACF  0.03 -0.11 -0.06 -0.16  0.10  0.07 -0.02 -0.07 -0.03 -0.08  0.00  -0.1
##      [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF  -0.34 -0.37 -0.39 -0.39 -0.39 -0.39 -0.39 -0.39 -0.38 -0.36 -0.33 -0.32
## PACF  0.05  0.00 -0.05  0.12  0.05  0.03  0.06 -0.05 -0.02  0.12 -0.03 -0.17
##      [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF  -0.31 -0.30 -0.29 -0.27 -0.26 -0.25 -0.23 -0.21 -0.19 -0.19 -0.19
## PACF -0.10 -0.02  0.01 -0.03 -0.10 -0.04  0.14 -0.10  0.02 -0.02  0.00
Differencing the time series for stationarity.
library(forecast)
## Registered S3 method overwritten by ‘quantmod’:
## ## |
method | from |
as.zoo.data.frame zoo | ||
## Registered S3 methods overwritten by ‘forecast’: | ||
## ## ## ## ## ## |
method autoplot.Arima autoplot.acf autoplot.ar autoplot.bats |
from ggfortify ggfortify ggfortify ggfortify |
autoplot.decomposed.ts ggfortify |
4
## ## ## ## ## ## ## ## |
autoplot.ets autoplot.forecast autoplot.stl autoplot.ts fitted.ar fortify.ts residuals.ar |
ggfortify ggfortify ggfortify ggfortify ggfortify ggfortify ggfortify |
## Attaching package: ‘forecast’
## The following object is masked from ‘package:astsa’:
##
## gas
# Lay out subsequent base-graphics plots in two rows and one column.
par(mfrow = c(2, 1))
# First-order (lag-1) difference of the training series, followed by the
# sample ACF/PACF of the differenced values.
xx_diff1 <- diff(stck_training, lag = 1, differences = 1)
acf2(xx_diff1)
0.1 0.2 0.3 0.4 0.5 0.6 0.7
-0.2 0.0 0.2
Series: xx_diff1
LAG
ACF
0.1 0.2 0.3 0.4 0.5 0.6 0.7
-0.2 0.0 0.2
LAG
PACF
##       [,1]  [,2] [,3]  [,4]  [,5] [,6] [,7]  [,8] [,9]
## ACF  -0.01 -0.21 0.04  0.04 -0.05 0.01 0.10 -0.02 0.00
## PACF -0.01 -0.21 0.03 -0.01 -0.03 0.02 0.09 -0.01 0.04
# Time plot of the first-differenced training series.
plot.ts(xx_diff1, main = "First Order Differencing 2015 – 2019")
# Seasonal plot of the differenced series: one line per year, labelled at both
# ends, to look for a repeating within-year pattern.
ggseasonplot(xx_diff1, xlab = "Year",
             main = sprintf("Seasonality Plot of %s training data after the first order differencing",
                            ts_object_name),
             year.labels = TRUE, year.labels.left = TRUE,
             col = 1:20, pch = 19) +
  theme(panel.background = element_rect(fill = "gray98"),
        axis.line.y = element_line(colour = "gray"),
        axis.line.x = element_line(colour = "gray"))
2015 | |
2015 2019 |
|
2016 | 2016 |
2017 | 2019 |
2018 |
2017 2018
-1.0
-0.5
0.0
0.5
1.0
1.5
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
Year
Seasonality Plot of APPLE training data after the first order differencing
library(TSA)
## Warning: package ‘TSA’ was built under R version 4.0.3
## Registered S3 methods overwritten by ‘TSA’:
## ## ## ## |
method | from |
fitted.Arima forecast | ||
plot.Arima | forecast |
## Attaching package: ‘TSA’
## The following objects are masked from ‘package:stats’:
##
## | acf, arima |
## The following object is masked from ‘package:utils’: | |
## ## |
tar |
# Extended ACF table of the differenced series (AR order in rows, MA order in
# columns), printed as a grid of "o"/"x" marks.
eacf(xx_diff1)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
6
## 0 o o o o o o o o o o o  o  o  o
## 1 o o o o o o o o o o o  o  o  o
## 2 o o o o o o o o o o o  o  o  o
## 3 x o o o o o o o o o o  o  o  o
## 4 x x o o o o o o o o o  o  o  o
## 5 x o o o o o o o o o o  o  o  o
## 6 o x o o o o o o o o o  o  o  o
## 7 o o o o o o o o o o o  o  o  o
auto.arima(stck_training)
## Series: stck_training
## ARIMA(0,1,0)
##
## sigma^2 estimated as 0.2199:  log likelihood=-39.04
## AIC=80.07   AICc=80.14   BIC=82.15
# Automatic ARIMA order selection on the already-differenced series.
auto.arima(xx_diff1)
## Series: xx_diff1
## ARIMA(0,0,0) with zero mean
##
## sigma^2 estimated as 0.2199:  log likelihood=-39.04
## AIC=80.07   AICc=80.14   BIC=82.15
# NOTE(review): the coerced stock data frame assigned here is overwritten by
# the very next statement, so it never reaches the plot or the decomposition.
datastck.ts.m <- as.ts(datastck, freq = 12)
# NOTE(review): this replaces the series with 24 random uniform draws between
# 1 and 100 (two periods at frequency 12). Everything derived from
# datastck.ts.m therefore describes random noise rather than the stock data,
# and changes on every run because no seed is set. Confirm whether the stock
# series was intended here; the "Quarterly" title and "ML" axis label also do
# not match a frequency-12 series of closing values.
datastck.ts.m <- ts(runif(24, 1, 100), frequency = 12)
plot.ts(datastck.ts.m, main = "Quarterly Closing values", xlab = "Year",
        ylab = "ML")
7
Quarterly Closing values
Year
ML
1.0 1.5 2.0 2.5
20 40 60 80 100
# Classical additive decomposition of datastck.ts.m; the result carries the
# seasonal and trend components inspected afterwards.
datastckdecomp <- decompose(datastck.ts.m, type = "additive")
# Draw the decomposition panels.
plot(datastckdecomp)
8
20 60 100
observed
40 44 48
trend
-40 0 40
seasonal
1.5 2.5
1.0 1.5 2.0 2.5
random
Time
Decomposition of additive time series
datastckdecomp$seasonal # seasonal effects for each month (series frequency is 12)
##          Jan        Feb        Mar        Apr        May        Jun        Jul
## 1 -25.237260 -33.037728  45.342599  35.612038 -12.534159 -43.122546  55.865541
## 2 -25.237260 -33.037728  45.342599  35.612038 -12.534159 -43.122546  55.865541
##          Aug        Sep        Oct        Nov        Dec
## 1   3.571284   7.430242  30.792661 -29.789815 -34.892858
## 2   3.571284   7.430242  30.792661 -29.789815 -34.892858
# Trend component of the decomposition; the printed values are NA for the
# first and last six months of the two-period series.
datastckdecomp$trend
##        Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1       NA       NA       NA       NA       NA       NA 40.48943 39.76438
## 2 45.49911 44.96093 46.57677 46.52263 46.94383 48.80385       NA       NA
##        Sep      Oct      Nov      Dec
## 1 39.11106 42.99789 46.54505 46.86471
## 2       NA       NA       NA       NA
library(forecast)
# Fit ARIMA(0,1,0) with a drift term to the training series (a random walk
# with drift).
fit <- Arima(stck_training, order = c(0,1,0), include.drift = TRUE)
# Print the drift estimate, information criteria and training-set error
# measures.
summary(fit)
## Series: stck_training
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0678
9
## s.e.   0.0604
##
## sigma^2 estimated as 0.219:  log likelihood=-38.41
## AIC=80.83   AICc=81.04   BIC=84.98
##
## Training set error measures:
##                        ME     RMSE       MAE          MPE     MAPE      MASE
## Training set 0.0004145612 0.460131 0.3578535 -0.007622535 1.312634 0.2388292
##                     ACF1
## Training set -0.01159368
# Residuals of the fitted drift model, i.e. the series with the fitted
# random-walk-plus-drift structure removed.
detrended.fit <- residuals(fit)
# Sample autocorrelation of those residuals.
acf(detrended.fit)
0.2 0.4 0.6 0.8 1.0 1.2 1.4
-0.2 -0.1 0.0 0.1 0.2
Series detrended.fit
Lag
ACF
# #RESIDUAL DIAGNOSTICS
# res <- fit$residuals
#
# autoplot(res) +
#   ggplot2::geom_hline(yintercept = 0, linetype = 'dashed', size = .2, colour = '#888888') +
#   labs(subtitle = '') +
#   ggplot2::ggtitle(sprintf("Residual Diagnostics for %s \nNon-Standardized Residuals", ts_object_name))
#
# acfobj <- stats::acf(res, plot = FALSE, na.action = stats::na.pass)
# autoplot(acfobj) +
#   ggplot2::ggtitle(sprintf("ACF residuals for %s", ts_object_name))
#
# gof.lag = 10
# nlag <- gof.lag
# pval <- numeric(nlag)
# for (i in 1L:nlag) pval[i] <- stats::Box.test(res, i, type = "Ljung-Box")$p.value
# lb.df <- data.frame(Lag = 1L:nlag, `p value` = pval, lower = -0.05, upper = 0.05)
#
# # Unable to create column with space by above expression
# colnames(lb.df) <- c('Lag', 'p value', 'lower', 'upper')
# p.lb <- ggplot2::ggplot(data = lb.df, mapping = ggplot2::aes_string(x = 'Lag')) +
#   ggplot2::geom_point(mapping = ggplot2::aes_string(y = '`p value`'), na.rm = TRUE, colour = 'turquoise4') +
#   ggplot2::scale_y_continuous(limits=c(-0.1, 1)) +
#   ggplot2::ggtitle('p values for Ljung-Box statistic')
# ggfortify:::plot_confint(p = p.lb, data = lb.df, conf.int = TRUE, conf.int.colour = '#0000FF', conf.int.linetype = 'dashed', conf.int.fill = NULL, conf.int.alpha = conf.int.alpha)
# Forecast the 12 periods that follow the training window, then plot the
# forecast together with the training series.
for_datastck_all <- forecast(fit, h = 12)
plot(for_datastck_all, main = "APPLE stock test set for 2020")
APPLE stock test set for 2020
2015 2016 2017 2018 2019 2020 2021
24 26 28 30 32
11