suppressPackageStartupMessages({
  library(tidyverse)
  library(scales)
  library(glue)
  library(tidyquant)
  library(DT)
  library(lubridate)
  library(treemapify)
})

1 S&P 500 Index

In this project we will analyze stocks in S&P 500 index (symbol: ^GSPC) which measures the stock performance of 500 large companies listed on stock exchanges in the United States (Wikipedia).

We can get daily values of the index using tq_get() function of the tidyquant package

sp <- tq_get("^GSPC", from = "1950-01-01") 

sp %>% 
  select(-adjusted, -symbol) %>% 
  DT::datatable(
    caption = "S&P 500 Raw Data",
    options = list(
      dom = 'tip',
      pageLength = 10,
      autoWidth = TRUE
    ),
    rownames = FALSE) %>% 
  formatRound(columns = 2:5,digits = 2)

Let’s use ggplot to plot the close column vs date (fill in the missing code)

sp %>%  
  ggplot(aes(x = date, y = close))+
  geom_line(color= "steelblue")+
  scale_x_date(breaks = pretty_breaks(8))+
  scale_y_continuous(breaks = pretty_breaks(8), labels = comma_format(prefix = "$"))+
  labs(x = "", y="", title = "S&P 500 Index")+
  theme_bw()

A question: how much did the index S&P 500 gain since Jan 1, 1950?

sp %>% 
  summarise(gain = last(close)/first(close))

Annual returns can be calculated as the following

sp_annual_returns <- 
  sp %>%
  tq_transmute(select     = close, 
               mutate_fun = annualReturn,
               col_rename = "gain") %>% 
  mutate(year= year(date))
sp_annual_returns %>% 
  mutate(is_pos = gain > 0) %>% 
  ggplot(aes(x = year, y = gain))+
  geom_col(aes(fill = is_pos), width = 1,show.legend = FALSE)+
  scale_x_continuous(breaks = pretty_breaks(8))+
  scale_y_continuous(breaks = pretty_breaks(8), labels = percent_format(accuracy = 1))+
  labs(x = "", y="", title = "Annual Returns S&P 500 Index")+
  theme_bw()+
  geom_smooth(method = "lm", se = FALSE, lty = "dashed", size  = 0.5)

Average annual return

sp_annual_returns %>% 
  filter(year < 2020) %>% 
  summarise(average_gain = percent(mean(gain),accuracy = 0.01))
NA

2 Stocks comprising the S&P500 Index

df <- tq_index("SP500") %>% 
  select(symbol, company, weight, sector)

df %>% 
  DT::datatable(
    caption = "S&P 500 Stocks",
    filter = "top",
    options = list(
      dom = 'tip',
      pageLength = 10,
      autoWidth = TRUE
    ),
    rownames = FALSE) %>% 
  formatPercentage(columns = 3,digits = 2)

Let’s now find the composition of the index based on various sectors

df %>% 
  group_by(sector) %>% 
  summarise(N= n(),
            weight = sum(weight)) %>% 
  arrange(-weight) %>% 
  DT::datatable(
    caption = "S&P 500 Breakdown by Sector",
    options = list(
      dom = 't',
      pageLength = 11,
      autoWidth = TRUE
    ),
    rownames = FALSE) %>% 
  formatPercentage(columns = 3,digits = 2)
df %>% 
  ggplot(aes(area = weight, fill = fct_reorder(sector, weight, .fun = sum,.desc = TRUE),label = symbol, subgroup = sector))+
  geom_treemap(alpha = 0.5, start = "topleft")+
  geom_treemap_text(start = "topleft", size = 12)+
  scale_fill_viridis_d(name = "Sector")

Which sector performed best since the beginning of the month?

qt<- 
  df %>% 
  pull(symbol) %>% 
  tq_get(from="2020-06-01")
dfj <- 
  qt %>% 
  filter(date > today()-ddays(7)) %>% 
  group_by(symbol) %>% 
  summarise(gain = last(close)/first(close)-1) %>% 
  left_join(df, by = "symbol")
dfj %>% 
  group_by(sector) %>% 
  summarise(average_gain = sum(gain*weight,na.rm = TRUE)/sum(weight,na.rm = TRUE)) %>% 
  arrange(-average_gain) %>% 
  DT::datatable(
    caption = "Last Week Perfomance by Sector",
    width = "500px",
    options = list(
      dom = 't',
      pageLength = 11,
      autoWidth = TRUE
    ),
    rownames = FALSE) %>% 
  formatPercentage(columns = 2,digits = 2)

Which stocks had the highest gain in each sector?

dfj %>% 
  group_by(sector) %>% 
  top_n(1,gain) %>% 
  select(symbol, company, gain, sector) %>% 
  arrange(-gain) %>% 
  DT::datatable(
    caption = "Best Stocks by Sector in June",
    width = "500px",
    options = list(
      dom = 't',
      pageLength = 11,
      autoWidth = TRUE
    ),
    rownames = FALSE) %>% 
  formatPercentage(columns = 3,digits = 2)

NA

3 Individual Stocks Performance

3.1 Candlestick Plot

c("AAPL") %>% 
  tq_get(from = "2020-01-01") %>% 
  ggplot(aes(date, close))+
  geom_candlestick(aes(open = open, high = high, low = low, close = close)) +
  scale_y_continuous(breaks = breaks_extended(6), labels = comma_format(prefix = "$"))+
  scale_x_date(breaks = pretty_breaks(6))+
  facet_wrap(~symbol, scales = "free_y") +
  theme_tq()+
  labs(x= "",y= "")

NA
LS0tCnRpdGxlOiAiSW50cm9kdWN0aW9uIHRvIFN0b2NrIE1hcmtldCBBbmFseXNpcyIKYXV0aG9yOiAiQWxleCIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIG51bWJlcl9zZWN0aW9uczogeWVzCiAgICB0aGVtZTogY2VydWxlYW4KICAgIHRvYzogeWVzCi0tLQoKYGBge3J9CnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyh7CiAgbGlicmFyeSh0aWR5dmVyc2UpCiAgbGlicmFyeShzY2FsZXMpCiAgbGlicmFyeShnbHVlKQogIGxpYnJhcnkodGlkeXF1YW50KQogIGxpYnJhcnkoRFQpCiAgbGlicmFyeShsdWJyaWRhdGUpCiAgbGlicmFyeSh0cmVlbWFwaWZ5KQp9KQpgYGAKCiMgUyZQIDUwMCBJbmRleAoKSW4gdGhpcyBwcm9qZWN0IHdlIHdpbGwgYW5hbHl6ZSBzdG9ja3MgaW4gUyZQIDUwMCBpbmRleCAoc3ltYm9sOiBeR1NQQykgd2hpY2ggbWVhc3VyZXMgdGhlIHN0b2NrIHBlcmZvcm1hbmNlIG9mIDUwMCBsYXJnZSBjb21wYW5pZXMgbGlzdGVkIG9uIHN0b2NrIGV4Y2hhbmdlcyBpbiB0aGUgVW5pdGVkIFN0YXRlcyAoW1dpa2lwZWRpYV0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvUyUyNlBfNTAwX0luZGV4KSkuCgpXZSBjYW4gZ2V0IGRhaWx5IHZhbHVlcyBvZiB0aGUgaW5kZXggdXNpbmcgYHRxX2dldCgpYCBmdW5jdGlvbiBvZiB0aGUgYHRpZHlxdWFudGAgcGFja2FnZQoKYGBge3J9CnNwIDwtIHRxX2dldCgiXkdTUEMiLCBmcm9tID0gIjE5NTAtMDEtMDEiKSAKCnNwICU+JSAKICBzZWxlY3QoLWFkanVzdGVkLCAtc3ltYm9sKSAlPiUgCiAgRFQ6OmRhdGF0YWJsZSgKICAgIGNhcHRpb24gPSAiUyZQIDUwMCBSYXcgRGF0YSIsCiAgICBvcHRpb25zID0gbGlzdCgKICAgICAgZG9tID0gJ3RpcCcsCiAgICAgIHBhZ2VMZW5ndGggPSAxMCwKICAgICAgYXV0b1dpZHRoID0gVFJVRQogICAgKSwKICAgIHJvd25hbWVzID0gRkFMU0UpICU+JSAKICBmb3JtYXRSb3VuZChjb2x1bW5zID0gMjo1LGRpZ2l0cyA9IDIpCmBgYAoKTGV0J3MgdXNlIGBnZ3Bsb3RgIHRvIHBsb3QgdGhlIGBjbG9zZWAgY29sdW1uIHZzIGBkYXRlYCAgKGZpbGwgaW4gdGhlIG1pc3NpbmcgY29kZSkKCmBgYHtyfQpzcCAlPiUgIAogIGdncGxvdChhZXMoeCA9IGRhdGUsIHkgPSBjbG9zZSkpKwogIGdlb21fbGluZShjb2xvcj0gInN0ZWVsYmx1ZSIpKwogIHNjYWxlX3hfZGF0ZShicmVha3MgPSBwcmV0dHlfYnJlYWtzKDgpKSsKICBzY2FsZV95X2NvbnRpbnVvdXMoYnJlYWtzID0gcHJldHR5X2JyZWFrcyg4KSwgbGFiZWxzID0gY29tbWFfZm9ybWF0KHByZWZpeCA9ICIkIikpKwogIGxhYnMoeCA9ICIiLCB5PSIiLCB0aXRsZSA9ICJTJlAgNTAwIEluZGV4IikrCiAgdGhlbWVfYncoKQpgYGAKCgpBIHF1ZXN0aW9uOiBob3cgbXVjaCBkaWQgdGhlIGluZGV4IFMmUCA1MDAgZ2FpbiBzaW5jZSBKYW4gMSwgMTk1MD8KCmBgYHtyfQpzcCAlPiUgCiAgc3VtbWFyaXNlKGdhaW4gPSBsYXN0KGNsb3NlKS9maXJzdChjbG9zZSkpCmBgYAoKQW5udWFsIHJldHVybnMgY2FuIGJlIGNhbGN1bGF0ZWQgYXMgdGhlIGZvbGxvd2luZwoKYGBge3J9CnNwX2FubnVhbF9yZXR1cm5zIDwtIAogIHNwICU+JQogIHRxX3RyYW5zbXV0ZShzZWxlY3QgICAgID0gY2xvc2UsIAogICAgICAgICAgICAgICBtdXRhdGVfZnVuID0gYW5udWFsUmV0dXJuLAogICAgICAgICAgICAgICBjb2xfcmVuYW1lID0gImdhaW4iKSAlPiUgCiAgbXV0YXRlKHllYXI9IHllYXIoZGF0ZSkpCgoKCmBgYAoKCmBgYHtyLCBtZXNzYWdlID0gRkFMU0V9CnNwX2FubnVhbF9yZXR1cm5zICU+JSAKICBtdXRhdGUoaXNfcG9zID0gZ2FpbiA+IDApICU+JSAKICBnZ3Bsb3QoYWVzKHggPSB5ZWFyLCB5ID0gZ2FpbikpKwogIGdlb21fY29sKGFlcyhmaWxsID0gaXNfcG9zKSwgd2lkdGggPSAxLHNob3cubGVnZW5kID0gRkFMU0UpKwogIHNjYWxlX3hfY29udGludW91cyhicmVha3MgPSBwcmV0dHlfYnJlYWtzKDgpKSsKICBzY2FsZV95X2NvbnRpbnVvdXMoYnJlYWtzID0gcHJldHR5X2JyZWFrcyg4KSwgbGFiZWxzID0gcGVyY2VudF9mb3JtYXQoYWNjdXJhY3kgPSAxKSkrCiAgbGFicyh4ID0gIiIsIHk9IiIsIHRpdGxlID0gIkFubnVhbCBSZXR1cm5zIFMmUCA1MDAgSW5kZXgiKSsKICB0aGVtZV9idygpKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIHNlID0gRkFMU0UsIGx0eSA9ICJkYXNoZWQiLCBzaXplICA9IDAuNSkKYGBgCgpBdmVyYWdlIGFubnVhbCByZXR1cm4KCmBgYHtyfQpzcF9hbm51YWxfcmV0dXJucyAlPiUgCiAgZmlsdGVyKHllYXIgPCAyMDIwKSAlPiUgCiAgc3VtbWFyaXNlKGF2ZXJhZ2VfZ2FpbiA9IHBlcmNlbnQobWVhbihnYWluKSxhY2N1cmFjeSA9IDAuMDEpKQoKYGBgCgojIFN0b2NrcyBjb21wcmlzaW5nIHRoZSBTJlA1MDAgSW5kZXgKCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQpkZiA8LSB0cV9pbmRleCgiU1A1MDAiKSAlPiUgCiAgc2VsZWN0KHN5bWJvbCwgY29tcGFueSwgd2VpZ2h0LCBzZWN0b3IpCgpkZiAlPiUgCiAgRFQ6OmRhdGF0YWJsZSgKICAgIGNhcHRpb24gPSAiUyZQIDUwMCBTdG9ja3MiLAogICAgZmlsdGVyID0gInRvcCIsCiAgICBvcHRpb25zID0gbGlzdCgKICAgICAgZG9tID0gJ3RpcCcsCiAgICAgIHBhZ2VMZW5ndGggPSAxMCwKICAgICAgYXV0b1dpZHRoID0gVFJVRQogICAgKSwKICAgIHJvd25hbWVzID0gRkFMU0UpICU+JSAKICBmb3JtYXRQZXJjZW50YWdlKGNvbHVtbnMgPSAzLGRpZ2l0cyA9IDIpCmBgYAoKTGV0J3Mgbm93IGZpbmQgdGhlIGNvbXBvc2l0aW9uIG9mIHRoZSBpbmRleCBiYXNlZCBvbiB2YXJpb3VzIHNlY3RvcnMKCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQpkZiAlPiUgCiAgZ3JvdXBfYnkoc2VjdG9yKSAlPiUgCiAgc3VtbWFyaXNlKE49IG4oKSwKICAgICAgICAgICAgd2VpZ2h0ID0gc3VtKHdlaWdodCkpICU+JSAKICBhcnJhbmdlKC13ZWlnaHQpICU+JSAKICBEVDo6ZGF0YXRhYmxlKAogICAgY2FwdGlvbiA9ICJTJlAgNTAwIEJyZWFrZG93biBieSBTZWN0b3IiLAogICAgb3B0aW9ucyA9IGxpc3QoCiAgICAgIGRvbSA9ICd0JywKICAgICAgcGFnZUxlbmd0aCA9IDExLAogICAgICBhdXRvV2lkdGggPSBUUlVFCiAgICApLAogICAgcm93bmFtZXMgPSBGQUxTRSkgJT4lIAogIGZvcm1hdFBlcmNlbnRhZ2UoY29sdW1ucyA9IDMsZGlnaXRzID0gMikKYGBgCgpgYGB7ciwgZmlnLmhlaWdodD0gMTAsIGZpZy53aWR0aCA9IDEyfQpkZiAlPiUgCiAgZ2dwbG90KGFlcyhhcmVhID0gd2VpZ2h0LCBmaWxsID0gZmN0X3Jlb3JkZXIoc2VjdG9yLCB3ZWlnaHQsIC5mdW4gPSBzdW0sLmRlc2MgPSBUUlVFKSxsYWJlbCA9IHN5bWJvbCwgc3ViZ3JvdXAgPSBzZWN0b3IpKSsKICBnZW9tX3RyZWVtYXAoYWxwaGEgPSAwLjUsIHN0YXJ0ID0gInRvcGxlZnQiKSsKICBnZW9tX3RyZWVtYXBfdGV4dChzdGFydCA9ICJ0b3BsZWZ0Iiwgc2l6ZSA9IDEyKSsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZChuYW1lID0gIlNlY3RvciIpCmBgYAoKCldoaWNoIHNlY3RvciBwZXJmb3JtZWQgYmVzdCBzaW5jZSB0aGUgYmVnaW5uaW5nIG9mIHRoZSBtb250aD8KCgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KcXQ8LSAKICBkZiAlPiUgCiAgcHVsbChzeW1ib2wpICU+JSAKICB0cV9nZXQoZnJvbT0iMjAyMC0wNi0wMSIpCgpgYGAKCmBgYHtyLCBtZXNzYWdlPUZBTFNFLHdhcm5pbmc9RkFMU0V9CmRmaiA8LSAKICBxdCAlPiUgCiAgZmlsdGVyKGRhdGUgPiB0b2RheSgpLWRkYXlzKDcpKSAlPiUgCiAgZ3JvdXBfYnkoc3ltYm9sKSAlPiUgCiAgc3VtbWFyaXNlKGdhaW4gPSBsYXN0KGNsb3NlKS9maXJzdChjbG9zZSktMSkgJT4lIAogIGxlZnRfam9pbihkZiwgYnkgPSAic3ltYm9sIikKYGBgCgpgYGB7cixtZXNzYWdlPUZBTFNFfQpkZmogJT4lIAogIGdyb3VwX2J5KHNlY3RvcikgJT4lIAogIHN1bW1hcmlzZShhdmVyYWdlX2dhaW4gPSBzdW0oZ2Fpbip3ZWlnaHQsbmEucm0gPSBUUlVFKS9zdW0od2VpZ2h0LG5hLnJtID0gVFJVRSkpICU+JSAKICBhcnJhbmdlKC1hdmVyYWdlX2dhaW4pICU+JSAKICBEVDo6ZGF0YXRhYmxlKAogICAgY2FwdGlvbiA9ICJMYXN0IFdlZWsgUGVyZm9tYW5jZSBieSBTZWN0b3IiLAogICAgd2lkdGggPSAiNTAwcHgiLAogICAgb3B0aW9ucyA9IGxpc3QoCiAgICAgIGRvbSA9ICd0JywKICAgICAgcGFnZUxlbmd0aCA9IDExLAogICAgICBhdXRvV2lkdGggPSBUUlVFCiAgICApLAogICAgcm93bmFtZXMgPSBGQUxTRSkgJT4lIAogIGZvcm1hdFBlcmNlbnRhZ2UoY29sdW1ucyA9IDIsZGlnaXRzID0gMikKYGBgCgpXaGljaCBzdG9ja3MgaGFkIHRoZSBoaWdoZXN0IGdhaW4gaW4gZWFjaCBzZWN0b3I/CgpgYGB7cn0KZGZqICU+JSAKICBncm91cF9ieShzZWN0b3IpICU+JSAKICB0b3BfbigxLGdhaW4pICU+JSAKICBzZWxlY3Qoc3ltYm9sLCBjb21wYW55LCBnYWluLCBzZWN0b3IpICU+JSAKICBhcnJhbmdlKC1nYWluKSAlPiUgCiAgRFQ6OmRhdGF0YWJsZSgKICAgIGNhcHRpb24gPSAiQmVzdCBTdG9ja3MgYnkgU2VjdG9yIGluIEp1bmUiLAogICAgd2lkdGggPSAiNTAwcHgiLAogICAgb3B0aW9ucyA9IGxpc3QoCiAgICAgIGRvbSA9ICd0JywKICAgICAgcGFnZUxlbmd0aCA9IDExLAogICAgICBhdXRvV2lkdGggPSBUUlVFCiAgICApLAogICAgcm93bmFtZXMgPSBGQUxTRSkgJT4lIAogIGZvcm1hdFBlcmNlbnRhZ2UoY29sdW1ucyA9IDMsZGlnaXRzID0gMikKICAKYGBgCgoKIyBJbmRpdmlkdWFsIFN0b2NrcyBQZXJmb3JtYW5jZQoKIyMgQ2FuZGxlc3RpY2sgUGxvdAoKYGBge3IsZmlnLndpZHRoPTEyLCBmaWcuaGVpZ2h0PTR9CmMoIkFBUEwiKSAlPiUgCiAgdHFfZ2V0KGZyb20gPSAiMjAyMC0wMS0wMSIpICU+JSAKICBnZ3Bsb3QoYWVzKGRhdGUsIGNsb3NlKSkrCiAgZ2VvbV9jYW5kbGVzdGljayhhZXMob3BlbiA9IG9wZW4sIGhpZ2ggPSBoaWdoLCBsb3cgPSBsb3csIGNsb3NlID0gY2xvc2UpKSArCiAgc2NhbGVfeV9jb250aW51b3VzKGJyZWFrcyA9IGJyZWFrc19leHRlbmRlZCg2KSwgbGFiZWxzID0gY29tbWFfZm9ybWF0KHByZWZpeCA9ICIkIikpKwogIHNjYWxlX3hfZGF0ZShicmVha3MgPSBwcmV0dHlfYnJlYWtzKDYpKSsKICBmYWNldF93cmFwKH5zeW1ib2wsIHNjYWxlcyA9ICJmcmVlX3kiKSArCiAgdGhlbWVfdHEoKSsKICBsYWJzKHg9ICIiLHk9ICIiKQogIApgYGAKCgoKCgoKCgo=