Introduction
In this report we are going to analyze the babynames
dataset to learn about name popularity over time and sharpen our data analysis skills using R!
Data
This is the dataset we will be using in this report (first 1000 rows)
babynames
The dataset has 1924665 rows
The dataset has 97310 unique names
The dataset spans a period from 1880 to 2017
# Birth year used to filter `babynames` for the top boys' and girls' names
# tabulated and plotted in the sections that follow.
my_year <- 2007
Most Popular Baby Names Born in 2007
Boys
# Boys' names recorded for `my_year`, narrowed to the 10 largest `prop`
# values and listed from most to least popular.
boys <- babynames %>%
  filter(sex == "M") %>%
  filter(year == my_year) %>%
  select(name, prop) %>%
  top_n(n = 10, wt = prop) %>%
  arrange(desc(prop))

# Print the table so it is rendered in the notebook.
boys
# Bar chart of the top boys' names for `my_year`; bars are ordered from the
# largest to the smallest `prop` and the y axis is formatted as a percentage.
boys %>%
  ggplot(aes(x = fct_reorder(name, -prop), y = prop)) +
  geom_col(fill = "steelblue", alpha = 0.5) +
  theme_bw() +
  labs(
    title = glue("Most Popular Boys Names born in {my_year}"),
    x = "",
    # `boys` holds the single-year `prop` values; nothing is averaged here,
    # so the axis is labelled as a plain proportion.
    y = "Proportion"
  ) +
  scale_y_continuous(labels = scales::percent)
Girls
# Girls' names recorded for `my_year`, narrowed to the 10 largest `prop`
# values and listed from most to least popular.
girls <- babynames %>%
  filter(sex == "F") %>%
  filter(year == my_year) %>%
  select(name, prop) %>%
  top_n(n = 10, wt = prop) %>%
  arrange(desc(prop))

# Print the table so it is rendered in the notebook.
girls
NA
# Bar chart of the top girls' names for `my_year`; bars are ordered from the
# largest to the smallest `prop` and the y axis is formatted as a percentage.
girls %>%
  ggplot(aes(x = fct_reorder(name, -prop), y = prop)) +
  geom_col(fill = "pink", alpha = 0.5) +
  theme_bw() +
  labs(
    title = glue("Most Popular Girls Names born in {my_year}"),
    x = "",
    # `girls` holds the single-year `prop` values; nothing is averaged here,
    # so the axis is labelled as a plain proportion.
    y = "Proportion"
  ) +
  scale_y_continuous(labels = scales::percent)
Popularity of My Name Over Time
# Decade to summarise, given as its first year; it is compared against
# floor(year / 10) * 10 when the decade's names are selected.
my_decade <- 1970
Most popular names in the 1970s
# Top 10 names per sex for `my_decade`, ranked by the mean of `prop` over the
# rows of each (sex, name) pair that fall inside that decade.
names_by_decade <- babynames %>%
  # Keep only the rows whose year rounds down to the chosen decade.
  filter(floor(year / 10) * 10 == my_decade) %>%
  group_by(sex, name) %>%
  summarise(prop = mean(prop)) %>%
  group_by(sex) %>%
  top_n(n = 10, wt = prop) %>%
  arrange(desc(prop)) %>%
  ungroup() %>%
  # Replace the one-letter sex codes with the labels used for the plot facets.
  mutate(sex = recode(sex, M = "Boys", F = "Girls"))
# Horizontal bar chart of `names_by_decade`, one facet per sex, with each bar
# annotated by its `prop` formatted as a percentage.
names_by_decade %>%
  ggplot(aes(x = fct_reorder(name, prop), y = prop)) +
  geom_col(aes(fill = sex), alpha = 0.7, show.legend = FALSE) +
  geom_text(
    aes(label = percent(prop, accuracy = 0.01)),
    color = "darkblue",
    nudge_y = -0.003
  ) +
  scale_fill_manual(values = c("Girls" = "pink", "Boys" = "steelblue")) +
  scale_y_continuous(labels = percent) +
  facet_wrap(~sex, scales = "free_y") +
  coord_flip() +
  # theme_bw() must come first: the text-size tweak is applied on top of it.
  theme_bw() +
  theme(text = element_text(size = 14)) +
  labs(
    title = glue("Most Popular Baby Names born in {my_decade}'s"),
    x = "",
    y = "Average Popularity"
  )
LS0tCnRpdGxlOiAiQmFieSBOYW1lcyIKYXV0aG9yOiAiQWxleCIKZGF0ZTogJ2ByIGZvcm1hdChTeXMudGltZSgpLCAiJUIgJWQsICVZICVIOiVNOiVTIiwgdHo9IkFtZXJpY2EvTG9zX0FuZ2VsZXMiLHVzZXR6PVRSVUUpYCcKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIG51bWJlcl9zZWN0aW9uczogeWVzCiAgICB0b2M6IHllcwotLS0KCmBgYHtyfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMoewogIGxpYnJhcnkodGlkeXZlcnNlKQogIGxpYnJhcnkoYmFieW5hbWVzKQogIGxpYnJhcnkoZ2x1ZSkKICBsaWJyYXJ5KHNjYWxlcykKfSkKYGBgCgoKCiMgSW50cm9kdWN0aW9uCgpJbiB0aGlzIHJlcG9ydCB3ZSBhcmUgZ29pbmcgdG8gYW5hbHl6ZSBgYmFieW5hbWVzYCBkYXRhc2V0IHRvIGxlYXJuIGFib3V0IG5hbWVzIHBvcHVsYXJpdHkgb3ZlciB0aW1lIGFuZCBzaGFycGVuIG91ciBkYXRhIGFuYWx5c2lzIHNraWxscyB1c2luZyBSIQoKIyBEYXRhCgpUaGlzIGlzIHRoZSBkYXRhc2V0IHdlIHdpbGwgYmUgdXNpbmcgaW4gdGhpcyByZXBvcnQgKGZpcnN0IDEwMDAgcm93cykKCgoKYGBge3J9CmJhYnluYW1lcyAKYGBgCgotIFRoZSBkYXRhc2V0IGhhcyBgciBucm93KGJhYnluYW1lcylgIHJvd3MKCi0gVGhlIGRhdGFzZXQgaGFzIGByIG5fZGlzdGluY3QoYmFieW5hbWVzJG5hbWUpYCB1bmlxdWUgbmFtZXMKCi0gVGhlIGRhdGFzZXQgc3BhbnMgYSBwZXJpb2QgZnJvbSBgciBtaW4oYmFieW5hbWVzJHllYXIpYCB0byBgciBtYXgoYmFieW5hbWVzJHllYXIpYAoKCmBgYHtyfQpteV95ZWFyIDwtIDIwMDcKYGBgCgojIE1vc3QgUG9wdWxhciBCYWJ5IE5hbWVzIEJvcm4gaW4gYHIgbXlfeWVhcmAKCiMjIEJveXMKCmBgYHtyfQpib3lzIDwtIAogIGJhYnluYW1lcyAlPiUgCiAgZmlsdGVyKHNleD09Ik0iLCB5ZWFyID09IG15X3llYXIpICU+JSAKICB0b3BfbigxMCxwcm9wKSAlPiUgCiAgYXJyYW5nZSgtcHJvcCkgJT4lIAogIHNlbGVjdChuYW1lLCBwcm9wKQoKYm95cwpgYGAKCmBgYHtyfQpib3lzICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBmY3RfcmVvcmRlcihuYW1lLCAtcHJvcCksIHkgPXByb3ApKSArCiAgZ2VvbV9jb2woZmlsbCA9ICJzdGVlbGJsdWUiLCBhbHBoYSA9IDAuNSkrCiAgdGhlbWVfYncoKSsKICBsYWJzKHRpdGxlID0gZ2x1ZSgiTW9zdCBQb3B1bGFyIEJveXMgTmFtZXMgYm9ybiBpbiB7bXlfeWVhcn0iKSwgCiAgICAgICB4PSIiLAogICAgICAgeSA9ICJBdmVyYWdlIFByb3BvcnRpb24iKSsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50KQpgYGAKCiMjIEdpcmxzCgoKYGBge3J9CiMgV3JpdGUgY29kZSB0byBmaW5kIHRvcCAxMCBnaXJscyBuYW1lcyBib3JuIGluIHlvdXIgeWVhciAKZ2lybHMgPC0gCiAgYmFieW5hbWVzICU+JSAKICBmaWx0ZXIoc2V4PT0iRiIsIHllYXIgPT0gbXlfeWVhcikgJT4lIAogIHRvcF9uKDEwLHByb3Ap
ICU+JSAKICBhcnJhbmdlKC1wcm9wKSAlPiUgCiAgc2VsZWN0KG5hbWUsIHByb3ApCgpnaXJscwogIApgYGAKCmBgYHtyfQojIFdyaXRlIGNvZGUgdG8gcGxvdCB0b3AgMTAgZ2lybHMgbmFtZXMgYm9ybiBpbiB5b3VyIHllYXIKZ2lybHMgJT4lIAogIGdncGxvdChhZXMoeCA9IGZjdF9yZW9yZGVyKG5hbWUsIC1wcm9wKSwgeSA9IHByb3ApKSArCiAgZ2VvbV9jb2woZmlsbCA9ICJwaW5rIiwgYWxwaGEgPSAwLjUpKwogIHRoZW1lX2J3KCkrCiAgbGFicyh0aXRsZSA9IGdsdWUoIk1vc3QgUG9wdWxhciBHaXJscyBOYW1lcyBib3JuIGluIHtteV95ZWFyfSIpLCAKICAgICAgIHg9IiIsCiAgICAgICB5ID0gIkF2ZXJhZ2UgUHJvcG9ydGlvbiIpKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQpCmBgYAoKIyBQb3B1bGFyaXR5IG9mIE15IE5hbWUgT3ZlciBUaW1lCgoKYGBge3J9Cm15X25hbWUgPC0gIkFsZXgiCgoKZGZfbXluYW1lIDwtCiAgYmFieW5hbWVzICU+JSAKICBmaWx0ZXIobmFtZSA9PSBteV9uYW1lLCBzZXggPT0gIk0iKQoKcGVhayA8LSAKICBkZl9teW5hbWUgJT4lIAogIGZpbHRlcihwcm9wID09IG1heChwcm9wKSkKCmRmX215bmFtZSAlPiUgCiAgZ2dwbG90KGFlcyh4ID0geWVhciwgeSA9IHByb3ApKSsKICBnZW9tX2xpbmUoY29sb3IgPSAicm95YWxibHVlIiwgc2l6ZSA9IDEpKwogIGdlb21fdGV4dChkYXRhID0gcGVhaywgYWVzKGxhYmVsID0gZ2x1ZSgicGVhayBwb3B1bGFyaXR5OiB7eWVhcn0iKSksbnVkZ2VfeSA9IDAuMDAwMSwgY29sb3IgPSAiZGFya2dyZWVuIikgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbCA9IHBlcmNlbnQsIGJyZWFrcyA9IHByZXR0eV9icmVha3MoOCkpKwogIHNjYWxlX3hfY29udGludW91cyhicmVha3MgPSBwcmV0dHlfYnJlYWtzKDgpKSsKICBsYWJzKHRpdGxlID0gZ2x1ZSgiUG9wdWxhcml0eSBvZiB0aGUgbmFtZSB7bXlfbmFtZX0iKSwKICAgICAgIHggPSAiWWVhciIsCiAgICAgICB5ID0gIlBvcHVsYXJpdHkiKSsKICB0aGVtZV9idygpCiAgCiN1c2UgZ2dwbG90IHRvIHBsb3QgcG9wdWxhcml0eSBvZiB5b3VyIG5hbWUgb3ZlciB0aW1lLiBUcnkgdG8gbWFrZSB5b3UgZ3JhcGggcHJldHR5IQoKYGBgCgoKYGBge3J9Cm15X2RlY2FkZSA8LSAyMDAwCmBgYAoKIyBNb3N0IHBvcHVsYXIgbmFtZXMgaW4gYHIgbXlfZGVjYWRlYCAKCmBgYHtyLCBmaWcud2lkdGg9MTAsIGZpZy5oZWlnaHQgPSA2fQoKbmFtZXNfYnlfZGVjYWRlIDwtCiAgYmFieW5hbWVzICU+JSAKICBtdXRhdGUoZGVjYWRlID0gZmxvb3IoeWVhci8xMCkqMTApICU+JSAKICBmaWx0ZXIoZGVjYWRlID09IG15X2RlY2FkZSkgJT4lCiAgZ3JvdXBfYnkoc2V4LCBuYW1lKSAlPiUgCiAgc3VtbWFyaXNlKHByb3AgPSBtZWFuKHByb3ApKSAlPiUgICAKICBncm91cF9ieShzZXgpICU+JSAKICB0b3BfbigxMCwgcHJvcCkgJT4lIAogIGFycmFuZ2UoLXByb3ApICU+JSAKICB1bmdyb3VwKCkgJT4l
IAogIG11dGF0ZShzZXggPSByZWNvZGUoc2V4LCAiTSIgPSAiQm95cyIsIkYiID0gIkdpcmxzIikpCgoKbmFtZXNfYnlfZGVjYWRlICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBmY3RfcmVvcmRlcihuYW1lLCBwcm9wKSwgeSA9cHJvcCkpICsKICBnZW9tX2NvbChhZXMoZmlsbCA9IHNleCksIGFscGhhID0gMC43LHNob3cubGVnZW5kID0gRkFMU0UpKwogIGdlb21fdGV4dChhZXMobGFiZWwgPSBwZXJjZW50KHByb3AsYWNjdXJhY3kgPSAwLjAxKSksIGNvbG9yID0gImRhcmtibHVlIiwgbnVkZ2VfeSA9IC0wLjAwMykrCiAgbGFicyh0aXRsZSA9IGdsdWUoIk1vc3QgUG9wdWxhciBCYWJ5IE5hbWVzIGJvcm4gaW4ge215X2RlY2FkZX0ncyIpLCAKICAgICAgIHg9IiIsCiAgICAgICB5ID0gIkF2ZXJhZ2UgUG9wdWxhcml0eSIpKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBwZXJjZW50KSsKICBjb29yZF9mbGlwKCkrCiAgZmFjZXRfd3JhcCh+c2V4LHNjYWxlcyA9ICJmcmVlX3kiKSsKICB0aGVtZV9idygpKwogIHRoZW1lKHRleHQgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0KSkrCiAgc2NhbGVfZmlsbF9tYW51YWwodmFsdWVzID0gYygiR2lybHMiPSAicGluayIsIkJveXMiID0gInN0ZWVsYmx1ZSIpKQpgYGAKCgpgYGB7ciwgZmlnLndpZHRoPTEwLCBmaWcuaGVpZ2h0PTZ9Cm5hbWVzX2J5X2RlY2FkZSAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gZmN0X3Jlb3JkZXIobmFtZSwgcHJvcCksIHkgPXByb3ApKSArCiAgZ2VvbV9jb2woYWVzKGZpbGwgPSBzZXgpLCBhbHBoYSA9IDAuNykrCiAgZmFjZXRfd3JhcCh+c2V4LHNjYWxlcyA9ICJmcmVlX3kiKSsKICBjb29yZF9mbGlwKCkrCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHBlcmNlbnQocHJvcCxhY2N1cmFjeSA9IDAuMDEpKSwgY29sb3IgPSAiZGFya2JsdWUiLG51ZGdlX3kgPSAtMC4wMDMpCmBgYAoKCg==