Figure 1: Bump chart illustrating the ranking of U.S. National Parks over time (1904-2016) by the count of visitors. This chart highlights the top 5 national parks as of 2016, and shows their relative popularity over time.

1. Load Packages & Setup

Show code

# Bootstrap pacman without attaching it: it is only ever called as `pacman::`,
# and requireNamespace() is the non-attaching way to test for a package.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() installs any package that is missing and then attaches it.
# ggrepel and magick are listed because later chunks call them via `::`;
# without them here a fresh machine would never get them installed.
pacman::p_load(
  tidyverse,
  tidytuesdayR,
  dlookr,
  ggtext,
  gt,
  gtExtras,  # for font awesome icons in gt tables
  ggbump,
  showtext,
  janitor,   # for clean_names()
  scales,
  htmltools, # for tagList()
  glue,
  here,
  geomtextpath,
  ggrepel,   # for geom_text_repel() labels in both plots
  magick     # for the thumbnail in the save step
)

# Register the two font families the plots refer to by name:
# "fa-brands" from a local .otf file and "lato" from Google Fonts.
sysfonts::font_add(
  family = "fa-brands",
  regular = "fonts/Font Awesome 6 Brands-Regular-400.otf"
)
sysfonts::font_add_google(name = "Lato", family = "lato")

# Let showtext draw text on every graphics device, at 300 dpi.
showtext::showtext_auto()
showtext::showtext_opts(dpi = 300)

2. Read in the Data

Show code

# TidyTuesday release to download (also reused for the output file names).
tt_year <- 2019
tt_week <- 38

# Fetch that week's datasets and keep the national parks table.
tuesdata <- tidytuesdayR::tt_load(tt_year, week = tt_week)
df <- tuesdata[["national_parks"]]

3. Examine the Data

Show code

# Print a column-by-column overview (name, type, first values) of the raw data.
glimpse(df)

Rows: 21,560
Columns: 12
$ year              <chr> "1904", "1941", "1961", "1935", "1982", "1919", "196…
$ gnis_id           <dbl> 1163670, 1531834, 2055170, 1530459, 277263, 578853, …
$ geometry          <chr> "POLYGON", "MULTIPOLYGON", "MULTIPOLYGON", "MULTIPOL…
$ metadata          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ number_of_records <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ parkname          <chr> "Crater Lake", "Lake Roosevelt", "Lewis and Clark", …
$ region            <chr> "PW", "PW", "PW", "PW", "PW", "NE", "IM", "NE", "IM"…
$ state             <chr> "OR", "WA", "WA", "WA", "CA", "ME", "TX", "MD", "TX"…
$ unit_code         <chr> "CRLA", "LARO", "LEWI", "OLYM", "SAMO", "ACAD", "AMI…
$ unit_name         <chr> "Crater Lake National Park", "Lake Roosevelt Nationa…
$ unit_type         <chr> "National Park", "National Recreation Area", "Nation…
$ visitors          <dbl> 1500, 0, 69000, 2200, 468144, 64000, 448000, 738700,…

It’s important to note that the dataset also contains records for sites other than U.S. National Parks; the unit_type field identifies each site’s type.

Show code

# Frequency table of the categorical columns, narrowed to `unit_type`
# to see which kinds of sites the dataset contains.
filter(diagnose_category(df), variables == "unit_type")

# A tibble: 10 × 6
   variables levels                       N  freq ratio  rank
   <chr>     <chr>                    <int> <int> <dbl> <int>
 1 unit_type National Monument        21560  5051 23.4      1
 2 unit_type National Park            21560  4743 22.0      2
 3 unit_type National Historic Site   21560  3274 15.2      3
 4 unit_type National Historical Park 21560  1984  9.20     4
 5 unit_type National Memorial        21560  1486  6.89     5
 6 unit_type National Battlefield     21560   859  3.98     6
 7 unit_type National Recreation Area 21560   806  3.74     7
 8 unit_type National Military Park   21560   613  2.84     8
 9 unit_type National Preserve        21560   559  2.59     9
10 unit_type National Seashore        21560   494  2.29    10

4. Tidy the Data

Show code

# Annual visitor ranking of every site whose unit_type is "National Park".
# Adds: numeric `year`, `decade` (factor), `annual_visitor_rank` (1 = most
# visitors within that year), and `parkname_full` / `parktype`, the pieces of
# `unit_name` before and after the text "National Park".
df_annual <- df %>%
  filter(
    year != "Total",                 # drop the per-park "Total" summary rows
    unit_type == "National Park"
  ) %>%
  mutate(
    year = as.numeric(year),
    decade = as.factor(year - (year %% 10))
  ) %>%
  group_by(year) %>%
  # min_rank() gives tied parks the same (best) integer rank. The previous
  # as.integer(rank(-visitors)) truncated averaged tie ranks, so a three-way
  # tie for 2nd place became rank 3 with no park ranked 2.
  mutate(annual_visitor_rank = min_rank(desc(visitors))) %>%
  ungroup() %>%
  select(annual_visitor_rank, year, unit_name, visitors, everything()) %>%
  arrange(year, annual_visitor_rank) %>%
  separate(
    col = unit_name,
    into = c("parkname_full", "parktype"),
    sep = "National Park",
    remove = FALSE
  ) %>%
  # The split leaves a trailing space on the name ("Crater Lake "); trim it so
  # plot labels are not padded.
  # NOTE(review): a unit_name that starts with "National Park" would yield an
  # empty parkname_full -- confirm no labelled park is affected.
  mutate(parkname_full = str_trim(parkname_full))

# unit_name of every park ranked fifth or better by visitors in 2016;
# these are the parks drawn in colour and labelled in the annual chart.
highlight_list_annual <- df_annual %>%
  filter(year == 2016 & annual_visitor_rank <= 5) %>%
  pull(unit_name)

# Visitors per National Park summed by decade, with a within-decade ranking.
# One row per (decade, unit_name): `visitors_by_decade`,
# `rank_visitors_by_decade` (1 = most visitors in that decade), and
# `parkname_full` / `parktype`, the pieces of `unit_name` before and after the
# text "National Park".
df_decade <- df %>%
  filter(
    unit_type == "National Park",
    year != "Total"                  # drop the per-park "Total" summary rows
  ) %>%
  mutate(
    year = as.numeric(year),
    decade = year - (year %% 10)
  ) %>%
  group_by(decade, unit_name) %>%
  summarise(
    visitors_by_decade = sum(visitors, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(decade) %>%
  # min_rank() gives tied parks the same (best) integer rank. The previous
  # as.integer(rank(-x)) truncated averaged tie ranks, so a three-way tie for
  # 2nd place became rank 3 with no park ranked 2.
  mutate(rank_visitors_by_decade = min_rank(desc(visitors_by_decade))) %>%
  ungroup() %>%
  separate(
    col = unit_name,
    into = c("parkname_full", "parktype"),
    sep = "National Park",
    remove = FALSE
  ) %>%
  # The split leaves a trailing space on the name ("Crater Lake "); trim it so
  # plot labels are not padded.
  # NOTE(review): a unit_name that starts with "National Park" would yield an
  # empty parkname_full -- confirm no labelled park is affected.
  mutate(parkname_full = str_trim(parkname_full))

# unit_name of the first five parks, ordered by decade rank, in the 1900s ...
top_1900s <- df_decade %>%
  filter(decade == 1900) %>%
  arrange(rank_visitors_by_decade) %>%
  slice_head(n = 5) %>%
  pull(unit_name)

# ... and in the 2010s.
top_2010s <- df_decade %>%
  filter(decade == 2010) %>%
  arrange(rank_visitors_by_decade) %>%
  slice_head(n = 5) %>%
  pull(unit_name)

5. Visualization Parameters

Show code

# Shared plot theme: Lato text, ggtext text boxes for title/subtitle/caption,
# and no axes, grid lines, panel background or panel border.
my_theme <- ggplot2::theme(
  # Typography
  text = ggplot2::element_text(family = "lato"),
  plot.title = ggtext::element_textbox_simple(
    color = "black", face = "bold", size = 20, hjust = 0
  ),
  plot.subtitle = ggtext::element_textbox_simple(
    color = "black", size = 12, hjust = 0
  ),
  plot.caption = ggtext::element_textbox_simple(color = "black", size = 12),
  strip.text = ggplot2::element_text(color = "black", size = 14),

  # Axes are hidden entirely
  axis.title = ggplot2::element_blank(),
  axis.text = ggplot2::element_blank(),
  axis.ticks = ggplot2::element_blank(),
  axis.line = ggplot2::element_blank(),

  # Blank panel
  panel.background = ggplot2::element_blank(),
  panel.grid.major = ggplot2::element_blank(),
  panel.grid.minor = ggplot2::element_blank(),
  panel.border = ggplot2::element_blank(),

  # Legend (individual plots may switch it off)
  legend.position = "top",
  legend.title = ggplot2::element_blank(),
  legend.text = ggplot2::element_text(color = "black", size = 12, hjust = 0)
)

# Title shared by both charts, built as an htmltools tag list.
title <- tagList(p("Ranking of popularity of U.S. National Parks"))

# Subtitles for the annual chart and the by-decade chart.
subtitle <- tagList(span("*by the number of visitors annually*"))
subtitle_2 <- tagList(span("*by the number of visitors by decade*"))

# Text colour applied to the by-decade chart.
description_color <- "grey40"

# Caption: data source on the first line, then two icon + handle pairs.
# The glyph codes are drawn in the "fa-brands" font (presumably the Twitter and
# GitHub brand icons -- confirm against the font); the white-coloured dots
# serve as spacing between the pieces.
caption <- paste0(
  c(
    "<span style='font-family:lato;'>**Source**: TidyTuesday Week 38 (2019)</span><br>",
    "<span style='font-family:fa-brands;'>&#xf099;</span>",
    "<span style='font-family:lato;'>@mickey.rafa</span>",
    "<span style='font-family:lato;color:white;'>....</span>",
    "<span style='font-family:fa-brands;'>&#xf09b;</span>",
    "<span style='font-family:lato;color:white;'>.</span>",
    "<span style='font-family:lato;'>mrafa3</span>"
  ),
  collapse = ""
)

6. Plot

For the first plot, I wanted to replicate the bump chart created by FiveThirtyEight, which shows the ranking of U.S. national parks by annual visitors.

Show code

# Annual bump chart: one grey line per park showing its visitor rank each
# year, with the parks in `highlight_list_annual` redrawn in colour and
# labelled at their 2016 position. The outer parentheses print the plot as
# well as assigning it.
(plot_viz_538 <- df_annual %>%
  ggplot(
    aes(
      x = year,
      y = -annual_visitor_rank,  # negated so that rank 1 is drawn at the top
      group = unit_name,
      color = unit_name
    )
  ) +
  # Background: every park in grey.
  geom_line(color = "gray80") +
  # Foreground: highlighted parks, coloured by park.
  geom_line(data = . %>% filter(unit_name %in% highlight_list_annual)) +
  # Labels use the same "#<rank> <name>" format as the by-decade chart.
  ggrepel::geom_text_repel(
    data = . %>% filter(year == 2016, unit_name %in% highlight_list_annual),
    aes(label = paste0("#", annual_visitor_rank, " ", parkname_full)),
    nudge_x = 15,
    size = 3,
    direction = "y",
    fontface = "bold"
  ) +
  labs(
    title = title,
    subtitle = subtitle,
    caption = caption
  ) +
  # The x range extends past the last year to leave room for the labels.
  coord_cartesian(xlim = c(1900, 2040), ylim = c(-65, 2), expand = FALSE) +
  my_theme +
  theme(legend.position = "none"))

I like this high-level plot, and I think it’s effective if you want to highlight individual parks (or show a top 5, as I’ve done), but there is plenty of noise. In the next plot, I aggregated visitors by decade, and I’m only showing parks that were in the top five in either the first or the last decade of the dataset. This strips out the noise and shows some interesting changes in park rankings over time.

Show code

# By-decade bump chart: rank by summed visitors per decade, restricted to the
# parks in `top_1900s` or `top_2010s`. Parks are labelled at the 1900s (left)
# and 2010s (right) ends. The outer parentheses print the plot as well as
# assigning it.
(plot_viz_decade_bump <- df_decade %>%
  filter(
    unit_name %in% c(top_1900s, top_2010s),
    decade >= 1900
  ) %>%
  ggplot(
    aes(
      x = decade,
      y = -rank_visitors_by_decade,  # negated so that rank 1 is at the top
      color = unit_name
    )
  ) +
  geom_point(shape = "|", stroke = 6) +
  geom_bump(linewidth = 1) +
  # Left-hand labels: the 1900s top parks, right-aligned.
  ggrepel::geom_text_repel(
    data = df_decade %>% filter(decade == 1900, unit_name %in% top_1900s),
    aes(label = paste0("#", rank_visitors_by_decade, " ", parkname_full)),
    hjust = 1,
    size = 4,
    direction = "y",
    fontface = "bold"
  ) +
  # Right-hand labels: the 2010s top parks, left-aligned.
  ggrepel::geom_text_repel(
    data = df_decade %>% filter(decade == 2010, unit_name %in% top_2010s),
    aes(label = paste0("#", rank_visitors_by_decade, " ", parkname_full)),
    hjust = 0,
    nudge_x = 1,
    size = 4,
    direction = "y",
    fontface = "bold"
  ) +
  # Right-hand labels for three hard-coded parks.
  # NOTE(review): presumably these are the 1900s top parks that are no longer
  # in the 2010s top five -- confirm the list still matches the data.
  geom_text(
    data = df_decade %>%
      filter(
        decade == 2010,
        unit_name %in% c(
          "Hot Springs National Park",
          "Wind Cave National Park",
          "Crater Lake National Park"
        )
      ),
    aes(label = paste0("#", rank_visitors_by_decade, " ", parkname_full)),
    hjust = 0,
    nudge_x = 1,
    size = 4,
    fontface = "bold"
  ) +
  # Decade headings above the first and last columns of points.
  annotate(
    "text",
    x = c(1898, 2012),
    y = c(5, 5),
    label = c("1900s", "2010s"),
    hjust = c(0, 1),
    vjust = 1,
    size = 6,
    fontface = "bold"
  ) +
  # The x range is padded on both sides to leave room for the labels.
  coord_cartesian(xlim = c(1860, 2070), ylim = c(-45, 10), expand = FALSE) +
  my_theme +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    text = element_text(color = description_color)
  ) +
  labs(
    title = title,
    subtitle = subtitle_2,
    caption = caption
  ))

7. Save

Show code

# Save the annual chart as a PNG, then write a 400px-wide thumbnail of it.
png_path <- glue("tt_{tt_year}_{tt_week}.png")
thumbnail_path <- glue("tt_{tt_year}_{tt_week}_thumbnail.png")
save_dpi <- 320

# showtext sizes text for the dpi held in its own options, not the device's.
# Keep the two in sync, otherwise fonts are rendered for a different
# resolution than the one the PNG is written at.
showtext::showtext_opts(dpi = save_dpi)

ggsave(
  filename = png_path,
  plot = plot_viz_538,
  width = 10, height = 8, units = "in", dpi = save_dpi
)

# Make thumbnail for page.
magick::image_read(png_path) %>%
  magick::image_resize(geometry = "400") %>%
  magick::image_write(thumbnail_path)

8. Session Info

Expand for Session Info

R version 4.4.0 (2024-04-24)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.6.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Denver
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] geomtextpath_0.1.4 here_1.0.1         glue_1.8.0         htmltools_0.5.8.1 
 [5] scales_1.3.0       janitor_2.2.0      showtext_0.9-7     showtextdb_3.0    
 [9] sysfonts_0.8.9     ggbump_0.1.0       gtExtras_0.5.0     gt_0.11.1         
[13] ggtext_0.1.2       dlookr_0.6.3       tidytuesdayR_1.1.2 lubridate_1.9.3   
[17] forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2       
[21] readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1     
[25] tidyverse_2.0.0    pacman_0.5.1      

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.1        viridisLite_0.4.2       farver_2.1.2           
 [4] fastmap_1.2.0           gh_1.4.1                pagedown_0.21          
 [7] fontquiver_0.2.1        promises_1.3.0          digest_0.6.37          
[10] timechange_0.3.0        mime_0.12               lifecycle_1.0.4        
[13] magrittr_2.0.3          compiler_4.4.0          rlang_1.1.4            
[16] tools_4.4.0             utf8_1.2.4              yaml_2.3.10            
[19] knitr_1.48              labeling_0.4.3          htmlwidgets_1.6.4      
[22] bit_4.5.0               curl_5.2.3              xml2_1.3.6             
[25] withr_3.0.1             grid_4.4.0              fansi_1.0.6            
[28] gdtools_0.4.0           xtable_1.8-4            colorspace_2.1-1       
[31] extrafontdb_1.0         paletteer_1.6.0         gitcreds_0.1.2         
[34] cli_3.6.3               crayon_1.5.3            rmarkdown_2.28         
[37] ragg_1.3.3              generics_0.1.3          rstudioapi_0.17.1      
[40] tzdb_0.4.0              commonmark_1.9.2        parallel_4.4.0         
[43] vctrs_0.6.5             jsonlite_1.8.9          fontBitstreamVera_0.1.1
[46] hms_1.1.3               ggrepel_0.9.6           bit64_4.5.2            
[49] magick_2.8.5            systemfonts_1.1.0       fontawesome_0.5.2      
[52] rematch2_2.1.2          stringi_1.8.4           gtable_0.3.5           
[55] later_1.3.2             extrafont_0.19          munsell_0.5.1          
[58] pillar_1.9.0            rappdirs_0.3.3          reactable_0.4.4        
[61] R6_2.5.1                httr2_1.0.5             textshaping_0.4.0      
[64] rprojroot_2.0.4         vroom_1.6.5             evaluate_1.0.1         
[67] shiny_1.9.1             kableExtra_1.4.0        markdown_1.13          
[70] gridtext_0.1.5          snakecase_0.11.1        fontLiberation_0.1.0   
[73] httpuv_1.6.15           hrbrthemes_0.8.7        Rcpp_1.0.13            
[76] svglite_2.1.3           gridExtra_2.3           Rttf2pt1_1.3.12        
[79] xfun_0.48               pkgconfig_2.0.3

9. Github Repository

Expand for GitHub Repo

Access the GitHub repository here