1 Challenge Description

This month, the challenge is to create and share a radial data visualization. Any circular view of data is welcome, but the data and scenario should lend themselves well to a radial layout.

2 Dataset

The data come from Kaggle and include historical hourly weather descriptions for 36 different cities from 2012 to 2017. However, only the Seattle data will be used in this project.

3 Setup

3.1 Load Libraries

if (!require("pacman")) install.packages("pacman")
pacman::p_load("tidyverse","visdat","lubridate","packcircles","ggforce")

3.2 Import Data

# Read the hourly weather descriptions (one row per timestamp) from the
# working directory.
df <- "weather_description.csv" %>% read_csv()

4 Data Wrangling

  • There are 45,253 observations.
  • Each city has its own column with a character description of the weather pattern
  • The first row is NA for every city except Eilat.

4.1 View Data

# Column-wise overview: one line per variable with its type and first values.
df %>% glimpse()
## Observations: 45,253
## Variables: 37
## $ datetime            <dttm> 2012-10-01 12:00:00, 2012-10-01 13:00:00,...
## $ Vancouver           <chr> NA, "mist", "broken clouds", "broken cloud...
## $ Portland            <chr> NA, "scattered clouds", "scattered clouds"...
## $ `San Francisco`     <chr> NA, "light rain", "sky is clear", "sky is ...
## $ Seattle             <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Los Angeles`       <chr> NA, "mist", "sky is clear", "sky is clear"...
## $ `San Diego`         <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Las Vegas`         <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Phoenix             <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Albuquerque         <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Denver              <chr> NA, "light rain", "broken clouds", "broken...
## $ `San Antonio`       <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Dallas              <chr> NA, "mist", "sky is clear", "sky is clear"...
## $ Houston             <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Kansas City`       <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Minneapolis         <chr> NA, "broken clouds", "broken clouds", "bro...
## $ `Saint Louis`       <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Chicago             <chr> NA, "overcast clouds", "overcast clouds", ...
## $ Nashville           <chr> NA, "mist", "overcast clouds", "overcast c...
## $ Indianapolis        <chr> NA, "overcast clouds", "overcast clouds", ...
## $ Atlanta             <chr> NA, "light rain", "overcast clouds", "over...
## $ Detroit             <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Jacksonville        <chr> NA, "scattered clouds", "scattered clouds"...
## $ Charlotte           <chr> NA, "mist", "overcast clouds", "overcast c...
## $ Miami               <chr> NA, "light intensity drizzle", "broken clo...
## $ Pittsburgh          <chr> NA, "mist", "scattered clouds", "scattered...
## $ Toronto             <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Philadelphia        <chr> NA, "broken clouds", "broken clouds", "bro...
## $ `New York`          <chr> NA, "few clouds", "few clouds", "few cloud...
## $ Montreal            <chr> NA, "overcast clouds", "sky is clear", "sk...
## $ Boston              <chr> NA, "sky is clear", "few clouds", "few clo...
## $ Beersheba           <chr> NA, "sky is clear", "sky is clear", "overc...
## $ `Tel Aviv District` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Eilat               <chr> "haze", "haze", "broken clouds", "broken c...
## $ Haifa               <chr> NA, "sky is clear", "overcast clouds", "ov...
## $ Nahariyya           <chr> NA, "sky is clear", "sky is clear", "overc...
## $ Jerusalem           <chr> NA, "sky is clear", "overcast clouds", "ov...
# Peek at the first six rows.
df %>% head()
## # A tibble: 6 x 37
##   datetime            Vancouver Portland `San Francisco` Seattle
##   <dttm>              <chr>     <chr>    <chr>           <chr>  
## 1 2012-10-01 12:00:00 <NA>      <NA>     <NA>            <NA>   
## 2 2012-10-01 13:00:00 mist      scatter~ light rain      sky is~
## 3 2012-10-01 14:00:00 broken c~ scatter~ sky is clear    sky is~
## 4 2012-10-01 15:00:00 broken c~ scatter~ sky is clear    sky is~
## 5 2012-10-01 16:00:00 broken c~ scatter~ sky is clear    sky is~
## 6 2012-10-01 17:00:00 broken c~ scatter~ sky is clear    sky is~
## # ... with 32 more variables: `Los Angeles` <chr>, `San Diego` <chr>, `Las
## #   Vegas` <chr>, Phoenix <chr>, Albuquerque <chr>, Denver <chr>, `San
## #   Antonio` <chr>, Dallas <chr>, Houston <chr>, `Kansas City` <chr>,
## #   Minneapolis <chr>, `Saint Louis` <chr>, Chicago <chr>,
## #   Nashville <chr>, Indianapolis <chr>, Atlanta <chr>, Detroit <chr>,
## #   Jacksonville <chr>, Charlotte <chr>, Miami <chr>, Pittsburgh <chr>,
## #   Toronto <chr>, Philadelphia <chr>, `New York` <chr>, Montreal <chr>,
## #   Boston <chr>, Beersheba <chr>, `Tel Aviv District` <chr>, Eilat <chr>,
## #   Haifa <chr>, Nahariyya <chr>, Jerusalem <chr>
# List the distinct weather descriptions recorded for Seattle.
df$Seattle %>% as.factor() %>% levels()
##  [1] "broken clouds"                "drizzle"                     
##  [3] "few clouds"                   "fog"                         
##  [5] "haze"                         "heavy intensity drizzle"     
##  [7] "heavy intensity rain"         "heavy intensity shower rain" 
##  [9] "heavy snow"                   "light intensity drizzle"     
## [11] "light intensity shower rain"  "light rain"                  
## [13] "light shower snow"            "light snow"                  
## [15] "mist"                         "moderate rain"               
## [17] "overcast clouds"              "proximity thunderstorm"      
## [19] "scattered clouds"             "shower rain"                 
## [21] "sky is clear"                 "smoke"                       
## [23] "snow"                         "squalls"                     
## [25] "thunderstorm"                 "thunderstorm with heavy rain"
## [27] "thunderstorm with light rain" "thunderstorm with rain"      
## [29] "very heavy rain"
# Time span covered by the observations.
df$datetime %>% summary()
##                  Min.               1st Qu.                Median 
## "2012-10-01 12:00:00" "2014-01-15 21:00:00" "2015-05-02 06:00:00" 
##                  Mean               3rd Qu.                  Max. 
## "2015-05-02 06:00:00" "2016-08-15 15:00:00" "2017-11-30 00:00:00"

4.2 Filter Data

Data before December 2012 were removed so that every calendar month appears the same number of times and no season is skewed by the extra partial months at the start of the record.

# Keep only the timestamp and the Seattle column, then drop every observation
# before 1 December 2012.
#
# The cut-off is an explicit UTC date-time rather than a bare string. Comparing
# a POSIXct column with the string "2012-11-30" parses it as midnight in the
# session's local time zone, so a strict `>` kept almost all of 30 November
# 2012 and the boundary shifted with the machine's time zone.
# NOTE(review): assumes `datetime` was parsed as UTC (readr's default) - confirm
df <- df %>%
  select(datetime, Seattle) %>%
  filter(datetime >= as.POSIXct("2012-12-01", tz = "UTC"))

4.3 Missing Values

There are no missing values.

# Plot the share of missing values per column, most-missing columns first.
df %>% vis_miss(sort_miss = TRUE)

4.4 Prepare Dataframe

# Reduce each hourly record to three columns:
#   month   - calendar month number taken from `datetime`
#   season  - meteorological season derived from `month`
#   weather - coarse weather category derived from the Seattle description
#
# case_when() uses the first condition that matches, so "thunderstorm" is
# tested before "rain|drizzle". Otherwise the descriptions "thunderstorm with
# rain", "thunderstorm with light rain" and "thunderstorm with heavy rain"
# would all be labelled "rain" and never reach the "thunder" category.
# Descriptions matching none of the patterns become NA.
df <- df %>%
  transmute(
    month = month(datetime),
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(9, 10, 11) ~ "Fall"
    ),
    weather = case_when(
      str_detect(Seattle, "thunderstorm") ~ "thunder",
      str_detect(Seattle, "rain|drizzle") ~ "rain",
      str_detect(Seattle, "mist|fog") ~ "fog",
      str_detect(Seattle, "clear") ~ "clear",
      str_detect(Seattle, "squalls") ~ "squalls",
      str_detect(Seattle, "cloud") ~ "clouds",
      str_detect(Seattle, "smoke|haze") ~ "haze",
      str_detect(Seattle, "snow") ~ "snow"
    )
  )

# aster chart
# One row per weather category with its number of hourly observations.
# `weather` becomes a factor whose levels are ordered by `count`, and
# categories with 20 or fewer observations are dropped afterwards.
df_aster <- df %>%
  group_by(weather) %>%
  summarize(count = n()) %>%
  arrange(count) %>%
  ungroup() %>%
  mutate(weather = as.factor(weather)) %>%
  mutate(weather = fct_reorder(weather, count)) %>%
  filter(count > 20)

# circle chart
# One row per season/weather combination with its number of hourly
# observations; both label columns become factors, and combinations with
# 20 or fewer observations are dropped.
df_circle <- df %>%
  group_by(season, weather) %>%
  summarize(count = n()) %>%
  arrange(count) %>%
  ungroup() %>%
  mutate(
    season = as.factor(season),
    weather = as.factor(weather)
  ) %>%
  filter(count > 20)

5 Visualizations

5.1 Aster Chart

# Aster chart: one wedge per weather category in polar coordinates.
# Wedge length is sqrt(count), and every label sits at the same radius (y = 50).
ggplot(df_aster, aes(x = weather, y = sqrt(count), fill = weather)) +
  geom_col() +
  geom_text(aes(y = 50, label = weather)) +
  coord_polar() +
  # the negative lower limit leaves a small hole in the centre of the chart
  ylim(-10, 125) +
  labs(
    title = "Seattle Weather Occurrences",
    subtitle = "Measured from 2012 - 2017",
    fill = "Weather Type"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "none"
  )

5.2 Circle Packed Chart

# Fix the RNG state before computing the layout so the result is repeatable.
set.seed(333)

# Compute a circle layout from the counts and keep its `layout` element.
df_packs <- circleRepelLayout(df_circle$count)$layout

# Attach the layout columns to the season/weather labels; the raw count is
# no longer needed once the layout has been computed.
df_packs <- select(bind_cols(df_circle, df_packs), -count)

# set colors
# Named vector mapping each season to its fill/outline colour.
colors <- c(
  Winter = "#BFD1E5",
  Spring = "#8CC7A1",
  Summer = "#C1666B",
  Fall = "#D4B483"
)

# main circle plot
# One circle per season/weather combination, coloured by season and labelled
# with the weather category; label size is mapped to half the circle radius.
p <- ggplot(df_packs) +
  geom_circle(
    aes(x0 = x, y0 = y, r = radius, fill = season, color = season),
    size = 0.1,
    show.legend = FALSE
  ) +
  geom_text(
    aes(x = x, y = y, label = weather, size = radius * 0.5),
    show.legend = FALSE
  ) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  # equal axis scaling keeps the circles round
  coord_fixed() +
  labs(title = "Seattle Weather Occurrences") +
  theme_void() +
  theme(
    text = element_text(family = "courier"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
    plot.background = element_rect(fill = "snow", color = "black")
  )

# make legend manually
# A small stand-alone plot: four season-coloured circles in a row, each with
# its season name printed just below it.
legend <- ggplot(
  tibble(y = 1, x = 1:4, fill = names(colors), r = 0.1)
) +
  geom_text(
    aes(x = x, y = y - .2, label = fill),
    size = 2.25,
    vjust = 1,
    family = "courier",
    fontface = "bold"
  ) +
  geom_circle(
    aes(x0 = x, y0 = y, fill = fill, color = fill, r = r),
    show.legend = FALSE
  ) +
  scale_fill_manual(values = colors) +
  scale_colour_manual(values = colors) +
  coord_fixed() +
  theme_void() +
  xlim(.5, 6) +
  ylim(0.5, 2.3)

# Stand-alone plot holding only the legend heading "Season".
leg_text <- ggplot(tibble(x = 0.16, y = 0.17, label = "Season")) +
  geom_text(
    aes(x = x, y = y, label = label),
    family = "courier",
    fontface = "bold",
    size = 3.75,
    show.legend = FALSE
  ) +
  coord_fixed() +
  xlim(-4, 5) +
  ylim(-1, 1) +
  theme_void()

# combine plot with legend
# Both legend pieces are converted to grobs and placed inside the main plot.
# The x/y ranges are in the main plot's data coordinates.
p +
  annotation_custom(
    ggplotGrob(legend),
    xmin = -190, xmax = -40,
    ymin = -125, ymax = -75
  ) +
  annotation_custom(
    ggplotGrob(leg_text),
    xmin = -165, xmax = -100,
    ymin = -110, ymax = -90
  )

# save final plot
# No plot is passed explicitly, so ggsave() falls back to its default plot
# (per the ggplot2 documentation, the last plot displayed).
ggsave(filename = "seattle-weather.png", width = 6.1)