This month, the challenge is to create and share a radial data visualization. Any circular view of data is welcome, but the data and scenario should lend themselves well to this.
Data came from Kaggle and includes historical hourly weather data for 36 different cities from 2012-2017. However, only Seattle data will be used in this project.
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it, which is all that is needed here because
# p_load() is called through the pacman:: prefix below.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
# Load every package the analysis uses in one call.
pacman::p_load("tidyverse", "visdat", "lubridate", "packcircles", "ggforce")

# Read the weather descriptions: a datetime column plus one column per city.
df <- read_csv("weather_description.csv")
glimpse(df)
## Observations: 45,253
## Variables: 37
## $ datetime <dttm> 2012-10-01 12:00:00, 2012-10-01 13:00:00,...
## $ Vancouver <chr> NA, "mist", "broken clouds", "broken cloud...
## $ Portland <chr> NA, "scattered clouds", "scattered clouds"...
## $ `San Francisco` <chr> NA, "light rain", "sky is clear", "sky is ...
## $ Seattle <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Los Angeles` <chr> NA, "mist", "sky is clear", "sky is clear"...
## $ `San Diego` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Las Vegas` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Phoenix <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Albuquerque <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Denver <chr> NA, "light rain", "broken clouds", "broken...
## $ `San Antonio` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Dallas <chr> NA, "mist", "sky is clear", "sky is clear"...
## $ Houston <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ `Kansas City` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Minneapolis <chr> NA, "broken clouds", "broken clouds", "bro...
## $ `Saint Louis` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Chicago <chr> NA, "overcast clouds", "overcast clouds", ...
## $ Nashville <chr> NA, "mist", "overcast clouds", "overcast c...
## $ Indianapolis <chr> NA, "overcast clouds", "overcast clouds", ...
## $ Atlanta <chr> NA, "light rain", "overcast clouds", "over...
## $ Detroit <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Jacksonville <chr> NA, "scattered clouds", "scattered clouds"...
## $ Charlotte <chr> NA, "mist", "overcast clouds", "overcast c...
## $ Miami <chr> NA, "light intensity drizzle", "broken clo...
## $ Pittsburgh <chr> NA, "mist", "scattered clouds", "scattered...
## $ Toronto <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Philadelphia <chr> NA, "broken clouds", "broken clouds", "bro...
## $ `New York` <chr> NA, "few clouds", "few clouds", "few cloud...
## $ Montreal <chr> NA, "overcast clouds", "sky is clear", "sk...
## $ Boston <chr> NA, "sky is clear", "few clouds", "few clo...
## $ Beersheba <chr> NA, "sky is clear", "sky is clear", "overc...
## $ `Tel Aviv District` <chr> NA, "sky is clear", "sky is clear", "sky i...
## $ Eilat <chr> "haze", "haze", "broken clouds", "broken c...
## $ Haifa <chr> NA, "sky is clear", "overcast clouds", "ov...
## $ Nahariyya <chr> NA, "sky is clear", "sky is clear", "overc...
## $ Jerusalem <chr> NA, "sky is clear", "overcast clouds", "ov...
# Preview the first rows of the raw table.
df %>% head()
## # A tibble: 6 x 37
## datetime Vancouver Portland `San Francisco` Seattle
## <dttm> <chr> <chr> <chr> <chr>
## 1 2012-10-01 12:00:00 <NA> <NA> <NA> <NA>
## 2 2012-10-01 13:00:00 mist scatter~ light rain sky is~
## 3 2012-10-01 14:00:00 broken c~ scatter~ sky is clear sky is~
## 4 2012-10-01 15:00:00 broken c~ scatter~ sky is clear sky is~
## 5 2012-10-01 16:00:00 broken c~ scatter~ sky is clear sky is~
## 6 2012-10-01 17:00:00 broken c~ scatter~ sky is clear sky is~
## # ... with 32 more variables: `Los Angeles` <chr>, `San Diego` <chr>, `Las
## # Vegas` <chr>, Phoenix <chr>, Albuquerque <chr>, Denver <chr>, `San
## # Antonio` <chr>, Dallas <chr>, Houston <chr>, `Kansas City` <chr>,
## # Minneapolis <chr>, `Saint Louis` <chr>, Chicago <chr>,
## # Nashville <chr>, Indianapolis <chr>, Atlanta <chr>, Detroit <chr>,
## # Jacksonville <chr>, Charlotte <chr>, Miami <chr>, Pittsburgh <chr>,
## # Toronto <chr>, Philadelphia <chr>, `New York` <chr>, Montreal <chr>,
## # Boston <chr>, Beersheba <chr>, `Tel Aviv District` <chr>, Eilat <chr>,
## # Haifa <chr>, Nahariyya <chr>, Jerusalem <chr>
# List the distinct weather descriptions recorded for Seattle.
df$Seattle %>% as.factor() %>% levels()
## [1] "broken clouds" "drizzle"
## [3] "few clouds" "fog"
## [5] "haze" "heavy intensity drizzle"
## [7] "heavy intensity rain" "heavy intensity shower rain"
## [9] "heavy snow" "light intensity drizzle"
## [11] "light intensity shower rain" "light rain"
## [13] "light shower snow" "light snow"
## [15] "mist" "moderate rain"
## [17] "overcast clouds" "proximity thunderstorm"
## [19] "scattered clouds" "shower rain"
## [21] "sky is clear" "smoke"
## [23] "snow" "squalls"
## [25] "thunderstorm" "thunderstorm with heavy rain"
## [27] "thunderstorm with light rain" "thunderstorm with rain"
## [29] "very heavy rain"
# Check the time span covered by the observations.
df$datetime %>% summary()
## Min. 1st Qu. Median
## "2012-10-01 12:00:00" "2014-01-15 21:00:00" "2015-05-02 06:00:00"
## Mean 3rd Qu. Max.
## "2015-05-02 06:00:00" "2016-08-15 15:00:00" "2017-11-30 00:00:00"
Data from before December 2012 were removed so that every month has an equal number of observations and Winter does not show skewed data.
# Keep only the timestamp and the Seattle weather description, then drop the
# leading partial period.
# The cutoff is an explicit UTC instant: comparing a date-time column with a
# bare string converts the string in the session's local time zone, so the
# rows kept would differ from machine to machine.
# NOTE(review): assumes `datetime` was parsed as UTC (readr's default) - confirm.
df <- df %>%
  select(datetime, Seattle) %>%
  filter(datetime > as.POSIXct("2012-11-30", tz = "UTC"))
There are no missing values.
# Visualize missing values, with columns sorted by missingness
df %>% vis_miss(sort_miss = TRUE)
# Derive the month, the season and a simplified weather category for every
# record; transmute() keeps only the columns created here.
#
# case_when() returns the first branch that matches, so "thunderstorm" has to
# be tested before "rain|drizzle". With rain first, "thunderstorm with rain"
# and its heavy/light variants were counted as plain rain and the "thunder"
# category only received the rain-free thunderstorm descriptions.
df <- df %>%
  transmute(
    month = month(datetime),
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(9, 10, 11) ~ "Fall"
    ),
    weather = case_when(
      str_detect(Seattle, "thunderstorm") ~ "thunder",
      str_detect(Seattle, "rain|drizzle") ~ "rain",
      str_detect(Seattle, "mist|fog") ~ "fog",
      str_detect(Seattle, "clear") ~ "clear",
      str_detect(Seattle, "squalls") ~ "squalls",
      str_detect(Seattle, "cloud") ~ "clouds",
      str_detect(Seattle, "smoke|haze") ~ "haze",
      str_detect(Seattle, "snow") ~ "snow"
    )
  )
# Aster chart data: number of records per weather category, with the factor
# levels ordered by that count. Categories with 20 or fewer records are dropped.
df_aster <- df %>%
  group_by(weather) %>%
  summarize(count = n()) %>%
  arrange(count) %>%
  ungroup() %>%
  mutate(weather = fct_reorder(as.factor(weather), count)) %>%
  filter(count > 20)

# Circle chart data: number of records per season/weather pair, both keys
# stored as factors. Pairs with 20 or fewer records are dropped.
df_circle <- df %>%
  group_by(season, weather) %>%
  summarize(count = n()) %>%
  arrange(count) %>%
  ungroup() %>%
  mutate(
    season = as.factor(season),
    weather = as.factor(weather)
  ) %>%
  filter(count > 20)
# Aster chart: one wedge per weather category on polar coordinates. Bar height
# is the square root of the count, and each label sits at a fixed radius of 50.
ggplot(df_aster, aes(x = weather, y = sqrt(count), fill = weather)) +
  geom_col() +
  geom_text(aes(y = 50, label = weather)) +
  coord_polar() +
  ylim(-10, 125) +
  labs(
    title = "Seattle Weather Occurrences",
    subtitle = "Measured from 2012 - 2017",
    fill = "Weather Type"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "none"
  )
# Fix the RNG state before computing the layout
# (presumably the packing is randomized - TODO confirm).
set.seed(333)
# Lay out one circle per season/weather pair, sized by its count, and attach
# the layout columns to the pair labels; the raw count is no longer needed.
df_packs <- df_circle %>%
  bind_cols(circleRepelLayout(df_circle$count)$layout) %>%
  select(-count)
# Season palette, keyed by season name
colors <- c(
  Winter = "#BFD1E5",
  Spring = "#8CC7A1",
  Summer = "#C1666B",
  Fall = "#D4B483"
)

# Main circle plot: one packed circle per season/weather pair, colored by
# season and labelled with the weather type (label size follows the radius).
p <- ggplot(df_packs) +
  geom_circle(
    aes(x0 = x, y0 = y, r = radius, fill = season, color = season),
    size = 0.1,
    show.legend = FALSE
  ) +
  geom_text(
    aes(x = x, y = y, label = weather, size = radius * 0.5),
    show.legend = FALSE
  ) +
  scale_fill_manual(values = colors) +
  scale_color_manual(values = colors) +
  coord_fixed() +
  ggtitle("Seattle Weather Occurrences") +
  theme_void() +
  theme(
    text = element_text(family = "courier"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
    plot.background = element_rect(fill = "snow", color = "black")
  )
# Build the legend manually: a row of four small circles, one per season,
# each with its season name printed underneath.
legend_data <- tibble(
  y = rep(1, 4),
  x = 1:4,
  fill = names(colors),
  r = 0.1
)
legend <- ggplot(legend_data) +
  geom_text(
    aes(x = x, y = y - .2, label = fill),
    size = 2.25, vjust = 1, family = "courier", fontface = "bold"
  ) +
  geom_circle(
    aes(x0 = x, y0 = y, fill = fill, color = fill, r = r),
    show.legend = FALSE
  ) +
  scale_fill_manual(values = colors) +
  scale_colour_manual(values = colors) +
  coord_fixed() +
  theme_void() +
  lims(x = c(.5, 6), y = c(0.5, 2.3))

# Legend heading, drawn as its own tiny plot so it can be positioned freely.
leg_text <- ggplot(tibble(x = 0.16, y = 0.17, label = "Season")) +
  geom_text(
    aes(x = x, y = y, label = label),
    family = "courier", fontface = "bold", size = 3.75, show.legend = FALSE
  ) +
  coord_fixed() +
  lims(x = c(-4, 5), y = c(-1, 1)) +
  theme_void()
# Convert the two legend plots to grobs so they can be embedded in the main plot.
legend_grob <- ggplotGrob(legend)
heading_grob <- ggplotGrob(leg_text)

# Combine the main plot with the legend; the boxes are given in the main
# plot's data coordinates.
p +
  annotation_custom(
    grob = legend_grob,
    xmin = -190, xmax = -40,
    ymin = -125, ymax = -75
  ) +
  annotation_custom(
    grob = heading_grob,
    xmin = -165, xmax = -100,
    ymin = -110, ymax = -90
  )

# Save the final plot (ggsave() writes the most recent plot by default).
ggsave(filename = "seattle-weather.png", width = 6.1)