Code
# Drop the columns that the plots below do not use and turn empty
# manufacturer/model strings into explicit NA values.
wind <- wind %>%
  select(-c(case_id, faa_ors, faa_asn, usgs_pr_id, t_img_date,
            t_img_srce, eia_id, t_fips)) %>%
  mutate(t_manu = ifelse(t_manu == "", NA, t_manu),
         t_model = ifelse(t_model == "", NA, t_model))
Mathias Steilen
August 9, 2022
The data comes from the USGS, which regularly updates its Wind Turbine Database for the US. It covers 71,666 wind turbines in the United States over a period from 1981 to 2022.
To show only the results, large code chunks are hidden, but can be unfolded by clicking the “Code” boxes on the top right of each hidden code chunk.
Next steps are the removal of unnecessary variables and the cleaning of missing observations.
Looking at missing observations:
# Share of missing values per column, plotted for columns with at least one NA.
# tibble::enframe() turns the named vector into a two-column tibble; it replaces
# tidy(), whose method for plain vectors is deprecated in broom.
colMeans(is.na(wind)) %>%
  tibble::enframe(name = "names", value = "pct") %>%
  mutate(names = fct_reorder(names, pct)) %>%
  filter(pct > 0) %>%
  ggplot(aes(pct, names)) +
  geom_col(fill = "midnightblue") +
  # The trailing newline in the subtitle is kept from the original string.
  labs(title = "Missing Data In Variables",
       subtitle = "Percent missingness calculated for each column\n",
       y = NULL,
       x = NULL) +
  scale_x_continuous(labels = scales::percent_format(),
                     limits = c(0, 1)) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
It is visible that turbines which have not been retrofitted have no value for the year of retrofitting, which is not a problem for later visualisation. Additionally, there are low levels of missing data across other variables, such as the total height, hub height, rotor swept area, rotor diameter, turbine model, manufacturer, turbine capacity, project capacity and year of project completion. None of the latter will be imputed, as this blog post does not involve modelling, only visualisation.
Rows: 71,666
Columns: 19
$ t_state <chr> "CA", "CA", "CA", "IA", "IA", "IA", "IA", "IA", "IA", "I…
$ t_county <chr> "Kern County", "Kern County", "Kern County", "Story Coun…
$ p_name <chr> "251 Wind", "251 Wind", "251 Wind", "30 MW Iowa DG Portf…
$ p_year <int> 1987, 1987, 1987, 2017, 2017, 2017, 2017, 2017, 2017, 20…
$ p_tnum <int> 194, 194, 194, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2…
$ p_cap <dbl> 18.43, 18.43, 18.43, 30.00, 30.00, 30.00, 30.00, 30.00, …
$ t_manu <chr> "Vestas", "Vestas", "Vestas", "Nordex", "Nordex", "Norde…
$ t_model <chr> NA, NA, NA, "AW125/3000", "AW125/3000", "AW125/3000", "A…
$ t_cap <int> 95, 95, 95, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 30…
$ t_hh <dbl> NA, NA, NA, 87.5, 87.5, 87.5, 87.5, 87.5, 87.5, 87.5, 87…
$ t_rd <dbl> NA, NA, NA, 125.0, 125.0, 125.0, 125.0, 125.0, 125.0, 12…
$ t_rsa <dbl> NA, NA, NA, 12271.85, 12271.85, 12271.85, 12271.85, 1227…
$ t_ttlh <dbl> NA, NA, NA, 150.0, 150.0, 150.0, 150.0, 150.0, 150.0, 15…
$ retrofit <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ retrofit_year <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ t_conf_atr <int> 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ t_conf_loc <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ xlong <dbl> -118.36441, -118.36420, -118.36376, -93.63284, -93.36780…
$ ylat <dbl> 35.07744, 35.07764, 35.07791, 41.88248, 42.49794, 41.742…
At this stage, we have a nice and large dataset, which can be used for visualisation. The following sections will answer questions I was interested in while looking at the data.
# State outlines used as the base layer of the map.
us <- map_data("state")

# One point per project, sized by project capacity and coloured by the period
# in which the project was completed.
wind %>%
  filter(!is.na(p_year),
         !is.na(p_cap)) %>%
  select(p_name, p_cap, xlong, ylat, p_year) %>%
  # Keep the first row of every project name.
  group_by(p_name) %>%
  slice(1) %>%
  ungroup() %>%
  # Bin the years into four periods. The last break extends to the latest year
  # in the data (at least 2020) so that projects completed after 2020 are not
  # turned into NA by cut() and then removed by drop_na().
  mutate(p_year = cut(p_year,
                      c(1980, 1990, 2000, 2010, max(2020, p_year)),
                      dig.lab = 5)) %>%
  filter(dplyr::between(xlong, -125, -65),
         dplyr::between(ylat, 20, 50)) %>% # Removing Alaska and Islands
  distinct() %>%
  drop_na() %>%
  ggplot(aes(x = xlong, y = ylat)) +
  geom_polygon(data = us, aes(x = long, y = lat, group = group),
               color = "gray", fill = "gray95", alpha = 0.3) +
  geom_point(aes(size = p_cap, colour = p_year), alpha = 0.25, shape = 19) +
  scale_size_continuous(range = c(0.5, 4),
                        labels = scales::comma_format(suffix = " MW")) +
  xlim(-125, -65) +
  ylim(20, 50) +
  labs(title = "Wind Energy Infrastructure in the USA",
       subtitle = "Each dot constitutes one completed wind farm with size representing production capacity",
       y = NULL,
       x = NULL,
       size = "Project Capacity:",
       colour = "Period of Construction:") +
  theme_minimal() +
  # coord_fixed() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_blank(),
        axis.text = element_blank(),
        plot.title = element_text(size = 12, face = "bold", colour = "black"),
        plot.subtitle = element_text(face = "italic", colour = "gray50")) +
  # One colour per period produced by cut() above.
  scale_colour_manual(values = c("firebrick", "darkorange",
                                 "dodgerblue4", "dodgerblue"))
# Horizontal bar chart of the number of turbines per state. Abbreviations are
# expanded with usdata::abbr2state(); rows left with an NA are dropped.
wind %>%
  count(t_state) %>%
  mutate(t_state = fct_reorder(usdata::abbr2state(t_state), n)) %>%
  drop_na() %>%
  ggplot(aes(x = n, y = t_state)) +
  geom_col(fill = "midnightblue") +
  scale_x_continuous(labels = scales::comma_format()) +
  labs(
    title = "Number Of Wind Turbines In Each State In The US",
    subtitle = "Some states exhibit no wind power at all",
    x = "Number of Wind Turbines",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50")
  )
# Bar chart of the number of turbine rows per p_year; rows with a missing
# year are dropped before plotting.
wind %>%
  count(p_year, name = "turbines") %>%
  drop_na() %>%
  ggplot(aes(x = p_year, y = turbines)) +
  geom_col(fill = "midnightblue") +
  # One tick per year, with the labels rotated in theme() below.
  scale_x_continuous(breaks = 1981:2022) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Wind Turbines Built Each Year In The US",
    subtitle = "The late 90s constitute a turning point for wind energy",
    x = NULL,
    y = "Number of Wind Turbines"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Histogram of p_tnum on a log10 x axis, using the first row of every project
# name so that each project is counted once.
wind %>%
  distinct(p_name, .keep_all = TRUE) %>%
  ggplot(aes(p_tnum)) +
  geom_histogram(fill = "midnightblue", colour = "white") +
  scale_x_log10() +
  labs(
    title = "Wind Turbines Per Project",
    subtitle = "Many projects have either one or close to 100 turbines at a time",
    x = "Number of Wind Turbines (Base 10 Log Scale)",
    y = "Frequency"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
What is this extremely large wind project with close to 1000 turbines?
# A tibble: 10 × 4
t_state p_name p_year p_tnum
<chr> <chr> <int> <int>
1 CA unknown San Gorgonio Pass 1 1981 731
2 CA unknown Tehachapi Wind Resource Area 1 1982 713
3 CA Mesa Wind Farm 1983 460
4 OR Stateline Wind Project 2001 454
5 NM Western Spirit 2021 377
6 CA Mojave 16, 17 & 18 (Desertwind III PPC Tru 1989 297
7 KS Flat Ridge 2 2012 294
8 KS Flat Ridge 2 Expansion 2012 294
9 CO Cedar Creek 2007 274
10 CO Peetz Table 2007 267
It looks like there are two of them and they are considerably larger and older than anything after them. Unfortunately, additional data for these two is missing.
# Box plot of p_tnum per p_year. Rows without p_tnum are removed first, then
# the first remaining row of every project name is kept.
wind %>%
  filter(!is.na(p_tnum)) %>%
  distinct(p_name, .keep_all = TRUE) %>%
  select(p_year, p_tnum) %>%
  drop_na() %>%
  # The year is treated as a factor so that every year gets its own box.
  ggplot(aes(x = as.factor(p_year), y = p_tnum)) +
  geom_boxplot(
    fill = "midnightblue",
    colour = "midnightblue",
    alpha = 0.4,
    outlier.alpha = 0.4
  ) +
  labs(
    title = "Wind Turbines Per Wind Farm",
    subtitle = "Number of turbines per project have drastically decreased since the 1980s",
    x = "Year",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
And how have the mean and median number of turbines per project developed over the past two decades?
# Mean and median p_tnum per year for projects with p_year between 2000 and
# 2022, drawn as lines with points plus a dashed loess smoother per series.
wind %>%
  filter(!is.na(p_tnum),
         between(p_year, 2000, 2022)) %>%
  # Keep the first row of every project name.
  group_by(p_name) %>%
  slice(1) %>%
  ungroup() %>%
  select(p_year, p_tnum) %>%
  group_by(p_year) %>%
  summarise(Mean = mean(p_tnum),
            Median = median(p_tnum)) %>%
  # Long format: one row per year and statistic, so colour can map to `name`.
  pivot_longer(-c(p_year)) %>%
  ggplot(aes(x = p_year, y = value, colour = name)) +
  geom_line() +
  geom_point() +
  # FALSE is spelled out because the shorthand F can be reassigned.
  geom_smooth(se = FALSE, method = "loess", size = 0.5, lty = "dashed") +
  labs(title = "Wind Turbines Per Wind Farm Over The Two Most Recent Decades",
       subtitle = "Number of turbines per project show a rebound from the pre-2000 drop",
       y = NULL,
       x = "Year",
       colour = NULL) +
  scale_colour_manual(values = c("midnightblue", "dodgerblue")) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "bottom")
# Mean t_cap per year as a line, with a ribbon spanning the 10th to the 90th
# percentile of t_cap in that year.
wind %>%
  filter(!is.na(t_cap)) %>%
  group_by(p_year) %>%
  summarise(
    cap_p10 = quantile(t_cap, 0.1),
    cap_mean = mean(t_cap),
    cap_p90 = quantile(t_cap, 0.9)
  ) %>%
  drop_na() %>%
  ggplot(aes(x = p_year, y = cap_mean)) +
  geom_line(colour = "midnightblue") +
  geom_ribbon(
    aes(ymin = cap_p10, ymax = cap_p90),
    fill = "midnightblue",
    colour = "midnightblue",
    lty = "dotted",
    alpha = 0.4
  ) +
  scale_y_continuous(labels = scales::comma_format(suffix = " kW")) +
  labs(
    title = "Wind Turbine Rated Capacity Over Time",
    subtitle = "Confidence band represents the 10th and 90th percentile",
    x = "Year",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
Bearing in mind the decrease in wind turbines per project and the increase of wind turbine capacity, it would be interesting to complete the picture with total installed capacity over time:
# Bar chart of summed p_cap per p_year, taking p_cap from the first row of
# every project name so that each project contributes once.
wind %>%
  group_by(p_name) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  drop_na(p_year, p_cap) %>%
  group_by(p_year) %>%
  summarise(total_cap = sum(p_cap)) %>%
  ggplot(aes(x = p_year, y = total_cap)) +
  geom_col(fill = "midnightblue") +
  scale_y_continuous(labels = scales::comma_format(suffix = " MW")) +
  labs(
    title = "Total Wind Turbine Capacity Installed By Year",
    subtitle = NULL,
    x = "Year",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
It is now visible how the increase in wind turbine capacity and the slight creeping up of wind turbines per project since the mid-2000s have led to a strong increase in installed capacity by year.
Looking at the cumulative picture:
# Running total of yearly summed p_cap, drawn as a line with points. p_cap is
# taken from the first row of every project name.
wind %>%
  group_by(p_name) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  drop_na(p_year, p_cap) %>%
  group_by(p_year) %>%
  summarise(total_cap = sum(p_cap)) %>%
  # summarise() returns the years in ascending order, so cumsum() accumulates
  # chronologically.
  mutate(total_cap_cum = cumsum(total_cap)) %>%
  ggplot(aes(x = p_year, y = total_cap_cum)) +
  geom_point(colour = "midnightblue") +
  geom_line(colour = "midnightblue") +
  scale_y_continuous(labels = scales::comma_format(suffix = " MW")) +
  labs(
    title = "Cumulative Wind Turbine Capacity Installed Over Time In The US",
    subtitle = NULL,
    x = "Year",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Mean t_hh per year as a line, with a ribbon spanning the 10th to the 90th
# percentile of t_hh in that year.
wind %>%
  filter(!is.na(t_hh)) %>%
  group_by(p_year) %>%
  summarise(
    hh_p10 = quantile(t_hh, 0.1),
    hh_mean = mean(t_hh),
    hh_p90 = quantile(t_hh, 0.9)
  ) %>%
  drop_na() %>%
  ggplot(aes(x = p_year, y = hh_mean)) +
  geom_line(colour = "midnightblue") +
  geom_ribbon(
    aes(ymin = hh_p10, ymax = hh_p90),
    fill = "midnightblue",
    colour = "midnightblue",
    lty = "dotted",
    alpha = 0.4
  ) +
  scale_y_continuous(labels = scales::comma_format(suffix = " m")) +
  labs(
    title = "Wind Turbine Hub Height Over Time",
    subtitle = "Confidence band represents the 10th and 90th percentile",
    x = "Year",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )
# Mean t_rd per year as a line, with a ribbon spanning the 10th to the 90th
# percentile of t_rd in that year.
wind %>%
  filter(!is.na(t_rd)) %>%
  group_by(p_year) %>%
  # No trailing comma after the last argument, matching the sibling plots.
  summarise(lower_band_diameter = quantile(t_rd, 0.1),
            middle_band_diameter = mean(t_rd),
            higher_band_diameter = quantile(t_rd, 0.9)) %>%
  drop_na() %>%
  ggplot(aes(x = p_year, y = middle_band_diameter)) +
  geom_line(colour = "midnightblue") +
  geom_ribbon(aes(ymin = lower_band_diameter, ymax = higher_band_diameter),
              fill = "midnightblue", colour = "midnightblue",
              lty = "dotted", alpha = 0.4) +
  labs(title = "Wind Turbine Rotor Diameter Over Time",
       subtitle = "Confidence band represents the 10th and 90th percentile",
       y = NULL,
       x = "Year") +
  scale_y_continuous(labels = scales::comma_format(suffix = " m")) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank())
It looks like the diameter has continually increased, while the hub height has stagnated. Let’s look at a ratio:
# Box plot per year of the rotor diameter to hub height ratio (t_rd / t_hh),
# computed for every turbine row that has both values.
wind %>%
  filter(!is.na(t_rd),
         !is.na(t_hh)) %>%
  # The ratio is a row-wise value, so it is computed with mutate(). A grouped
  # summarise() returning many rows per group is deprecated in recent dplyr.
  mutate(ratio = t_rd / t_hh) %>%
  select(p_year, ratio) %>%
  drop_na() %>%
  # The year is treated as a factor so that every year gets its own box.
  ggplot(aes(x = factor(p_year), y = ratio)) +
  # Outlier points are hidden (outlier.colour = NA).
  geom_boxplot(outlier.colour = NA, alpha = 0.4,
               fill = "midnightblue", colour = "midnightblue") +
  labs(title = "Wind Turbine Diameter In Relation To Hub Height Over Time",
       subtitle = "Wind turbine height increases less strongly than wind turbine diameter",
       y = "Diameter/Hub Height Ratio",
       x = "Year") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
It now becomes clear that the height of wind turbines is no longer increasing as strongly as the diameter of the rotors. It seems that the optimal height for capturing wind has been reached, but that the trend of increasing wind turbine capacity leads to further increasing rotor diameters.
# Horizontal bar chart of the 15 most frequent t_manu values (ties at the
# cut-off are kept by slice_max()); rows with a missing manufacturer are
# dropped first.
wind %>%
  count(t_manu) %>%
  drop_na() %>%
  slice_max(order_by = n, n = 15) %>%
  mutate(t_manu = fct_reorder(t_manu, n)) %>%
  ggplot(aes(x = n, y = t_manu)) +
  geom_col(fill = "midnightblue") +
  scale_x_continuous(labels = scales::comma_format()) +
  labs(
    title = "Most Common Wind Turbine Manufacturers In The US",
    subtitle = NULL,
    x = "Wind Turbines Installed",
    y = NULL
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(face = "italic", colour = "grey50"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )
A work by Mathias Steilen