The Tampa Feasibility Report featuring R based Visualizations
Visualizations for Professional Writing's Final Deliverable
These visualizations were taken from FDOT data for our local district on the subject of transit usage, and crash data that was munged into relative ratios that had a regression performed on these variables within.
Pie Chart
This figure here shows the monthly totals for crashes, fatalities and serious injuries from accidents recorded within FDOT data and the regression line method was used against the 'free_y' inputs which allowed there to be a 95 percent confidence regression on the other two variables from the one in question showing certain trends within and between variables.
Bus versus Pubic Transport with 95 percent regression line fitted
The same style of regression was sued here but this time it was a proportionality histogram showing monthly counts for bus riders compared to the total ridership data which collected data from our TECO Streetcar downtown (Whiting Street to Centennial Park, Ybor), HART bus lines that operate within the city limits as well as unincorporated Hillsborough county, Rail Lines and other methods of publicly sponsored transportation. Busses make up the largest proportion of public transit here and it has a direct influence because of the weight of its entries in the dataset. The trendlines became closer after the pandemic which is what constituted such a sharp drop around January 2020, and as a result began to converge on each other because of such a large logistical interruption that caused many employees to leave, and leaving many bay area residents with lesser of an option matched with equal demand for these public bus services.
Below, is the R script used to generate all of the visuals which were based within ggplot, scales and other packages used for advanced visualization
##Professional Writing Visualizations
library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(ggbreak)
library(RColorBrewer)
library(caret)
library(ISLR)
library(glmnet)
library(base)
library(reshape2)
library(rpart)
library(rpart.plot)
library(scales)
library(zoo)
##Crash Data
setwd("C:\\Users\\tyler\\Desktop\\ProfessionalWritingData")
crash.df <- read.csv("crashdata.csv")
statecrashdata <- crash.df[crash.df$FDOT.District == 'Statewide', c("Year","Month","Measure.Names","Measure.Values")]
district7crash <- crash.df[crash.df$FDOT.District == '7', c("Year","Month","Measure.Names","Measure.Values")]
##Transit Data
setwd("C:\\Users\\tyler\\Desktop\\ProfessionalWritingData")
transit.df <- read.csv("transitdata.csv")
d7transit <- transit.df[transit.df$FDOT.District == "7",c("Year","Month","Measure.Names","Measure.Values")]
#Transit Hist Data Cleaning and Pull Frame
d7transitclean <- d7transit[d7transit$Measure.Names == "Bus" | d7transit$Measure.Names == "Total Ridership", c("Year","Month","Measure.Names","Measure.Values")]
d7transitclean$Date <- as.yearmon(paste(d7transitclean$Year, d7transitclean$Month), "%Y %m")
colnames(d7transitclean) <- c("Year","Month","Mode of Transportation","Number of Users","Date")
###Subsetting for the Composite data
##Crashes
d7c2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]
d7c2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]
d7c2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]
d7c2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]
d7cT <- rbind(d7c2017,d7c2018,d7c2019,d7c2020)
##Fatalities
d7f2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]
d7f2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]
d7f2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]
d7f2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]
d7fT <- rbind(d7f2017,d7f2018,d7f2019,d7f2020)
##Serious Injuries
d7si2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]
d7si2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]
d7si2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]
d7si2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]
d7siT <- rbind(d7si2017,d7si2018,d7si2019,d7si2020)
##Aggregate Data
#Aggregate Serious Injuries
si2017 <- sum(d7si2017$Measure.Values)
si2018 <- sum(d7si2018$Measure.Values)
si2019 <- sum(d7si2019$Measure.Values)
si2020 <- sum(d7si2020$Measure.Values)
#Aggregate Fatalities
f2017 <- sum(d7f2017$Measure.Values)
f2018 <- sum(d7f2018$Measure.Values)
f2019 <- sum(d7f2019$Measure.Values)
f2020 <- sum(d7f2020$Measure.Values)
#Aggregate Crashes
c2017 <- sum(d7c2017$Measure.Values)
c2018 <- sum(d7c2018$Measure.Values)
c2019 <- sum(d7c2019$Measure.Values)
c2020 <- sum(d7c2020$Measure.Values)
#Merged Data
Year <- c(2017,2018,2019,2020)
TotalCrashes <- c(c2017,c2018,c2019,c2020)
TotalFatalities <- c(f2017,f2018,f2019,f2020)
TotalSeriousInjuries <- c(si2017,si2018,si2019,si2020)
compositedata <- cbind(Year,TotalCrashes,TotalFatalities,TotalSeriousInjuries)
finalcomposite <- as.data.frame(compositedata)
view(finalcomposite)
#Visuals
district7crash$Date <- as.yearmon(paste(district7crash$Year, district7crash$Month), "%Y %m")
crashgraph <- ggplot(district7crash, aes(x = Date, y = Measure.Values , fill = Measure.Names)) +
geom_col() +
geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +
facet_wrap("Measure.Names", scales = "free_y", ncol = 1) +
scale_y_continuous(labels = label_comma()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle("FDOT District 7 Accidents and Outcomes 2017-2020") +
ylab("Number of Occurances")
###Transit Visuals
transitgraph <- ggplot(d7transit, aes(x = Year, y = Measure.Values , fill = Measure.Names)) +
geom_bar(stat = "identity") +
facet_wrap("Measure.Names", scales = "free_y", ncol = 1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle("FDOT District 7 Transit Usage by Method 2017-2020") +
ylab("Number of Occurances") +
scale_y_continuous(labels = scales::label_number_si())
#Transit Histogram
transithistogram <- ggplot(d7transitclean, aes(x = Date, y = `Number of Users` , fill = `Mode of Transportation`)) +
geom_bar(position = "dodge", stat = "identity") +
ggtitle("Regression over Bus versus Total Public Transport Ridership District 7") +
geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +
scale_y_continuous(labels = label_comma())
#Transit Regression
d7trandate <- d7transit
d7trandate$Date <- as.yearmon(paste(d7trandate$Year, d7trandate$Month), "%Y %m")
colnames(d7trandate) <- c("Year","Month","Mode of Transportation","Number of Users","Date")
transitgraphregression <- ggplot(d7trandate, aes(x = Date, y = `Number of Users` , fill = `Mode of Transportation`)) +
geom_bar(stat = "identity") +
geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +
facet_wrap("`Mode of Transportation`", scales = "free_y", ncol = 1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
ggtitle("FDOT District 7 Transit Use Method 2017-2020; with Regression") +
ylab("Number of Occurances") +
scale_y_continuous(labels = label_comma())
transitgraphregression
##Ratio Visuals
fatalitiestocrash <- (finalcomposite$TotalFatalities / finalcomposite$TotalCrashes)
fatalitiestoinjury <- (finalcomposite$TotalFatalities / finalcomposite$TotalSeriousInjuries)
injurytocrash <- (finalcomposite$TotalSeriousInjuries / finalcomposite$TotalCrashes)
Year <- c(2017,2018,2019,2020)
ratio.df <- data.frame(Year,fatalitiestoinjury,fatalitiestocrash,injurytocrash)
view(ratio.df)
##Pie chart
#max limits
`Private Motor Vehichles` <- 1600
`Mixed Traffic` <- 2800
`Two Way Protected Bike Lane` <- 7500
`Dedicated Transit Lines` <- 8000
Sidewalk <- 9000
`On Street Transitway (Bus & Rail)` <- 25000
Total <- 53900
`Mode of Transport` <- c(`Private Motor Vehichles`, `Mixed Traffic`, `Two Way Protected Bike Lane`, `Dedicated Transit Lines`, Sidewalk, `On Street Transitway (Bus & Rail)`)
piedata <- as.data.frame(`Mode of Transport`)
piedata$Method <- c("Private Motor Vehichles", "Mixed Traffic", "Two Way Protected Bike Lane", "Dedicated Transit Lines", "Sidewalk", "On Street Transitway (Bus & Rail)")
row.names(piedata) <- c("Private Motor Vehichles", "Mixed Traffic", "Two Way Protected Bike Lane", "Dedicated Transit Lines", "Sidewalk", "On Street Transitway (Bus & Rail)")
custom.col <- c("#FFDB6D", "#C4961A", "#F4EDCA",
"#D16103", "#C3D7A4", "#52854C")
piechart <- ggplot(piedata, aes(x="", y = `Mode of Transport`, fill = Method)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
geom_text(aes(label = paste0(`Mode of Transport`)), position = position_stack(vjust=0.5)) +
theme(text=element_text(size=6)) +
scale_fill_manual(values=custom.col) +
theme_void() +
ggtitle("Predicted Max Limit of People That Could Use Transit in Peak Hours")
## Visual Calls
piechart
transitgraph
transithistogram
crashgraph
transitgraphregression
Comments
Post a Comment