The Tampa Feasibility Report featuring R based Visualizations

 

Visualizations for Professional Writing's Final Deliverable

These visualizations were taken from FDOT data for our local district on the subject of transit usage, and crash data that was munged into relative ratios that had a regression performed on these variables within. 

Pie Chart



The above figure is a pie chart using data collected that estimates the higher end limits of hourly numbers of passengers that could utilize each method of public transportation during peak hours

Transit Regression


This figure here shows the monthly totals for crashes, fatalities and serious injuries from accidents recorded within FDOT data and the regression line method was used against the 'free_y' inputs which allowed there to be a 95 percent confidence regression on the other two variables from the one in question showing certain trends within and between variables.

Bus versus Pubic Transport with 95 percent regression line fitted

The same style of regression was sued here but this time it was a proportionality histogram showing monthly counts for bus riders compared to the total ridership data which collected data from our TECO Streetcar downtown (Whiting Street to Centennial Park, Ybor), HART bus lines that operate within the city limits as well as unincorporated Hillsborough county, Rail Lines and other methods of publicly sponsored transportation. Busses make up the largest proportion of public transit here and it has a direct influence because of the weight of its entries in the dataset. The trendlines became closer after the pandemic which is what constituted such a sharp drop around January 2020, and as a result began to converge on each other because of such a large logistical interruption that caused many employees to leave, and leaving many bay area residents with lesser of an option matched with equal demand for these public bus services.


Below, is the R script used to generate all of the visuals which were based within ggplot, scales and other packages used for advanced visualization

##Professional Writing Visualizations

library(dplyr)

library(tidyverse)

library(ggplot2)

library(ggthemes)

library(ggbreak)

library(RColorBrewer)

library(caret)

library(ISLR)

library(glmnet)

library(base)

library(reshape2)

library(rpart)

library(rpart.plot)

library(scales)

library(zoo)


##Crash Data


setwd("C:\\Users\\tyler\\Desktop\\ProfessionalWritingData")

crash.df <- read.csv("crashdata.csv")

statecrashdata <- crash.df[crash.df$FDOT.District == 'Statewide', c("Year","Month","Measure.Names","Measure.Values")]

district7crash <- crash.df[crash.df$FDOT.District == '7', c("Year","Month","Measure.Names","Measure.Values")]


##Transit Data

setwd("C:\\Users\\tyler\\Desktop\\ProfessionalWritingData")

transit.df <- read.csv("transitdata.csv")

d7transit <- transit.df[transit.df$FDOT.District == "7",c("Year","Month","Measure.Names","Measure.Values")]



#Transit Hist Data Cleaning and Pull Frame

d7transitclean <- d7transit[d7transit$Measure.Names == "Bus" | d7transit$Measure.Names == "Total Ridership", c("Year","Month","Measure.Names","Measure.Values")]

d7transitclean$Date <- as.yearmon(paste(d7transitclean$Year, d7transitclean$Month), "%Y %m")

colnames(d7transitclean) <- c("Year","Month","Mode of Transportation","Number of Users","Date")


###Subsetting for the Composite data

##Crashes

d7c2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]

d7c2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]

d7c2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]

d7c2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Crashes", c("Year","Month","Measure.Names","Measure.Values")]

d7cT <- rbind(d7c2017,d7c2018,d7c2019,d7c2020)

##Fatalities

d7f2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]

d7f2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]

d7f2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]

d7f2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Fatalities", c("Year","Month","Measure.Names","Measure.Values")]

d7fT <- rbind(d7f2017,d7f2018,d7f2019,d7f2020)

##Serious Injuries

d7si2017 <- district7crash[district7crash$Year == '2017' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]

d7si2018 <- district7crash[district7crash$Year == '2018' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]

d7si2019 <- district7crash[district7crash$Year == '2019' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]

d7si2020 <- district7crash[district7crash$Year == '2020' & district7crash$Measure.Names == "Serious Injuries", c("Year","Month","Measure.Names","Measure.Values")]

d7siT <- rbind(d7si2017,d7si2018,d7si2019,d7si2020)

##Aggregate Data

#Aggregate Serious Injuries 

si2017 <- sum(d7si2017$Measure.Values)

si2018 <- sum(d7si2018$Measure.Values)

si2019 <- sum(d7si2019$Measure.Values)

si2020 <- sum(d7si2020$Measure.Values)


#Aggregate Fatalities

f2017 <- sum(d7f2017$Measure.Values)

f2018 <- sum(d7f2018$Measure.Values)

f2019 <- sum(d7f2019$Measure.Values)

f2020 <- sum(d7f2020$Measure.Values)


#Aggregate Crashes

c2017 <- sum(d7c2017$Measure.Values)

c2018 <- sum(d7c2018$Measure.Values)

c2019 <- sum(d7c2019$Measure.Values)

c2020 <- sum(d7c2020$Measure.Values)


#Merged Data

Year <- c(2017,2018,2019,2020)

TotalCrashes <- c(c2017,c2018,c2019,c2020)

TotalFatalities <- c(f2017,f2018,f2019,f2020)

TotalSeriousInjuries <- c(si2017,si2018,si2019,si2020)

compositedata <- cbind(Year,TotalCrashes,TotalFatalities,TotalSeriousInjuries)

finalcomposite <- as.data.frame(compositedata)

view(finalcomposite)


#Visuals 

district7crash$Date <- as.yearmon(paste(district7crash$Year, district7crash$Month), "%Y %m")

crashgraph <- ggplot(district7crash, aes(x = Date, y = Measure.Values , fill = Measure.Names)) +

    geom_col() + 

    geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +

    facet_wrap("Measure.Names", scales = "free_y", ncol = 1) +

    scale_y_continuous(labels = label_comma()) +

    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +

    ggtitle("FDOT District 7 Accidents and Outcomes 2017-2020") +

    ylab("Number of Occurances")


###Transit Visuals

transitgraph <- ggplot(d7transit, aes(x = Year, y = Measure.Values , fill = Measure.Names)) +

  geom_bar(stat = "identity") + 

  facet_wrap("Measure.Names", scales = "free_y", ncol = 1) +

  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +

  ggtitle("FDOT District 7 Transit Usage by Method 2017-2020") +

  ylab("Number of Occurances") +

  scale_y_continuous(labels = scales::label_number_si())


#Transit Histogram

transithistogram <- ggplot(d7transitclean, aes(x = Date, y = `Number of Users` , fill = `Mode of Transportation`)) +

  geom_bar(position = "dodge", stat = "identity") +

  ggtitle("Regression over Bus versus Total Public Transport Ridership District 7") +

  geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +

  scale_y_continuous(labels = label_comma())


#Transit Regression

d7trandate <- d7transit

d7trandate$Date <- as.yearmon(paste(d7trandate$Year, d7trandate$Month), "%Y %m")

colnames(d7trandate) <- c("Year","Month","Mode of Transportation","Number of Users","Date")


transitgraphregression <- ggplot(d7trandate, aes(x = Date, y = `Number of Users` , fill = `Mode of Transportation`)) +

  geom_bar(stat = "identity") + 

  geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95) +

  facet_wrap("`Mode of Transportation`", scales = "free_y", ncol = 1) +

  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +

  ggtitle("FDOT District 7 Transit Use Method 2017-2020; with Regression") +

  ylab("Number of Occurances") +

  scale_y_continuous(labels = label_comma())

transitgraphregression


##Ratio Visuals 

fatalitiestocrash <- (finalcomposite$TotalFatalities / finalcomposite$TotalCrashes) 

fatalitiestoinjury <- (finalcomposite$TotalFatalities / finalcomposite$TotalSeriousInjuries) 

injurytocrash <- (finalcomposite$TotalSeriousInjuries / finalcomposite$TotalCrashes) 

Year <- c(2017,2018,2019,2020)

ratio.df <- data.frame(Year,fatalitiestoinjury,fatalitiestocrash,injurytocrash)

view(ratio.df)


##Pie chart

#max limits


`Private Motor Vehichles` <- 1600

`Mixed Traffic` <- 2800

`Two Way Protected Bike Lane` <- 7500

`Dedicated Transit Lines` <- 8000

Sidewalk <- 9000

`On Street Transitway (Bus & Rail)` <- 25000

Total <- 53900


`Mode of Transport` <- c(`Private Motor Vehichles`, `Mixed Traffic`, `Two Way Protected Bike Lane`, `Dedicated Transit Lines`, Sidewalk, `On Street Transitway (Bus & Rail)`)

piedata <- as.data.frame(`Mode of Transport`)

piedata$Method <- c("Private Motor Vehichles", "Mixed Traffic", "Two Way Protected Bike Lane", "Dedicated Transit Lines", "Sidewalk", "On Street Transitway (Bus & Rail)")

row.names(piedata) <- c("Private Motor Vehichles", "Mixed Traffic", "Two Way Protected Bike Lane", "Dedicated Transit Lines", "Sidewalk", "On Street Transitway (Bus & Rail)")


custom.col <- c("#FFDB6D", "#C4961A", "#F4EDCA", 

                "#D16103", "#C3D7A4", "#52854C")


piechart <- ggplot(piedata, aes(x="", y = `Mode of Transport`, fill = Method)) +

  geom_bar(stat = "identity", width = 1) +

  coord_polar("y", start = 0) +

  geom_text(aes(label = paste0(`Mode of Transport`)), position = position_stack(vjust=0.5)) +

  theme(text=element_text(size=6)) +

  scale_fill_manual(values=custom.col) +

  theme_void() +

  ggtitle("Predicted Max Limit of People That Could Use Transit in Peak Hours")


## Visual Calls

piechart

transitgraph

transithistogram

crashgraph

transitgraphregression


Comments

Popular posts from this blog

R Package: pfStat

Module 7 Assignment

Visual Analytics Final Project: Portuguese Wine and Which Components Correlate to Higher Quality