# ERA distribution by season.
# yearID is converted to a factor so geom_boxplot() draws one box per year.
pitching_data$yearID <- as.factor(pitching_data$yearID)
ggplot(pitching_data) +
  geom_boxplot(aes(x = yearID, y = ERA))
# Knitted output of the plot above (rows with non-finite ERA are dropped):
## Warning: Removed 90 rows containing non-finite values (stat_boxplot).

# Per-year median and quartiles of ERA. Missing ERA values are excluded
# (na.rm = TRUE); TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
summary_pitching <- pitching_data %>%
  group_by(yearID) %>%
  summarize(
    median = median(ERA, na.rm = TRUE),
    Q1 = quantile(ERA, 0.25, na.rm = TRUE),
    Q3 = quantile(ERA, 0.75, na.rm = TRUE)
  )

# Go through character so the factor yields the year values, not level codes;
# a numeric x axis is needed for the line and ribbon below.
summary_pitching$yearID <- as.numeric(as.character(summary_pitching$yearID))

# Median ERA over time.
ggplot(summary_pitching) +
  geom_line(aes(x = yearID, y = median))

# Median ERA over time with the interquartile range (Q1 to Q3) as a band.
ggplot(summary_pitching) +
  geom_ribbon(aes(x = yearID, ymin = Q1, ymax = Q3), fill = "lightgreen") +
  geom_line(aes(x = yearID, y = median), color = "darkblue")
# Share of pitchers per season with a low (<= 3) or high (>= 6) ERA,
# restricted to pitchers who appeared in at least 10 games (G >= 10).

# NOTE(review): as.numeric() on a factor returns level codes, not the values.
# Presumably ERA is already numeric (or character) here -- confirm.
pitching_data$ERA <- as.numeric(pitching_data$ERA)
pitched_ten_games <- filter(pitching_data, G >= 10)

# na.rm = TRUE: ERA contains missing values, and a single NA within a year
# would otherwise make that year's proportion NA and break the plotted line.
summary_pitching_2 <- pitched_ten_games %>%
  group_by(yearID) %>%
  summarize(
    three_era_proportion = mean(ERA <= 3, na.rm = TRUE),
    six_era_proportion = mean(ERA >= 6, na.rm = TRUE)
  )

# Via character so a factor yearID yields the year values, not level codes.
summary_pitching_2$yearID <- as.numeric(
  as.character(summary_pitching_2$yearID)
)

# One line per proportion; the constant strings mapped to `color` become the
# legend entries that scale_color_manual() assigns colors to.
ggplot(summary_pitching_2, aes(x = yearID)) +
  geom_line(aes(y = three_era_proportion, color = "3 or under")) +
  geom_line(aes(y = six_era_proportion, color = "6 or higher")) +
  theme_classic() +
  scale_color_manual(
    values = c("3 or under" = "darkblue", "6 or higher" = "red"),
    name = "ERA"
  ) +
  labs(
    y = "Proportion",
    x = "Year",
    title = "Proportion of Pitchers (pitching at least 10 games)\n With Low and High ERAs by Year"
  )
# Median and quartile salaries by year, split by whether the player was born
# in the USA, scaled by the inflation2015 factor and plotted over time.

# Join keys are coerced to character before joining (presumably to avoid
# mismatched factor levels between the two tables -- confirm), then the key
# of the joined table is turned back into a factor.
player_data$playerID <- as.character(player_data$playerID)
salary_data$playerID <- as.character(salary_data$playerID)
joined_players_salary <- inner_join(player_data, salary_data, by = "playerID")
joined_players_salary$playerID <- as.factor(joined_players_salary$playerID)

# Rows with a missing birthCountry get NA here, because the comparison itself
# is NA. A second nested ifelse() with an "NA" fallback could never be reached,
# so a single two-way ifelse() gives exactly the same result.
joined_players_salary <- mutate(
  joined_players_salary,
  usa_born = ifelse(birthCountry == "USA", "Born in USA", "Born outside USA")
)

# Salary median and quartiles per (year, birthplace group); missing salaries
# are excluded.
summary_playerdata <- joined_players_salary %>%
  group_by(yearID, usa_born) %>%
  summarize(
    median = median(salary, na.rm = TRUE),
    Q1 = quantile(salary, 0.25, na.rm = TRUE),
    Q3 = quantile(salary, 0.75, na.rm = TRUE)
  )

# The first column of inflation_index is renamed so it can serve as join key.
names(inflation_index)[1] <- "yearID"
summary_playerdata_left <- left_join(
  summary_playerdata, inflation_index,
  by = "yearID"
)

# Force the factor for 2015 to 1 (presumably the reference year of the
# inflation2015 index -- confirm against the index source).
summary_playerdata_left[
  summary_playerdata_left$yearID == 2015, "inflation2015"
] <- 1

summary_player_data <- mutate(
  summary_playerdata_left,
  median_inflation_adjusted = median * inflation2015,
  Q1_inflation_adjusted = Q1 * inflation2015,
  Q3_inflation_adjusted = Q3 * inflation2015
)

# Ribbon = Q1 to Q3 per group, line = median per group.
ggplot(summary_player_data) +
  geom_ribbon(
    aes(
      x = yearID,
      ymin = Q1_inflation_adjusted,
      ymax = Q3_inflation_adjusted,
      fill = usa_born
    ),
    alpha = .4
  ) +
  geom_line(
    aes(x = yearID, y = median_inflation_adjusted, color = usa_born),
    size = 1
  ) +
  scale_y_continuous(labels = scales::dollar) +
  scale_color_discrete("Median Salary") +
  scale_fill_discrete("Middle 50% of Earners") +
  labs(
    title = "Salaries of Middle 50% of Earners in Major League Baseball",
    x = "Year",
    # Closing parenthesis added; the original label was left unbalanced.
    y = "Annual Salary\n(Adjusted for Inflation)"
  ) +
  theme_minimal()