# Store the season as a factor; the boxplot below uses it as a discrete x axis.
pitching_data[["yearID"]] <- as.factor(pitching_data[["yearID"]])

# Distribution of ERA within each season.
ggplot(data = pitching_data, mapping = aes(x = yearID, y = ERA)) +
  geom_boxplot()
# Console output recorded for the boxplot above:
## Warning: Removed 90 rows containing non-finite values (stat_boxplot).

# Per-season median and quartiles of ERA. Missing ERA values are dropped
# before each statistic is computed (na.rm = TRUE; spelled out because `T`
# is an ordinary variable that can be reassigned).
summary_pitching <- pitching_data %>%
  group_by(yearID) %>%
  summarize(
    median = median(ERA, na.rm = TRUE),
    Q1 = quantile(ERA, 0.25, na.rm = TRUE),
    Q3 = quantile(ERA, 0.75, na.rm = TRUE)
  )

# Go through as.character() so that a factor yearID yields the actual year
# values rather than the factor's internal level codes.
summary_pitching$yearID <- as.numeric(as.character(summary_pitching$yearID))

# Median ERA by season.
ggplot(summary_pitching) +
  geom_line(aes(x = yearID, y = median))

# Median ERA (dark blue line) drawn over the Q1-Q3 band (light green ribbon).
ggplot(summary_pitching) +
  geom_ribbon(aes(x = yearID, ymin = Q1, ymax = Q3), fill = "lightgreen") +
  geom_line(aes(x = yearID, y = median), color = "darkblue")

# NOTE(review): ERA is already used as a numeric column by the summary
# statistics earlier in this script, so this coercion is expected to be a
# no-op. If ERA were ever a factor, as.numeric() would return level codes
# instead of the ERA values -- confirm the column type at load time.
pitching_data$ERA <- as.numeric(pitching_data$ERA)

# Restrict to pitcher rows with at least 10 games.
pitched_ten_games <- filter(pitching_data, G >= 10)

# Share of those pitchers, per season, with an ERA of 3 or lower and with an
# ERA of 6 or higher. na.rm = TRUE keeps one missing ERA from turning the
# whole season's proportion into NA; the proportions are therefore taken over
# pitchers whose ERA is known.
summary_pitching_2 <- pitched_ten_games %>%
  group_by(yearID) %>%
  summarize(
    three_era_proportion = mean(ERA <= 3, na.rm = TRUE),
    six_era_proportion = mean(ERA >= 6, na.rm = TRUE)
  )

# Factor -> character -> numeric so the x axis shows real years, not level
# codes.
summary_pitching_2$yearID <- as.numeric(
  as.character(summary_pitching_2$yearID)
)

# Both proportions on one chart. The colour aesthetic is mapped to constant
# labels so that scale_color_manual() can build the legend from them.
ggplot(summary_pitching_2, aes(x = yearID)) +
  geom_line(aes(y = three_era_proportion, color = "3 or under")) +
  geom_line(aes(y = six_era_proportion, color = "6 or higher")) +
  theme_classic() +
  scale_color_manual(
    values = c("3 or under" = "darkblue", "6 or higher" = "red"),
    name = "ERA"
  ) +
  labs(
    y = "Proportion",
    x = "Year",
    title = "Proportion of Pitchers (pitching at least 10 games)\n With Low and High ERAs by Year"
  )

# Make the join key a plain character vector in both tables before joining.
player_data$playerID <- as.character(player_data$playerID)
salary_data$playerID <- as.character(salary_data$playerID)

# Keep only rows whose playerID appears in both tables.
joined_players_salary <- inner_join(player_data, salary_data, by = "playerID")

joined_players_salary$playerID <- as.factor(joined_players_salary$playerID)

# Label each row by birthplace. When birthCountry is missing the comparison is
# NA and ifelse() returns NA for that row, so no separate missing-value branch
# is needed (a nested fallback branch would never be reached).
joined_players_salary <- mutate(
  joined_players_salary,
  usa_born = ifelse(birthCountry == "USA", "Born in USA", "Born outside USA")
)

# Median and quartiles of salary for every season / birthplace combination,
# ignoring missing salaries.
summary_playerdata <- joined_players_salary %>%
  group_by(yearID, usa_born) %>%
  summarize(
    median = median(salary, na.rm = TRUE),
    Q1 = quantile(salary, 0.25, na.rm = TRUE),
    Q3 = quantile(salary, 0.75, na.rm = TRUE)
  )


# Rename the first column of the inflation table so it can be joined on
# yearID.
names(inflation_index)[1] <- "yearID"

# Attach the inflation multiplier to every season / birthplace summary row;
# seasons without a match in inflation_index keep NA.
summary_playerdata_left <- left_join(
  summary_playerdata,
  inflation_index,
  by = "yearID"
)

# Force the multiplier for 2015 to 1.
# NOTE(review): presumably 2015 is the base year and has no usable entry in
# inflation_index -- confirm against the inflation data.
summary_playerdata_left[
  summary_playerdata_left$yearID == 2015, "inflation2015"
] <- 1

# Scale the salary statistics by the inflation2015 multiplier.
summary_player_data <- mutate(
  summary_playerdata_left,
  median_inflation_adjusted = median * inflation2015,
  Q1_inflation_adjusted = Q1 * inflation2015,
  Q3_inflation_adjusted = Q3 * inflation2015
)

# Adjusted median salary (line) over the adjusted Q1-Q3 band (ribbon), split
# by birthplace. The y-axis label now closes its parenthesis.
ggplot(summary_player_data) +
  geom_ribbon(
    aes(
      x = yearID,
      ymin = Q1_inflation_adjusted,
      ymax = Q3_inflation_adjusted,
      fill = usa_born
    ),
    alpha = .4
  ) +
  geom_line(
    aes(x = yearID, y = median_inflation_adjusted, color = usa_born),
    size = 1
  ) +
  scale_y_continuous(labels = scales::dollar) +
  scale_color_discrete("Median Salary") +
  scale_fill_discrete("Middle 50% of Earners") +
  labs(
    title = "Salaries of Middle 50% of Earners in Major League Baseball",
    x = "Year",
    y = "Annual Salary\n(Adjusted for Inflation)"
  ) +
  theme_minimal()