> payroll_tab %>%
+ filter(teamID %in% c("OAK", "BOS", "NYA", "ATL", "TBA")) %>%
+ ggplot(aes(x=yearID, y=efficiency)) +
+ geom_smooth() +
+ geom_point(aes(colour=teamID)) +
+ xlab("Year") +
+ ylab("Winning Efficiency") +
+ ggtitle("Efficiency of Teams Over Time") +
+ labs(colour="Team")
> payroll_tab %>%
+ filter(teamID %in% c("OAK", "BOS", "NYA", "ATL", "TBA")) %>%
+ ggplot(aes(x=yearID, y=efficiency, color = teamID)) +
+ geom_smooth() +
+ #geom_point(aes(colour=teamID)) +
+ xlab("Year") +
+ ylab("Winning Efficiency") +
+ ggtitle("Efficiency of Teams Over Time") +
+ labs(colour="Team")
> save.image("~/project2/project2_env.RData")
> install.packages("UScensus2010")
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/UScensus2010_0.11.tar.gz'
Content type 'application/x-gzip' length 63114 bytes (61 KB)
==================================================
downloaded 61 KB
* installing *source* package ‘UScensus2010’ ...
** package ‘UScensus2010’ successfully unpacked and MD5 sums checked
** R
** data
** inst
** preparing package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded
* DONE (UScensus2010)
The downloaded source packages are in
‘/tmp/Rtmp9uHv2u/downloaded_packages’
> library(UScensus2010)
Loading required package: maptools
Loading required package: sp
Checking rgeos availability: TRUE
Loading required package: foreign
Package UScensus2010: US Census 2010 Suite of R Packages
Version 0.11 created on 2011-11-18.
Zack Almquist, University of California-Irvine
ne
For citation information, type citation("UScensus2010").
Type help(package=UScensus2010) to get started.
> help(package="UScensus2010")
> data(maryland)
Warning message:
In data(maryland) : data set ‘maryland’ not found
> data("maryland")
Warning message:
In data("maryland") : data set ‘maryland’ not found
> ?UScensus2010::county
> ?UScensus2010::state
No documentation for ‘state’ in specified packages and libraries:
you could try ‘??state’
> ?UScensus2010::city
> md <- UScensus2010::city("Maryland")
Error in if (!statefips & nchar(state) == 2) { :
argument is of length zero
> md <- UScensus2010::city(",aryland")
Error in if (!statefips & nchar(state) == 2) { :
argument is of length zero
> md <- UScensus2010::city(state="maryland")
Error in get(paste(state, ".cdp10", sep = "")) :
object 'maryland.cdp10' not found
In addition: Warning message:
In data(list = paste(state, ".cdp10", sep = ""), envir = parent.frame()) :
data set ‘maryland.cdp10’ not found
> md <- UScensus2010::city(state="montgomery")
Error in city.aux(name, state, statefips, sp.object, proj) :
Not a State!
> md <- UScensus2010::city("montgomery", state="maryland")
Error in get(paste(state, ".cdp10", sep = "")) :
object 'maryland.cdp10' not found
In addition: Warning message:
In data(list = paste(state, ".cdp10", sep = ""), envir = parent.frame()) :
data set ‘maryland.cdp10’ not found
02 Nov 2016 15:07:22 [rsession-rstudio] ERROR session hadabend; LOGGED FROM: rstudio::core::Error {anonymous}::rInit(const rstudio::r::session::RInitInfo&) /home/ubuntu/rstudio/src/cpp/session/SessionMain.cpp:1862
Checking rgeos availability: TRUE
Error in library(packageName, lib.loc = lib, character.only = TRUE) :
there is no package called ‘UScensus2010’
02 Nov 2016 15:07:24 [rsession-rstudio] ERROR r error 4 (R code execution error) [errormsg=Error in library(packageName, lib.loc = lib, character.only = TRUE) :
there is no package called ‘UScensus2010’
, context=Error restoring session data (loading package UScensus2010)]; OCCURRED AT: rstudio::core::Error rstudio::r::exec::{anonymous}::evaluateExpressionsUnsafe(SEXP, SEXP, SEXPREC**, rstudio::r::sexp::Protect*) /home/ubuntu/rstudio/src/cpp/r/RExec.cpp:147; LOGGED FROM: void rstudio::r::session::search_path::{anonymous}::loadPackage(const string&, const string&) /home/ubuntu/rstudio/src/cpp/r/session/RSearchPath.cpp:198
Error restoring session data (loading package UScensus2010): R code execution error
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_line() +
+ facet_wrap(~teamID) +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Team Payrolls Over Time for Teams") +
+ theme(text = element_text(size=20),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_point() +
+ geom_smooth() +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Payrolls of Teams Over Time")
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(avg_payroll = mean(payroll)) %>%
+ ggplot(aes(x=yearID, y=avg_payroll)) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Average Payroll of Baseball Teams") +
+ ggtitle("Average Payroll of Baseball Teams over Time") +
+ geom_smooth()
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_line() +
+ facet_wrap(~teamID) +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Team Payrolls Over Time for Teams") +
+ theme(text = element_text(size=10),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_line() +
+ facet_wrap(~teamID) +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Team Payrolls Over Time for Teams") +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_point() +
+ geom_smooth() +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Payrolls of Teams Over Time")
> ?theme
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(avg_payroll = mean(payroll)) %>%
+ ggplot(aes(x=yearID, y=avg_payroll)) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Average Payroll of Baseball Teams") +
+ ggtitle("Average Payroll of Baseball Teams over Time") +
+ geom_smooth()
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth()
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = (max(payroll_tab$yearID) - min(payroll_tab$yearID)))
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = 10)
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = 10)
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth()
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = number_ticks(10))
Error in check_breaks_labels(breaks, labels) :
could not find function "number_ticks"
> ?pretty
> pretty(10,n)
Error in n%/%3 : non-numeric argument to binary operator
> pretty(10,1)
[1] 10
> pretty(10,2)
[1] 10
> pretty(10,3)
[1] 0 10
> pretty(limits,10)
Error in pretty(limits, 10) : object 'limits' not found
> ?pretty_breaks
> pretty_breaks(3)
Error: could not find function "pretty_breaks"
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = pretty_breaks(10))
Error in check_breaks_labels(breaks, labels) :
could not find function "pretty_breaks"
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(10))
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(15))
> ?range
> range(payroll$yearID)
Error: object 'payroll' not found
> range(payroll_tab$yearID)
[1] 1985 2014
> diff(range(payroll_tab$yearID))
[1] 29
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(diff(range(payroll_tab$yearID))))
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(diff(range(payroll_tab$yearID)))) +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(diff(range(payroll_tab$yearID)))/2) +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
Error in scales::pretty_breaks(diff(range(payroll_tab$yearID)))/2 :
non-numeric argument to binary operator
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(diff(range(payroll_tab$yearID)))) +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ group_by(yearID) %>%
+ summarise(max_payroll = max(payroll), min_payroll = min(payroll)) %>%
+ ggplot(aes(x = yearID, y = (max_payroll-min_payroll))) +
+ geom_bar(stat = "identity") +
+ xlab("Year") +
+ ylab("Payroll Spread") +
+ ggtitle("Difference in Payroll Between Wealthy and Poor Teams Over Time") +
+ geom_smooth() +
+ scale_x_continuous(breaks = scales::pretty_breaks(20)) +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>% sample_n(10) %>% select(teamID, yearID,year_range)
# A tibble: 10 × 3
teamID yearID year_range
<chr> <int> <fctr>
1 CAL 1991 (1991,1997]
2 CHN 2006 (2002,2008]
3 DET 2001 (1997,2002]
4 NYA 1993 (1991,1997]
5 SEA 1986 (1985,1991]
6 CHN 1988 (1985,1991]
7 ARI 2009 (2008,2014]
8 HOU 1999 (1997,2002]
9 SFN 1985 (1985,1991]
10 SEA 2010 (2008,2014]
> avg_stats_per_year %>% sample_n(1) %>% select(teamID,average_pay_in_years,average_win_percent_in_years,year_range)
Source: local data frame [0 x 4]
Groups: year_range [0]
# ... with 4 variables: teamID <chr>, average_pay_in_years <dbl>, average_win_percent_in_years <dbl>,
# year_range <fctr>
> avg_stats_per_year %>% select(teamID,average_pay_in_years,average_win_percent_in_years,year_range)
Source: local data frame [148 x 4]
Groups: year_range [5]
teamID average_pay_in_years average_win_percent_in_years year_range
<chr> <dbl> <dbl> <fctr>
1 ATL 14475059 40.22038 (1985,1991]
2 BAL 11658262 45.40360 (1985,1991]
3 BOS 14563356 52.89024 (1985,1991]
4 CAL 15077312 51.74897 (1985,1991]
5 CHA 9008958 48.18396 (1985,1991]
6 CHN 13605046 48.44389 (1985,1991]
7 CIN 10646369 52.73049 (1985,1991]
8 CLE 9232153 44.49431 (1985,1991]
9 DET 13402658 50.97960 (1985,1991]
10 HOU 13020061 51.23457 (1985,1991]
# ... with 138 more rows
> avg_stats_per_year
Source: local data frame [148 x 4]
Groups: year_range [5]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] ATL 14475059 40.22038
2 (1985,1991] BAL 11658262 45.40360
3 (1985,1991] BOS 14563356 52.89024
4 (1985,1991] CAL 15077312 51.74897
5 (1985,1991] CHA 9008958 48.18396
6 (1985,1991] CHN 13605046 48.44389
7 (1985,1991] CIN 10646369 52.73049
8 (1985,1991] CLE 9232153 44.49431
9 (1985,1991] DET 13402658 50.97960
10 (1985,1991] HOU 13020061 51.23457
# ... with 138 more rows
> avg_stats_per_year %>% sample_n(10)
Source: local data frame [50 x 4]
Groups: year_range [5]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] BOS 14563356 52.89024
2 (1985,1991] PHI 11807505 46.30896
3 (1985,1991] TOR 13027137 55.87318
4 (1985,1991] SEA 7085071 45.00230
5 (1985,1991] CLE 9232153 44.49431
6 (1985,1991] CHA 9008958 48.18396
7 (1985,1991] OAK 12618240 55.55556
8 (1985,1991] LAN 16466313 51.33359
9 (1985,1991] CAL 15077312 51.74897
10 (1985,1991] SFN 11856147 50.92593
# ... with 40 more rows
> avg_stats_per_year %>% sample_n(10)
Source: local data frame [50 x 4]
Groups: year_range [5]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] DET 13402658 50.97960
2 (1985,1991] HOU 13020061 51.23457
3 (1985,1991] NYA 17883336 51.80968
4 (1985,1991] ATL 14475059 40.22038
5 (1985,1991] TOR 13027137 55.87318
6 (1985,1991] TEX 7901650 47.40945
7 (1985,1991] BOS 14563356 52.89024
8 (1985,1991] MON 11252155 51.49272
9 (1985,1991] CLE 9232153 44.49431
10 (1985,1991] PHI 11807505 46.30896
# ... with 40 more rows
> avg_stats_per_year %>%
+ ggplot(
+ aes(x=average_pay_in_years, y=average_win_percent_in_years, label=teamID)) +
+ geom_point() +
+ geom_text() +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Average Winning Percentage vs. Average Payroll across Time") +
+ geom_smooth(method = 'lm')
> avg_stats_per_year %>%
+ ggplot(
+ aes(x=average_pay_in_years, y=average_win_percent_in_years, label=teamID)) +
+ geom_text() +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Average Winning Percentage vs. Average Payroll across Time") +
+ geom_smooth(method = 'lm')
> avg_stats_per_year %>%
+ ggplot(aes(x=average_pay_in_years, y=average_win_percent_in_years)) +
+ geom_point(aes(colour=ifelse(teamID=="OAK", 'Oakland As', "Other Team"))) +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Oakland A's Spending Efficency Over Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour="Team")
> head(standard_payrolls)
# A tibble: 6 × 3
yearID average_payroll_year st_dev_payroll_year
<int> <dbl> <dbl>
1 1985 10075565 2470845
2 1986 11840558 3186956
3 1987 10483668 3848337
4 1988 11555862 3386331
5 1989 13845989 3568844
6 1990 17072354 3771834
> avg_std_stats_per_year %>%
+ ggplot(
+ aes(x=average_pay_in_years, y=average_win_percent_in_years, label=teamID)) +
+ geom_point() +
+ geom_text() +
+ facet_wrap(~year_range) +
+ xlab("Average Standard Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Average Winning Percentage vs. Average Standard Payroll across Time") +
+ geom_smooth(method = 'lm')
> payroll_tab %>%
+ ggplot(
+ aes(x=standard_payroll, y=win_percentage, label=teamID)) +
+ geom_point(aes(colour=yearID)) +
+ #geom_text() +
+ xlab("Standard Team Payroll") +
+ ylab("Winning Percentage") +
+ ggtitle("Winning Percentage vs. Standard Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour = "Year")
> payroll_tab %>%
+ ggplot(aes(x=standard_payroll, y=win_percentage, label=teamID)) +
+ geom_point(aes(colour=yearID)) +
+ xlab("Standard Team Payroll") +
+ ylab("Winning Percentage") +
+ ggtitle("Winning Percentage vs. Standard Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour = "Year")
> head(payroll_tab %>% select(teamID, yearID,win_percentage, expected_win_pct))
# A tibble: 6 × 4
teamID yearID win_percentage expected_win_pct
<chr> <int> <dbl> <dbl>
1 ATL 1985 40.74074 54.78726
2 BAL 1985 51.55280 51.50267
3 BOS 1985 49.69325 50.83169
4 CAL 1985 55.55556 54.40368
5 CHA 1985 52.14724 49.76791
6 CHN 1985 47.53086 52.65835
> payroll_tab <- payroll_tab %>% mutate(efficiency = win_percentage-expected_win_pct)
> payroll_tab %>%
+ filter(teamID %in% c("OAK", "BOS", "NYA", "ATL", "TBA")) %>%
+ ggplot(aes(x=yearID, y=efficiency)) +
+ geom_smooth() +
+ geom_point(aes(colour=teamID)) +
+ xlab("Year") +
+ ylab("Winning Efficiency") +
+ ggtitle("Efficiency of Teams (Overall) Over Time") +
+ labs(colour="Team")
> payroll_tab %>%
+ filter(teamID %in% c("OAK", "BOS", "NYA", "ATL", "TBA")) %>%
+ ggplot(aes(x=yearID, y=efficiency, color = teamID)) +
+ geom_smooth() +
+ #geom_point(aes(colour=teamID)) +
+ xlab("Year") +
+ ylab("Winning Efficiency") +
+ ggtitle("Efficiency of Specific Teams Over Time") +
+ labs(colour="Team")
> avg_stats_per_year %>% sample_n(10)
Source: local data frame [50 x 4]
Groups: year_range [5]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] DET 13402658 50.97960
2 (1985,1991] NYA 17883336 51.80968
3 (1985,1991] MIN 10584470 49.17695
4 (1985,1991] CLE 9232153 44.49431
5 (1985,1991] CHN 13605046 48.44389
6 (1985,1991] KCA 15132358 51.64481
7 (1985,1991] NYN 16158735 59.38786
8 (1985,1991] LAN 16466313 51.33359
9 (1985,1991] ML4 11362523 49.58017
10 (1985,1991] OAK 12618240 55.55556
# ... with 40 more rows
> avg_stats_per_year <- payroll_tab %>%
+ group_by(year_range,teamID) %>%
+ summarise(average_pay_in_years = mean(payroll),
+ average_win_percent_in_years = mean(win_percentage, na.rm=TRUE))
> avg_stats_per_year %>% sample_n(10)
Source: local data frame [0 x 4]
Groups: year_range [0]
# ... with 4 variables: year_range <fctr>, teamID <chr>, average_pay_in_years <dbl>,
# average_win_percent_in_years <dbl>
> avg_stats_per_year
Source: local data frame [148 x 4]
Groups: year_range [?]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] ATL 14475059 40.22038
2 (1985,1991] BAL 11658262 45.40360
3 (1985,1991] BOS 14563356 52.89024
4 (1985,1991] CAL 15077312 51.74897
5 (1985,1991] CHA 9008958 48.18396
6 (1985,1991] CHN 13605046 48.44389
7 (1985,1991] CIN 10646369 52.73049
8 (1985,1991] CLE 9232153 44.49431
9 (1985,1991] DET 13402658 50.97960
10 (1985,1991] HOU 13020061 51.23457
# ... with 138 more rows
> head(avg_stats_per_year)
Source: local data frame [6 x 4]
Groups: year_range [1]
year_range teamID average_pay_in_years average_win_percent_in_years
<fctr> <chr> <dbl> <dbl>
1 (1985,1991] ATL 14475059 40.22038
2 (1985,1991] BAL 11658262 45.40360
3 (1985,1991] BOS 14563356 52.89024
4 (1985,1991] CAL 15077312 51.74897
5 (1985,1991] CHA 9008958 48.18396
6 (1985,1991] CHN 13605046 48.44389
> avg_stats_per_year %>%
+ ggplot(
+ aes(x=average_pay_in_years, y=average_win_percent_in_years, label=teamID)) +
+ geom_point() +
+ geom_text() +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Average Winning Percentage vs. Average Payroll across Time") +
+ geom_smooth(method = 'lm')
> avg_stats_per_year %>%
+ ggplot(aes(x=average_pay_in_years, y=average_win_percent_in_years, label=teamID)) +
+ geom_point() +
+ geom_text() +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Average Winning Percentage vs. Average Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> avg_stats_per_year %>%
+ ggplot(aes(x=average_pay_in_years, y=average_win_percent_in_years)) +
+ geom_point(aes(colour=ifelse(teamID=="OAK", 'Oakland As', "Other Teams"))) +
+ facet_wrap(~year_range) +
+ xlab("Average Team Payroll") +
+ ylab("Average Winning Percentage") +
+ ggtitle("Oakland A's Spending Efficency Over Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour="Team") +
+ theme(text = element_text(),
+ axis.text.x = element_text(angle=90, vjust=1))
> payroll_tab %>%
+ ggplot(aes(x=standard_payroll, y=win_percentage, label=teamID)) +
+ geom_point(aes(colour=Rank)) +
+ xlab("Standard Team Payroll") +
+ ylab("Winning Percentage") +
+ ggtitle("Winning Percentage vs. Standard Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour = "Year")
> payroll_tab %>%
+ ggplot(aes(x=standard_payroll, y=win_percentage, label=teamID)) +
+ geom_point(aes(colour=Rank)) +
+ xlab("Standard Team Payroll") +
+ ylab("Winning Percentage") +
+ ggtitle("Winning Percentage vs. Standard Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour = "Rank") +
+ scale_colour_gradient(low="red", high="blue")
> payroll_tab %>%
+ ggplot(aes(x=standard_payroll, y=win_percentage, label=teamID)) +
+ geom_point(aes(colour=Rank)) +
+ xlab("Standard Team Payroll") +
+ ylab("Winning Percentage") +
+ ggtitle("Winning Percentage vs. Standard Payroll across Time") +
+ geom_smooth(method = 'lm') +
+ labs(colour = "Rank") +
+ scale_colour_gradient(low="green", high="red")
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_line() +
+ facet_wrap(~teamID) +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Team Payrolls Over Time for Teams") +
+ theme(text = element_text(size = 10),
+ axis.text.x = element_text(angle=90, vjust=1))
> ?rvest
No documentation for ‘rvest’ in specified packages and libraries:
you could try ‘??rvest’
> ??rvest
> payroll_tab %>% select(teamID, yearID, payroll_tab, average_payroll_year, st_dev_payroll_year)
Error: All select() inputs must resolve to integer column positions.
The following do not:
* payroll_tab
> payroll_tab %>% select(teamID, yearID, payroll_tab, average_payroll_year, st_dev_payroll_year)
Error: All select() inputs must resolve to integer column positions.
The following do not:
* payroll_tab
> payroll_tab %>% select(teamID, yearID, average_payroll_year, st_dev_payroll_year)
# A tibble: 858 × 4
teamID yearID average_payroll_year st_dev_payroll_year
<chr> <int> <dbl> <dbl>
1 ATL 1985 10075565 2470845
2 BAL 1985 10075565 2470845
3 BOS 1985 10075565 2470845
4 CAL 1985 10075565 2470845
5 CHA 1985 10075565 2470845
6 CHN 1985 10075565 2470845
7 CIN 1985 10075565 2470845
8 CLE 1985 10075565 2470845
9 DET 1985 10075565 2470845
10 HOU 1985 10075565 2470845
# ... with 848 more rows
> payroll_tab %>% select(teamID, yearID, average_payroll_year, st_dev_payroll_year) %>% sample_n(5)
# A tibble: 5 × 4
teamID yearID average_payroll_year st_dev_payroll_year
<chr> <int> <dbl> <dbl>
1 SEA 2013 101150855 48830287
2 ATL 1997 40260210 13060728
3 DET 2002 67469251 24692193
4 TEX 1995 33981049 9447998
5 TOR 2009 88824233 33857093
> head(standard_payrolls)
# A tibble: 6 × 3
yearID average_payroll_year st_dev_payroll_year
<int> <dbl> <dbl>
1 1985 10075565 2470845
2 1986 11840558 3186956
3 1987 10483668 3848337
4 1988 11555862 3386331
5 1989 13845989 3568844
6 1990 17072354 3771834
> payroll_tab %>% select(teamID, yearID, average_payroll_year, st_dev_payroll_year) %>% head()
# A tibble: 6 × 4
teamID yearID average_payroll_year st_dev_payroll_year
<chr> <int> <dbl> <dbl>
1 ATL 1985 10075565 2470845
2 BAL 1985 10075565 2470845
3 BOS 1985 10075565 2470845
4 CAL 1985 10075565 2470845
5 CHA 1985 10075565 2470845
6 CHN 1985 10075565 2470845
> payroll_tab %>%
+ select(teamID, yearID, average_payroll_year, st_dev_payroll_year) %>%
+ sample_n(5)
# A tibble: 5 × 4
teamID yearID average_payroll_year st_dev_payroll_year
<chr> <int> <dbl> <dbl>
1 MON 1991 23578785 6894669
2 CHN 2014 99800016 45705053
3 BAL 2000 55537837 21416220
4 TOR 1996 34177984 10688535
5 ML4 1994 33137010 8528749
Checking rgeos availability: TRUE
> payroll_tab %>%
+ filter(yearID >=1990 & yearID <= 2014) %>%
+ ggplot(aes(x=yearID, y=payroll)) +
+ geom_line() +
+ facet_wrap(~teamID) +
+ xlab("Year") +
+ ylab("Total Payroll") +
+ ggtitle("Team Payrolls Over Time for Teams") +
+ theme(text = element_text(size = 7.5),
+ axis.text.x = element_text(angle=90, vjust=1))
|