### PowRful ### # Last edit: 2016-12-28 # Manny # Dependencies require(dplyr) require(ggplot2) require(glmnet) require(doMC) require(Kmisc) # Load data load(url("http://fenwicka.com/shiny/powrful.RData")) # Functions do_shooter <- function(shooter, data) { vect <- 1*(data$Shooter == shooter) vect[which(is.na(vect) == TRUE)] <- 0 return(vect) } do_goalie <- function(goalie, data) { vect <- 1*(data$Goalie == goalie) vect[which(is.na(vect) == TRUE)] <- 0 return(vect) } do_strength <- function(strength, data) { vect <- 1*(data$Strength.State == strength) vect[which(is.na(vect) == TRUE)] <- 0 return(vect) } do_score <- function(score, data) { vect <- 1*(data$Score.Cat == score) vect[which(is.na(vect) == TRUE)] <- 0 return(vect) } ### TEAM CORSI ### # Subset 5v5 pbp %>% filter(Strength.State == "5v5") -> pbp_5 # Compile shots bind_rows(pbp_5 %>% group_by(Home.Team) %>% rename(Team = Home.Team) %>% summarise(Venue = "Home", GP = length(unique(Game.ID)), TOI = sum(na.omit(Event.Length))/60, CF = sum(Event %in% c("GOAL", "SHOT", "MISS", "BLOCK") & ev.team == Team), CA = sum(Event %in% c("GOAL", "SHOT", "MISS", "BLOCK") & ev.team == Away.Team), GF = sum(Event == "GOAL" & ev.team == Team), GA = sum(Event == "GOAL" & ev.team == Away.Team) ), pbp_5 %>% group_by(Away.Team) %>% rename(Team = Away.Team) %>% summarise(Venue = "Away", GP = length(unique(Game.ID)), TOI = sum(na.omit(Event.Length))/60, CF = sum(Event %in% c("GOAL", "SHOT", "MISS", "BLOCK") & ev.team == Team), CA = sum(Event %in% c("GOAL", "SHOT", "MISS", "BLOCK") & ev.team == Home.Team), GF = sum(Event == "GOAL" & ev.team == Team), GA = sum(Event == "GOAL" & ev.team == Home.Team) ) ) %>% data.frame() %>% group_by(Team) %>% summarise(GP = sum(GP), TOI = sum(TOI), CF = sum(CF), CA = sum(CA), GF = sum(GF), GA = sum(GA) ) %>% mutate(Corsi = CF/(CF + CA), Goals = GF/(GF + GA) ) %>% data.frame() -> team_corsi # Top 5 teams team_corsi %>% arrange(desc(Corsi)) %>% mutate(Corsi = scales::percent(Corsi)) %>% select(Team, GP, CF, CA, Corsi) %>% slice(1:5) # Bottom 5 teams team_corsi %>% arrange(Corsi) %>% mutate(Corsi = scales::percent(Corsi)) %>% select(Team, GP, CF, CA, Corsi) %>% slice(1:5) # Scatter plot team_corsi %>% mutate(CF60 = CF/TOI*60, CA60 = CA/TOI*60, GF60 = GF/TOI*60, GA60 = GA/TOI*60 ) -> newdata ggplot(newdata, aes(x = CF60, y = CA60) ) + geom_point(size = 3, alpha = 0.8, colour = "darkblue" ) + geom_text(aes(label = Team), vjust = -1, size = 3, fontface = "bold" ) + theme_bw() + scale_y_reverse() ## Correlation # Plot data plot(newdata$GF60, newdata$CF60) # Linear regression fit1 <- lm(data = newdata, formula = GF60 ~ CF60 ) summary(fit1) ################## ################## ### PLAYER RATINGS ### # Prepare data bind_rows(pbp %>% filter(Event %in% c("GOAL", "SHOT", "MISS"), Strength.State %in% c("3v4", "3v5", "4v5", "5v5", "4v4", "3v3", "5v3", "5v4", "4v3"), ev.team == Away.Team ) %>% mutate(Goalie = Home.Goalie, Venue = "Away", Score.Cat = -Score.Cat, Strength.State = str_rev(Strength.State) ), pbp %>% filter(Event %in% c("GOAL", "SHOT", "MISS"), Strength.State %in% c("3v4", "3v5", "4v5", "5v5", "4v4", "3v3", "5v3", "5v4", "4v3"), ev.team == Home.Team ) %>% mutate(Goalie = Away.Goalie, Venue = "Home" ) ) %>% data.frame() %>% rename(Shooter = p1) %>% select(Event, Shooter, Goalie, xG, Strength.State, Venue, Score.Cat ) %>% mutate(is.Goal = 1*(Event == "GOAL")) %>% data.frame() -> fulldata ymat <- as.matrix(fulldata$is.Goal) unique(fulldata$Shooter) %>% lapply(do_shooter, data = fulldata ) %>% unlist() %>% matrix(ncol = length(unique(fulldata$Shooter)), byrow = FALSE ) -> shootermat unique(fulldata$Goalie) %>% lapply(do_goalie, data = fulldata ) %>% unlist() %>% matrix(ncol = length(unique(fulldata$Goalie)), byrow = FALSE ) -> goaliemat unique(fulldata$Strength.State) %>% lapply(do_strength, data = fulldata ) %>% unlist() %>% matrix(ncol = length(unique(fulldata$Strength.State)), byrow = FALSE ) -> strengthmat unique(fulldata$Score.Cat) %>% lapply(do_score, data = fulldata ) %>% unlist() %>% matrix(ncol = length(unique(fulldata$Score.Cat)), byrow = FALSE ) -> scoremat xmat <- cbind(shootermat, goaliemat, strengthmat, scoremat, fulldata$xG, as.numeric(as.factor(fulldata$Venue)) ) # Regression registerDoMC(cores = 4) cv <- cv.glmnet(xmat, ymat, family = "binomial", nfolds = 4, nlambda = 100, alpha = 0, parallel = TRUE ) ratings <- as.numeric(exp(coef(cv, s = "lambda.min"))) variables <- c("Intercept", unique(fulldata$Shooter), unique(fulldata$Goalie), unique(fulldata$Strength.State), unique(fulldata$Score.Cat), "xG", "Venue" ) table <- data.frame(cbind(variables, ratings)) shooters <- filter(table, variables %in% fulldata$Shooter) goalies <- filter(table, variables %in% fulldata$Goalie) ###################### ######################