The decathlon data set comes from the FactoMineR package and covers two competitions: the Decastar and the Olympic Games.
Source: Department of Statistics and Computer Science, Agrocampus Rennes.
library(readr)
library(here)
library(janitor)
library(tidyverse)
library(data.table)
# Load the cleaned decathlon results; the queries below all read `decathlon`.
decathlon <- read_rds(here("clean_data/clean_data.rds"))

# Print the raw file as a data.table.
data.table(read_rds(here("raw_data/decathlon.rds")))

# Print the cleaned data in the same data.table layout.
data.table(decathlon)
Finding the longest long jump in the data
# Restrict to the long-jump rows, then keep the row(s) whose event_points
# equals the largest value within that event. The result stays grouped by
# `event`, so that column is retained by the select() below.
decathlon %>%
  filter(event == "long_jump") %>%
  group_by(event) %>%
  filter(event_points == max(event_points)) %>%
  select(-ranking:-overall_competition_points)
Finding the average 100m time for each competition
# Mean event_points of the 100m sprint rows, computed separately for each
# competition and rounded to two decimal places.
decathlon %>%
  filter(event == "100m_sprint") %>%
  group_by(competition) %>%
  summarise(
    average_100m_time = round(mean(event_points), digits = 2)
  )
Finding the competitor with the highest total points across both competitions
# The table is in long format: the other queries filter on `event`, so a
# competitor has one row per event within a competition, and each of those
# rows carries the same overall_competition_points value. Summing the rows
# directly would count every competition total once per event, so collapse
# to a single row per competitor/competition pair before adding them up.
decathlon %>%
  distinct(competitor, competition, overall_competition_points) %>%
  group_by(competitor) %>%
  summarise(total_competition_points = sum(overall_competition_points)) %>%
  # keep only the competitor(s) holding the highest combined total
  filter(total_competition_points == max(total_competition_points)) %>%
  # cap the printed result at three rows should several competitors tie
  head(3)
## `summarise()` ungrouping output (override with `.groups` argument)
Finding the shot-put scores for the top three competitors in each competition
# Shot-put rows only, without the ranking and overall-points columns. Within
# each competition keep the three largest event_points values (top_n keeps
# ties, so a group can contribute more than three rows), then list the
# result from the largest value downwards.
decathlon %>%
  filter(event == "shot_put") %>%
  select(-ranking, -overall_competition_points) %>%
  group_by(competition) %>%
  top_n(n = 3, wt = event_points) %>%
  arrange(desc(event_points))
Calculating the average overall points for competitors who ran the 400m in under 50 seconds vs. those who ran it in 50 seconds or more
# Split the 400m sprint rows on whether event_points is below 50 (TRUE) or
# not (FALSE), then report the rounded mean of overall_competition_points
# for each side, highest average first.
decathlon %>%
  filter(event == "400m_sprint") %>%
  group_by(event_points < 50) %>%
  summarise(
    average_points = round(mean(overall_competition_points))
  ) %>%
  arrange(desc(average_points))
## `summarise()` ungrouping output (override with `.groups` argument)