The decathlon data set comes from the FactoMineR package and covers two competitions: the Decastar and the Olympic Games.
Source: Department of statistics and computer science, Agrocampus Rennes.
library(readr)
library(here)
library(janitor)
library(tidyverse)
library(data.table)
# Load the cleaned decathlon data used by every query below.
decathlon <- read_rds(here("clean_data/clean_data.rds"))

# Display the raw data as a data.table for comparison with the cleaned version.
data.table(read_rds(here("raw_data/decathlon.rds")))

# Display the cleaned data as a data.table.
data.table(decathlon)
Finding the longest long jump in the data
# Keep only the long-jump rows, then the row(s) holding the maximum
# event_points; the columns from ranking through overall_competition_points
# are dropped from the displayed result.
decathlon %>%
  group_by(event) %>%
  filter(event == "long_jump") %>%
  filter(event_points == max(event_points)) %>%
  select(-(ranking:overall_competition_points))
Finding the average 100m time for each competition
# Mean event_points of the 100m sprint rows per competition,
# rounded to two decimal places.
decathlon %>%
  filter(event == "100m_sprint") %>%
  group_by(competition) %>%
  summarise(average_100m_time = mean(event_points)) %>%
  mutate(average_100m_time = round(average_100m_time, 2))
Finding the competitor with the highest total points across both competitions
# Competitor(s) with the highest combined overall points across competitions.
#
# The data carries overall_competition_points on per-event rows (other queries
# filter by `event` while this column is present), so summing the raw rows
# would count each competition total once per event. Reduce to one row per
# competitor/competition first.
# NOTE(review): assumes overall_competition_points is constant within a
# competitor/competition pair - confirm against the cleaning script.
decathlon %>%
  distinct(competitor, competition, overall_competition_points) %>%
  group_by(competitor) %>%
  summarise(total_competition_points = sum(overall_competition_points)) %>%
  # Keeps every competitor tied for the maximum; no further truncation needed.
  filter(total_competition_points == max(total_competition_points))
## `summarise()` ungrouping output (override with `.groups` argument)
Finding the shot-put scores for the top three competitors in each competition
# Top three shot-put results within each competition (ties are kept).
# slice_max() replaces the superseded top_n(); the result is ungrouped and
# sorted by competition first so each competition's top three stay together
# instead of being interleaved by score.
decathlon %>%
  filter(event == "shot_put") %>%
  select(-overall_competition_points, -ranking) %>%
  group_by(competition) %>%
  slice_max(event_points, n = 3) %>%
  ungroup() %>%
  arrange(competition, desc(event_points))
Calculating the average overall points for competitors who ran the 400m in less than 50 seconds versus those who ran it in 50 seconds or more
# Average overall competition points (rounded to a whole number) for
# competitors whose 400m event_points value is below 50 versus 50 or above.
# The grouping flag is given an explicit name so the output column is
# `under_50_seconds` rather than the raw expression `event_points < 50`.
decathlon %>%
  filter(event == "400m_sprint") %>%
  group_by(under_50_seconds = event_points < 50) %>%
  summarise(average_points = round(mean(overall_competition_points))) %>%
  arrange(desc(average_points))
## `summarise()` ungrouping output (override with `.groups` argument)