The decathlon data set comes from the FactoMineR package and contains athletes' results from two competitions: the Decastar meeting and the Olympic Games.
Department of statistics and computer science, Agrocampus Rennes
library(readr)
library(here)
library(janitor)
library(tidyverse)
library(data.table)
# Load the cleaned decathlon data from its project-relative path.
decathlon <- read_rds(here("clean_data/clean_data.rds"))

# Read the raw data file and display it as a data.table.
data.table(read_rds(here("raw_data/decathlon.rds")))
100m <dbl> | Long.jump <dbl> | Shot.put <dbl> | High.jump <dbl> | 400m <dbl> | 110m.hurdle <dbl> | Discus <dbl> | Pole.vault <dbl> | |
---|---|---|---|---|---|---|---|---|
11.04 | 7.58 | 14.83 | 2.07 | 49.81 | 14.69 | 43.75 | 5.02 | |
10.76 | 7.40 | 14.26 | 1.86 | 49.37 | 14.05 | 50.72 | 4.92 | |
11.02 | 7.30 | 14.77 | 2.04 | 48.37 | 14.09 | 48.95 | 4.92 | |
11.02 | 7.23 | 14.25 | 1.92 | 48.93 | 14.99 | 40.87 | 5.32 | |
11.34 | 7.09 | 15.19 | 2.10 | 50.42 | 15.31 | 46.26 | 4.72 | |
11.11 | 7.60 | 14.31 | 1.98 | 48.68 | 14.23 | 41.10 | 4.92 | |
11.13 | 7.30 | 13.48 | 2.01 | 48.62 | 14.17 | 45.67 | 4.42 | |
10.83 | 7.31 | 13.76 | 2.13 | 49.91 | 14.38 | 44.41 | 4.42 | |
11.64 | 6.81 | 14.57 | 1.95 | 50.14 | 14.93 | 47.60 | 4.92 | |
11.37 | 7.56 | 14.41 | 1.86 | 51.10 | 15.06 | 44.99 | 4.82 |
# Display the cleaned data as a data.table.
data.table(decathlon)
competitor <chr> | ranking <int> | overall_competition_points <int> | competition <fctr> | event <chr> | |
---|---|---|---|---|---|
SEBRLE | 1 | 8217 | Decastar | 100m_sprint | |
SEBRLE | 1 | 8217 | Decastar | long_jump | |
SEBRLE | 1 | 8217 | Decastar | shot_put | |
SEBRLE | 1 | 8217 | Decastar | high_jump | |
SEBRLE | 1 | 8217 | Decastar | 400m_sprint | |
SEBRLE | 1 | 8217 | Decastar | 110m_hurdles | |
SEBRLE | 1 | 8217 | Decastar | discus | |
SEBRLE | 1 | 8217 | Decastar | pole_vault | |
SEBRLE | 1 | 8217 | Decastar | javlin | |
SEBRLE | 1 | 8217 | Decastar | 1500m_race |
Finding the longest long jump in the data
# Longest long jump: keep only the long-jump rows, then the row(s) whose
# result equals the maximum. Restricting to one event first means no
# group_by() is needed, so the result is a plain (ungrouped) tibble.
# The two overall-result columns are dropped by name rather than by a
# positional range, so the query does not depend on column order.
decathlon %>%
  filter(event == "long_jump") %>%
  filter(event_points == max(event_points)) %>%
  select(-ranking, -overall_competition_points)
competitor <chr> | competition <fctr> | event <chr> | event_points <dbl> | |
---|---|---|---|---|
Clay | OlympicG | long_jump | 7.96 |
Finding the average 100m time for each competition
# Mean 100m sprint time per competition, rounded to two decimal places.
decathlon %>%
  filter(event == "100m_sprint") %>%
  group_by(competition) %>%
  summarise(average_100m_time = round(mean(event_points), digits = 2))
competition <fctr> | average_100m_time <dbl> | |||
---|---|---|---|---|
Decastar | 11.18 | |||
OlympicG | 10.92 |
Finding the competitor with the highest total points across both competitions
# Competitor with the highest combined overall score across the competitions.
#
# The data is in long format (one row per competitor, competition and event),
# so a competitor's overall score is repeated on every event row. distinct()
# keeps that score once per competitor and competition; summing the raw rows
# would count it once per event.
#
# Competitor names differ in letter case between the two competitions
# (e.g. "SEBRLE" vs "Sebrle"), so they are normalised to title case to let
# both results of the same athlete fall into one group.
decathlon %>%
  distinct(competitor, competition, overall_competition_points) %>%
  mutate(competitor = str_to_title(competitor)) %>%
  group_by(competitor) %>%
  summarise(
    total_competition_points = sum(overall_competition_points),
    .groups = "drop"
  ) %>%
  filter(total_competition_points == max(total_competition_points))
## `summarise()` ungrouping output (override with `.groups` argument)
competitor <chr> | total_competition_points <int> | |||
---|---|---|---|---|
Sebrle | 88930 |
Finding the shot-put scores for the top three competitors in each competition
# Three best shot-put results within each competition, best first.
# slice_max() is the current replacement for the superseded top_n(); like
# top_n() it keeps tied rows. ungroup() returns a plain tibble so later
# steps are not affected by leftover grouping.
# NOTE(review): this ranks by shot-put result, not by the overall `ranking`
# column - confirm that is the intended reading of "top three competitors".
decathlon %>%
  filter(event == "shot_put") %>%
  select(-overall_competition_points, -ranking) %>%
  group_by(competition) %>%
  slice_max(event_points, n = 3) %>%
  ungroup() %>%
  arrange(desc(event_points))
competitor <chr> | competition <fctr> | event <chr> | event_points <dbl> | |
---|---|---|---|---|
Sebrle | OlympicG | shot_put | 16.36 | |
Karpov | OlympicG | shot_put | 15.93 | |
Macey | OlympicG | shot_put | 15.73 | |
YURKOV | Decastar | shot_put | 15.19 | |
SEBRLE | Decastar | shot_put | 14.83 | |
KARPOV | Decastar | shot_put | 14.77 |
Calculating the average overall competition points for competitors who ran the 400m in under 50 seconds vs. those who took 50 seconds or longer
# Mean overall competition points (rounded to a whole number) for competitors
# whose 400m result is below 50 versus 50 or above, highest average first.
decathlon %>%
  filter(event == "400m_sprint") %>%
  group_by(event_points < 50) %>%
  summarise(
    average_points = round(mean(overall_competition_points), digits = 0)
  ) %>%
  arrange(desc(average_points))
## `summarise()` ungrouping output (override with `.groups` argument)
event_points < 50 <lgl> | average_points <dbl> | |||
---|---|---|---|---|
TRUE | 8120 | |||
FALSE | 7727 |