聽到閃電波特退休的消息,聯想到年紀越來越大容易造成運動選手身體各個層面的衰老,進而導致比賽的失利而無法取得獎牌。使我們想要探討人的生理年齡是否會大幅度的影響得到獎牌的機率。
library(readr)
athlete<-read_csv("../asset/athlete_all.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## country = col_character(),
## Name = col_character(),
## Sex = col_character(),
## NOC = col_character(),
## Games = col_character(),
## Season = col_character(),
## City = col_character(),
## Sport = col_character(),
## Event = col_character(),
## Medal = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 42 parsing failures.
## row col expected actual file
## 1166 female_school no trailing characters r '../asset/athlete_all.csv'
## 1167 female_school no trailing characters r '../asset/athlete_all.csv'
## 1168 female_school no trailing characters r '../asset/athlete_all.csv'
## 1169 female_school no trailing characters r '../asset/athlete_all.csv'
## 1170 female_school no trailing characters r '../asset/athlete_all.csv'
## .... ............. ...................... ...... ..........................
## See problems(...) for more details.
pacman::p_load(devtools,dplyr, ggplot2, readr, plotly, googleVis,ggthemes,d3heatmap,magrittr)
summary(athlete)
## X1 country year ID
## Min. : 1 Length:271116 Min. :1896 Min. : 1
## 1st Qu.: 67780 Class :character 1st Qu.:1960 1st Qu.: 34643
## Median :135559 Mode :character Median :1988 Median : 68205
## Mean :135559 Mean :1978 Mean : 68249
## 3rd Qu.:203337 3rd Qu.:2002 3rd Qu.:102097
## Max. :271116 Max. :2016 Max. :135571
##
## Name Sex Age Height
## Length:271116 Length:271116 Min. :10.00 Min. :127.0
## Class :character Class :character 1st Qu.:21.00 1st Qu.:168.0
## Mode :character Mode :character Median :24.00 Median :175.0
## Mean :25.56 Mean :175.3
## 3rd Qu.:28.00 3rd Qu.:183.0
## Max. :97.00 Max. :226.0
## NA's :9474 NA's :60171
## Weight NOC Games Season
## Min. : 25.0 Length:271116 Length:271116 Length:271116
## 1st Qu.: 60.0 Class :character Class :character Class :character
## Median : 70.0 Mode :character Mode :character Mode :character
## Mean : 70.7
## 3rd Qu.: 79.0
## Max. :214.0
## NA's :62875
## City Sport Event Medal
## Length:271116 Length:271116 Length:271116 Length:271116
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ce_rate electricity female_school food_suppiy
## Min. : 17.00 Min. :3.500e+09 Min. : 15.40 Min. :1440
## 1st Qu.: 47.60 1st Qu.:7.350e+10 1st Qu.: 96.50 1st Qu.:2890
## Median : 51.70 Median :2.260e+11 Median :100.00 Median :3130
## Mean : 55.43 Mean :6.796e+11 Mean : 96.84 Mean :3090
## 3rd Qu.: 58.20 3rd Qu.:6.020e+11 3rd Qu.:103.00 3rd Qu.:3390
## Max. :118.00 Max. :6.140e+12 Max. :130.00 Max. :3810
## NA's :83417 NA's :166832 NA's :131069 NA's :122790
## health_expense income_GDP baby_pw co2_em
## Min. : 1.69 Min. : 263 Min. :1.12 Min. : 0.01
## 1st Qu.: 5.71 1st Qu.: 7390 1st Qu.:1.61 1st Qu.: 3.04
## Median : 7.76 Median : 14800 Median :2.04 Median : 6.48
## Mean : 7.88 Mean : 19366 Mean :2.52 Mean : 7.34
## 3rd Qu.: 9.49 3rd Qu.: 30100 3rd Qu.:2.87 3rd Qu.:10.00
## Max. :20.60 Max. :136000 Max. :8.30 Max. :61.80
## NA's :206269 NA's :45550 NA's :46166 NA's :59753
## child_mortality inflation unemployment_15 bc_w_death
## Min. : 2.10 Min. : -17.10 Min. : 0.14 Min. : 0.53
## 1st Qu.: 7.60 1st Qu.: 1.67 1st Qu.: 4.65 1st Qu.:15.50
## Median : 19.60 Median : 3.92 Median : 6.83 Median :20.20
## Mean : 52.64 Mean : 17.17 Mean : 7.56 Mean :19.48
## 3rd Qu.: 60.60 3rd Qu.: 8.46 3rd Qu.: 9.62 3rd Qu.:23.80
## Max. :536.00 Max. :6040.00 Max. :35.50 Max. :46.60
## NA's :45763 NA's :115730 NA's :154684 NA's :103013
## democracy_score
## Min. :-10.00
## 1st Qu.: 3.00
## Median : 10.00
## Mean : 5.43
## 3rd Qu.: 10.00
## Max. : 10.00
## NA's :79678
a<-athlete[,c(-1)]
a$Medal[is.na(a[,15])==T]=0
a$Medal<-factor(a$Medal,levels = c(0,"Bronze","Silver","Gold"))
colnames(a)[1:2]<-c("Team","Year")
a1<-a%>%
na.omit() %>% #刪掉空值
group_by(Age,Medal) %>% #用年齡、獎牌分組
summarize(medal=sum(as.double(Medal))) %>% #總結獎牌數
ggplot(aes(Age,medal,fill=Medal ))+geom_col()+ #畫圖
theme_economist() + scale_color_economist()
ggplotly(a1)
demo <- a %>%
select(Team, Year, Sex, democracy_score) %>%
na.omit() %>%
group_by(Year) %>%
mutate(nPeople = n()) %>%
filter(Sex == "F") %>%
mutate(nFemale = n()) %>%
mutate(pFemale = nFemale/nPeople) %>%
distinct(Team, Year, democracy_score, pFemale) %>%
ungroup() %>%
group_by(Year) %>%
mutate(mean_demo = mean(democracy_score)) %>%
distinct(Year, pFemale, mean_demo) %>%
arrange(Year)
ggplot(aes(Year, pFemale, mean_demo), data = demo)+
geom_line()+theme_economist() + scale_color_economist()
ggplot(aes(Year, mean_demo), data = demo)+
geom_line()+theme_economist() + scale_color_economist()