預測2020東京奧運的贏面最大在美洲還是亞洲 從gdp、註冊率、民主分數和得牌數下去切入 ## 讀取套件及資料
library(readr)
## Warning: package 'readr' was built under R version 3.6.2
pacman::p_load(devtools,dplyr, ggplot2, readr, plotly, googleVis,ggthemes,d3heatmap,magrittr)
#奧運女子一百公尺田徑項目得獎之選手身高、體重點狀分布
athlete_all<- read_csv('/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## country = col_character(),
## Name = col_character(),
## Sex = col_character(),
## NOC = col_character(),
## Games = col_character(),
## Season = col_character(),
## City = col_character(),
## Sport = col_character(),
## Event = col_character(),
## Medal = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 42 parsing failures.
## row col expected actual file
## 1166 female_school no trailing characters r '/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv'
## 1167 female_school no trailing characters r '/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv'
## 1168 female_school no trailing characters r '/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv'
## 1169 female_school no trailing characters r '/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv'
## 1170 female_school no trailing characters r '/Users/CCCM_3051/Documents/Students/Students/files/asset/athlete_all.csv'
## .... ............. ...................... ...... ..........................................................................
## See problems(...) for more details.
#2
a<-athlete_all[,c(-1,-17:-18,-28)]
#3
a$Medal[is.na(a[,15])==T]=0
a$Medal<-factor(a$Medal,levels = c(0,"Bronze","Silver","Gold"))
a<- na.omit(a)
colnames(a)[1:2]<-c("Team","Year")
#4
#我們設定的亞洲五國以及美洲五國,後續想探討這兩組國家來進行組內比較
asian<-c("China","Korea","Japan","India","Chinese Taipei","North Korea")
america<-c("United States","Mexico","Brazil","Canada","Argentina")
#美洲五國(巴西無資料)時間與性別註冊率折線圖
a1<-a%>%filter(a$Team%in%america)%>%
group_by(Year,Team) %>%
ggplot(aes(Year,female_school,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a1)
#亞洲五國(台灣與北韓無資料)時間與性別註冊率折線圖
a2<-a%>%filter(a$Team%in%asian)%>%
group_by(Year,Team) %>%
ggplot(aes(Year,female_school ,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a2)
#美洲民主分數趨勢
a1<-a%>%filter(a$Team%in%america)%>%
group_by(Year,Team) %>%
ggplot(aes(Year,democracy_score ,col=Team))+geom_line()+theme_economist() +
scale_color_economist()+geom_point()
ggplotly(a1)
#亞洲民主分數趨勢
a1<-a%>%filter(a$Team%in%asian) %>%
group_by(Year,Team) %>%
ggplot(aes(Year,democracy_score,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a1)
#美洲五國奧運人均GDP總得牌數年度變化
a1<-a%>%filter(a$Team%in%america)%>%
group_by(Year,Team) %>%
ggplot(aes(Year,income_GDP ,col=Team))+geom_line()+theme_economist() +
scale_color_economist()+geom_point()
ggplotly(a1)
#亞洲五國奧運人均GDP年度變化
a1<-a%>%filter(a$Team%in%asian)%>%
group_by(Year,Team) %>%
ggplot(aes(Year,income_GDP ,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a1)
亞洲泡泡圖
a1<-athlete_all %>% na.omit() %>%filter(country%in%asian) %>% group_by(country,year,ce_rate,electricity,female_school,food_suppiy,health_expense,income_GDP,baby_pw,co2_em,child_mortality,inflation,unemployment_15,bc_w_death,democracy_score ) %>% summarise(Age=mean(Age),Height=mean(Height),Weight=mean(Weight))
gvisMotionChart(a1, "country", "year") %>% plot
## starting httpd help server ... done
美洲泡泡圖
a1<-athlete_all %>% na.omit() %>%filter(country%in%america) %>% group_by(country,year,ce_rate,electricity,female_school,food_suppiy,health_expense,income_GDP,baby_pw,co2_em,child_mortality,inflation,unemployment_15,bc_w_death,democracy_score ) %>% summarise(Age=mean(Age),Height=mean(Height),Weight=mean(Weight))
gvisMotionChart(a1, "country", "year") %>% plot
#2
a<-athlete_all[,c(-1,-17:-29)]
#3
a$Medal[is.na(a[,15])==T]=0
a$Medal<-factor(a$Medal,levels = c(0,"Bronze","Silver","Gold"))
a<- na.omit(a)
colnames(a)[1:2]<-c("Team","Year")
#我們設定的亞洲五國以及美洲五國,後續想探討這兩組國家來進行組內比較
asian<-c("China","Korea","Japan","India","Chinese Taipei","North Korea")
america<-c("United States","Mexico","Brazil","Canada","Argentina")
#亞洲五國各國得牌數量比較
a1<-a%>%filter(a$Team%in%asian)%>%
group_by(Team,Medal) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Team,medal,fill=Medal ))+geom_col()+
theme_economist() + scale_color_economist()
ggplotly(a1)
#美洲五國各國得牌數量比較
a1<-a%>%filter(a$Team%in%america)%>%
group_by(Team,Medal) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Team,medal,fill=Medal ))+geom_col()+
theme_economist() + scale_color_economist()
ggplotly(a1)
china=c(532,969,1232)
sum(china)
## [1] 2733
chinese_taipei=c(36,84,0)
sum(chinese_taipei)
## [1] 120
india=c(78,51,160)
sum(india)
## [1] 289
japan=c(690,810,904)
sum(japan)
## [1] 2404
north_korea=c(56,45,60)
sum(north_korea)
## [1] 161
#使用基本向量運算sum(x)
asian_total=c(2733,120,289,2404,161)
sum(asian_total)
## [1] 5707
argentina=c(156,198,288)
sum(argentina)
## [1] 642
brazil=c(332,465,404)
sum(brazil)
## [1] 1201
canada=c(676,1008,1400)
sum(canada)
## [1] 3084
mexico=c(60,66,96)
sum(mexico)
## [1] 222
united_states=c(1876,3780,8300)
sum(united_states)
## [1] 13956
#使用基本向量運算sum(x)
america_total=c(642,1201,3084,222,13956)
sum(america_total)
## [1] 19105
#亞洲五國於"夏季"奧運總得牌數年度變化
a1<-a%>%filter(a$Team%in%asian,a$Medal!=0,a$Season=="Summer")%>%
group_by(Year,Team) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Year,medal ,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a1)
#美洲五國於"夏季"奧運總得牌數年度變化
a1<-a%>%filter(a$Team%in%america,a$Medal!=0,a$Season=="Summer")%>%
group_by(Year,Team) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Year,medal ,col=Team))+geom_line()+theme_economist() +
scale_color_economist()+geom_point()
ggplotly(a1)
#亞洲五國於"冬季"奧運總得牌數年度變化
a1<-a%>%filter(a$Team%in%asian,a$Medal!=0,a$Season=="Winter")%>%
group_by(Year,Team) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Year,medal ,col=Team))+geom_line()+
theme_economist() + scale_color_economist()+geom_point()
ggplotly(a1)
#美洲五國於"冬季"奧運總得牌數年度變化
a1<-a%>%filter(a$Team%in%america,a$Medal!=0,a$Season=="Winter")%>%
group_by(Year,Team) %>% summarize(medal=sum(as.double(Medal))) %>%
ggplot(aes(Year,medal ,col=Team))+geom_line()+theme_economist() +
scale_color_economist()+geom_point()
ggplotly(a1)
# 1.美洲人均GDP整體來說大多高於亞洲國家2.亞洲各國民主分數變動較平穩,相較於每周除了美國加拿大各國變動較於急遽3.美洲整體的性別註冊率較高,可大致推測美洲國家較重視女性教育發展4.美洲和亞洲的比賽狀況與季節無明顯關聯,其中,美洲國家的運動表現較亞洲國家傑出。這也打破了一些人的迷思,例如:某些人會認為在寒冷國家生長的人可能會在炎熱的情況下表現會有落差。