title: “project” author: “Team 2”

第二組研究報告

洪怡伶 郭庭妤 陳律均 吳毓珊 林俐菱 洪浩庭

研究問題:

關於亞洲和美洲國家的運動賽事獎牌表現,與各國社經變數(如 food_suppiy、ce_rate、electricity、female_school、income_GDP)之間的關聯及影響

研究動機: 從這些變數中,找出與各洲的季節及運動項目相關的具體例子

導入使用工具
#rm(list=ls())
pacman::p_load(devtools,dplyr, ggplot2, readr, plotly, googleVis,ggthemes,d3heatmap,magrittr)
將我們所需要的資料匯入
  • 透過先前所教的read_csv來將我們的資料導入
  • 為了單純化數據資料,我將資料中特定欄位去除
  • 我將獎牌欄位NA設置為零,用以更容易整合獎牌資料,再者重新命名欄位資料
  • 我想透過視覺化來研究「亞洲地區六國」、「美洲地區五國」以及「歷年總得牌最高前六國」
# setwd("~/camp")
# Step 1: load the athlete data set from the asset folder one level above the
# working directory. The captured console output for this call reports an
# unnamed first column (filled in as 'X1') and 42 parsing failures in the
# `female_school` column.
athlete_all <- readr::read_csv(file = "../asset/athlete_all.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   country = col_character(),
##   Name = col_character(),
##   Sex = col_character(),
##   NOC = col_character(),
##   Games = col_character(),
##   Season = col_character(),
##   City = col_character(),
##   Sport = col_character(),
##   Event = col_character(),
##   Medal = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 42 parsing failures.
##  row           col               expected actual                       file
## 1166 female_school no trailing characters      r '../asset/athlete_all.csv'
## 1167 female_school no trailing characters      r '../asset/athlete_all.csv'
## 1168 female_school no trailing characters      r '../asset/athlete_all.csv'
## 1169 female_school no trailing characters      r '../asset/athlete_all.csv'
## 1170 female_school no trailing characters      r '../asset/athlete_all.csv'
## .... ............. ...................... ...... ..........................
## See problems(...) for more details.
# Step 2: drop the first column (the unnamed column that read_csv filled in
# as 'X1').
a <- athlete_all[, -1]

# Step 3: recode a missing Medal as "0" so that "no medal" becomes an explicit
# category. The mask is taken from the Medal column itself rather than from a
# hard-coded column position, so it stays correct if the column order of the
# CSV changes.
a$Medal[is.na(a$Medal)] <- "0"

# Order the levels from worst to best outcome; the integer codes of this
# factor are therefore 1 = "0", 2 = Bronze, 3 = Silver, 4 = Gold.
a$Medal <- factor(a$Medal, levels = c("0", "Bronze", "Silver", "Gold"))

# Drop every row that still has an NA in any column.
a <- na.omit(a)

# Rename the first two remaining columns.
colnames(a)[1:2] <- c("Team", "Year")
# Step 4: team sets used for the within-group comparisons below.
# Note: the Asian set lists six teams, the American set five.
asian <- c(
  "China", "Korea", "Japan", "India", "Chinese Taipei", "North Korea"
)
america <- c("United States", "Mexico", "Brazil", "Canada", "Argentina")

# The six teams with the highest all-time medal score, in ascending order
# (tail() keeps the last six rows after sorting).
# Medal is a factor with levels "0" < Bronze < Silver < Gold, whose integer
# codes are 1..4. Subtracting 1 makes a no-medal row contribute 0 instead of
# 1 (Bronze = 1, Silver = 2, Gold = 3); without it, simply entering many
# athletes would inflate a team's score.
top <- a %>%
  group_by(Team) %>%
  summarize(medal = sum(as.double(Medal) - 1)) %>%
  arrange(medal) %>%
  tail()
boxplot (盒鬚圖)
# Boxplots (ggplot2 rendered through plotly): distribution of one indicator
# column per medal outcome, Winter-season rows only, drawn once for the Asian
# team set and once for the American team set.

# Build the boxplot of `indicator` by Medal for the selected teams.
#   data:      data frame with Team, Season, Medal and the indicator column
#   teams:     character vector of Team values to keep
#   indicator: name (string) of the column drawn on the y axis
# Returns the ggplot object (not yet converted by ggplotly()).
medal_boxplot <- function(data, teams, indicator) {
  data %>%
    filter(Team %in% teams, Season == "Winter") %>%
    ggplot(aes(Medal, .data[[indicator]])) +
    geom_boxplot(fill = "#007799", colour = "black") +
    # Keep the plain column name as the axis title regardless of how the
    # installed ggplot2 version labels a `.data[[...]]` mapping.
    labs(y = indicator) +
    theme_economist() +
    scale_color_economist()
}

# food_suppiy (column name spelled as in the data set): Asian teams
a1 <- medal_boxplot(a, asian, "food_suppiy")
ggplotly(a1)
# food_suppiy: American teams
a1 <- medal_boxplot(a, america, "food_suppiy")
ggplotly(a1)

# ce_rate: Asian teams
a1 <- medal_boxplot(a, asian, "ce_rate")
ggplotly(a1)
# ce_rate: American teams
a1 <- medal_boxplot(a, america, "ce_rate")
ggplotly(a1)

# electricity: Asian teams
a1 <- medal_boxplot(a, asian, "electricity")
ggplotly(a1)
# electricity: American teams
a1 <- medal_boxplot(a, america, "electricity")
ggplotly(a1)

# female_school: Asian teams
a1 <- medal_boxplot(a, asian, "female_school")
ggplotly(a1)
# female_school: American teams
a1 <- medal_boxplot(a, america, "female_school")
ggplotly(a1)

# income_GDP: Asian teams
a1 <- medal_boxplot(a, asian, "income_GDP")
ggplotly(a1)
# income_GDP: American teams
a1 <- medal_boxplot(a, america, "income_GDP")
ggplotly(a1)