library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Keep strings as character vectors rather than factors when base R builds
# data frames. Spell out FALSE: the shorthand `F` is an ordinary variable that
# can be reassigned, whereas FALSE is a reserved word.
options(stringsAsFactors = FALSE)

Q1 Joining educational data

Reading education data

# Preview the village-level education file without storing it.
read_csv("data/opendata107Y020.csv") %>%
  # Drop the first data row.
  # NOTE(review): presumably a second, descriptive header line, which would
  # explain why every column is parsed as character -- confirm against the CSV.
  slice(-1) %>%
  # Print one line per column: name, type, and the first few values.
  glimpse()
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 7,760
## Variables: 51
## $ statistic_yyy                               <chr> "107", "107", "107...
## $ district_code                               <chr> "65000010001", "65...
## $ site_id                                     <chr> "新北市板橋區", "新北市板橋區"...
## $ village                                     <chr> "留侯里", "流芳里", "赤松里...
## $ edu_age_15up_total                          <chr> "1431", "1337", "7...
## $ edu_doctor_graduated_m                      <chr> "4", "7", "5", "2"...
## $ edu_doctor_graduated_f                      <chr> "0", "3", "1", "2"...
## $ edu_doctor_ungraduated_m                    <chr> "1", "2", "0", "3"...
## $ edu_doctor_ungraduated_f                    <chr> "2", "2", "0", "0"...
## $ edu_master_graduated_m                      <chr> "52", "48", "31", ...
## $ edu_master_graduated_f                      <chr> "31", "39", "32", ...
## $ edu_master_ungraduated_m                    <chr> "12", "16", "6", "...
## $ edu_master_ungraduated_f                    <chr> "7", "14", "2", "3...
## $ edu_university_graduated_m                  <chr> "150", "151", "98"...
## $ edu_university_graduated_f                  <chr> "167", "198", "99"...
## $ edu_university_ungraduated_m                <chr> "54", "41", "36", ...
## $ edu_university_ungraduated_f                <chr> "48", "38", "19", ...
## $ edu_juniorcollege_2ys_graduated_m           <chr> "38", "36", "8", "...
## $ edu_juniorcollege_2ys_graduated_f           <chr> "34", "46", "20", ...
## $ edu_juniorcollege_2ys_ungraduated_m         <chr> "8", "3", "3", "3"...
## $ edu_juniorcollege_2ys_ungraduated_f         <chr> "4", "3", "2", "4"...
## $ edu_juniorcollege_5ys_final2y_graduated_m   <chr> "51", "46", "25", ...
## $ edu_juniorcollege_5ys_final2y_graduated_f   <chr> "31", "43", "26", ...
## $ edu_juniorcollege_5ys_final2y_ungraduated_m <chr> "0", "2", "1", "1"...
## $ edu_juniorcollege_5ys_final2y_ungraduated_f <chr> "0", "2", "1", "0"...
## $ edu_senior_graduated_m                      <chr> "73", "60", "27", ...
## $ edu_senior_graduated_f                      <chr> "58", "49", "24", ...
## $ edu_senior_ungraduated_m                    <chr> "15", "21", "14", ...
## $ edu_senior_ungraduated_f                    <chr> "18", "19", "11", ...
## $ edu_seniorvocational_graduated_m            <chr> "106", "73", "37",...
## $ edu_seniorvocational_graduated_f            <chr> "145", "109", "51"...
## $ edu_seniorvocational_ungraduated_m          <chr> "19", "15", "8", "...
## $ edu_seniorvocational_ungraduated_f          <chr> "15", "9", "6", "1...
## $ edu_juniorcollege_5ys_first3y_ungraduated_m <chr> "2", "7", "1", "4"...
## $ edu_juniorcollege_5ys_first3y_ungraduated_f <chr> "4", "8", "0", "3"...
## $ edu_junior_graduated_m                      <chr> "40", "29", "22", ...
## $ edu_junior_graduated_f                      <chr> "74", "42", "19", ...
## $ edu_junior_ungraduated_m                    <chr> "2", "5", "2", "5"...
## $ edu_junior_ungraduated_f                    <chr> "5", "10", "4", "4...
## $ edu_juniorvocational_graduated_m            <chr> "12", "20", "0", "...
## $ edu_juniorvocational_graduated_f            <chr> "37", "22", "2", "...
## $ edu_juniorvocational_ungraduated_m          <chr> "0", "0", "0", "0"...
## $ edu_juniorvocational_ungraduated_f          <chr> "0", "0", "0", "0"...
## $ edu_primary_graduated_m                     <chr> "26", "31", "18", ...
## $ edu_primary_graduated_f                     <chr> "70", "59", "33", ...
## $ edu_primary_ungraduated_m                   <chr> "1", "0", "1", "0"...
## $ edu_primary_ungraduated_f                   <chr> "7", "5", "5", "3"...
## $ edu_selftaughtl_m                           <chr> "1", "0", "0", "0"...
## $ edu_selftaughtl_f                           <chr> "2", "0", "0", "1"...
## $ edu_illiterate_m                            <chr> "0", "0", "1", "1"...
## $ edu_illiterate_f                            <chr> "5", "4", "6", "4"...

Town level educational data

  • Original data is village-level data
  • Following the in-class tutorial, use group_by() to create town-level statistical data.

Add code chunks as you need here.

Loading town-level age, marriage, and referendum data

  • Loading town-level data created in class

Joining data together

  • Join all town-level data together (including the new educational data, plus the age, marriage, and referendum data introduced in class)

Q1 Ans: Joined data dimension

  • using dim() to display data dimension (should be 368). TAs will score Q1 according to the outcome.
# dim(YOUR_DATA_FRAME_NAME)

Q1 Ans: glimpse()

  • Using glimpse() to print out data you join below. TAs will score Q1 according to the glimpse() outcome.
# glimpse(YOUR_DATA_FRAME_NAME)

Q2 Open Question - Joining one more dimension

Read the file

Display the data dimension of the file

Add code chunks as you need

Q2 Ans: Answer following questions

  • Data source link (the page you download the data):
  • Describe the data source in your words:

Q2 Ans: Dim() of joined data frame

# dim(YOUR_DATA_FRAME_NAME)

Q2 Ans: glimpse() of joined data frame

# glimpse(YOUR_DATA_FRAME_NAME) 

Q3 Open question - Linear regression

Q3 Ans

# code for print out regression result 

Q4 Challenge: The median of grouped data

Q4 Ans: glimpse()

# glimpse(YOUR_DATA_FRAME_NAME)