From 028ae82b4970a98318c6bea1d706953288d705b4 Mon Sep 17 00:00:00 2001
From: Chuheng Yu <cy2561@tc.columbia.edu>
Date: Mon, 16 Dec 2019 15:26:30 -0500
Subject: [PATCH 1/2] Load MOOC2 data and fit the first classification tree

---
 Assignment6.Rmd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Assignment6.Rmd b/Assignment6.Rmd
index 8e65135..d9f1919 100644
--- a/Assignment6.Rmd
+++ b/Assignment6.Rmd
@@ -25,7 +25,7 @@ library(rpart)
 #Upload the data sets MOOC1.csv and MOOC2.csv
 M1 <- read.csv("MOOC1.csv", header = TRUE)
 
-M2 <- 
+M2 <- read.csv("MOOC2.csv", header = TRUE) #second MOOC file; first row holds the column names
 
 ```
 
@@ -33,10 +33,10 @@ M2 <-
 ```{r}
 #Using the rpart package generate a classification tree predicting certified from the other variables in the M1 data frame. Which variables should you use?
 
-c.tree1 <- 
+c.tree1 <- rpart(certified ~ forum.posts + grade + assignment, data = M1, method = "class") #classification tree fitted on M1
 
 #Check the results from the classifcation tree using the printcp() command
-
+printcp(c.tree1) #complexity-parameter table for the tree fitted above
 
 
 #Plot your tree

From abf5f9bd42afdb998ae5ffc3faf2ad8701c243ed Mon Sep 17 00:00:00 2001
From: Chuheng Yu <cy2561@tc.columbia.edu>
Date: Mon, 16 Dec 2019 15:29:42 -0500
Subject: [PATCH 2/2] Finish assignment: plot and prune trees, score predictions, add Part III models

---
 .gitignore        |   4 ++++
 Assignment6.Rmd   |  28 +++++++++++++++++++++++++++-
 assignment6.Rproj |  13 +++++++++++++
 tree1.ps          | Bin 0 -> 4728 bytes
 4 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 .gitignore
 create mode 100644 assignment6.Rproj
 create mode 100644 tree1.ps

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/Assignment6.Rmd b/Assignment6.Rmd
index d9f1919..1eaa4b9 100644
--- a/Assignment6.Rmd
+++ b/Assignment6.Rmd
@@ -42,6 +42,8 @@ printcp(c.tree1)
 #Plot your tree
 
 post(c.tree1, file = "tree1.ps", title = "MOOC") #This creates a pdf image of the tree
+rpart.plot::rpart.plot(c.tree1, type = 3, box.palette = c("red", "green"), fallen.leaves = TRUE) #customised plot of the unpruned tree
+rpart.plot::rpart.plot(c.tree1) #same tree drawn with the rpart.plot defaults
 
 ```
 
@@ -53,10 +55,16 @@ post(c.tree1, file = "tree1.ps", title = "MOOC") #This creates a pdf image of th
 
 ```{r}
 c.tree2 <- prune(c.tree1, cp = )#Set cp to the level at which you want the tree to end
+plotcp(c.tree1)
+printcp(c.tree1)
+rpart.plot::rpart.plot(c.tree1)
+c.tree2 <- prune(c.tree1, cp =0.058182)
 
 #Visualize this tree and compare it to the one you generated earlier
 
 post(c.tree2, file = "tree2.ps", title = "MOOC") #This creates a pdf image of the tree
+rpart.plot::rpart.plot(c.tree2) #plot the pruned tree for comparison with c.tree1
+printcp(c.tree2) #cp table of the pruned tree
 ```
 
 #Now use both the original tree and the pruned tree to make predictions about the the students in the second data set. Which tree has a lower error rate?
@@ -69,7 +77,8 @@ M2$predict2 <- predict(c.tree2, M2, type = "class")
 table(M2$certified, M2$predict1)
 
 table(M2$certified, M2$predict2)
-
+mean(M2$certified == M2$predict1) #share of M2 rows where predict1 equals certified
+mean(M2$certified == M2$predict2) #share of M2 rows where predict2 equals certified
 ```
 
 ##Part III
@@ -77,6 +86,31 @@ table(M2$certified, M2$predict2)
 Choose a data file from the (University of Michigan Open Data Set)[https://github.com/bkoester/PLA/tree/master/data]. Choose an outcome variable that you would like to predict. Build two models that predict that outcome from the other variables. The first model should use raw variables, the second should feature select or feature extract variables from the data. Which model is better according to the cross validation metrics?
 
 ```{r}
+#Load the student records, keep columns 4-13 and drop rows with missing values
+D3 <- read.csv("student.record.csv", header = TRUE)
+D3 <- D3[, 4:13]
+D3 <- na.omit(D3)
+#Model 1 (raw variables): predict SEX from every other retained column
+c.tree3 <- rpart(as.factor(SEX) ~ ., method = "class", data = D3)
+printcp(c.tree3)
+#Model 2 (extracted features): HSGPA plus row sums of columns 2-6 (ACTscore) and 7-9 (SATscore)
+HSGPA <- D3[, 1]
+SEX <- D3[, 10]
+ACT <- D3[, 2:6]
+ACT$ACTscore <- rowSums(ACT)
+SAT <- D3[, 7:9]
+SAT$SATscore <- rowSums(SAT)
+D4 <- cbind(HSGPA, ACT, SAT, SEX)
+c.tree4 <- rpart(as.factor(SEX) ~ ACTscore + SATscore + HSGPA, method = "class", data = D4)
+printcp(c.tree4)
+#Score both trees on the same rows so the two models can be compared directly
+#NOTE: this is accuracy on the fitting data; the cross-validated error is the xerror column of printcp()
+D4$predict1 <- predict(c.tree3, D4, type = "class")
+D4$predict2 <- predict(c.tree4, D4, type = "class")
+table(D4$SEX, D4$predict1)
+table(D4$SEX, D4$predict2)
+mean(D4$SEX == D4$predict1)
+mean(D4$SEX == D4$predict2)
 
 ```
 
diff --git a/assignment6.Rproj b/assignment6.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/assignment6.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/tree1.ps b/tree1.ps
new file mode 100644
index 0000000000000000000000000000000000000000..1a36db213ca45bef6f87aad75fa66477964dbad7
GIT binary patch
literal 4728
zcmd5=-*4MS41U*N!94_UFyyQ|NtP6J1)4P70yIs~6xc)22U)r}+GI(S<Tizu|9u}%
zKO#9!))g4g*0sbVdE`g(cyteby1G7kl@v>T<Oth3ICx#eo3+lHcUmVpd9UkYQ^mSI
zrkf&f=r^5x)=e5mko<XP>FA`$l6~u!%PjqCqxZ2`<q`Y(>v~yaBVR6bl19fAx#Wc&
z1+FBUW{3Dd=0n<K`k3C+uT@murg2S|o2J|}V4YSvYKn@P*TqfqC92qmt7ye=W>KT_
zgm<{CQs9l6w8)R?ZImZ<9F;72Qf%@h%~vPI-7(34Kc4SXpj<G|^5nEwbEIk()vGkW
zs*0>wS@UH{w53%|_4{8>*kxhc@VKd>yv|^jLYt}r7BvgqQ=)II16uMHB+6Qp-9BKN
zeWK4AQp+??jOct-ne|H7KsRM+W4gXAzLbDTki=)KyC&9M7sz5UVPw2sWof?4$jz3y
z#*WSHL?t$yI4f%Xh<9V`+@x7HQ7O!F-BiV&Q^#Y&=&jh~ENqIZc%ls0@X;lO=waUu
zgrvAAzmh{ovR&7A@h!q_9z7rgyn_a*^(Ncp?*~yzHn_*zQZZvyb7qz+GB8(tK6()2
zu$Rs=3i}K!Ci1%Y1{40XDtnAHCY~N0O#|A(Y75kY2>+3U>eF`|3mk|qX@XKHi?U~P
zM034DRI4s4T!N^*Jtf*i?5F#jRWjP+_%%ybQbUe+42?x&Q^|wr7EPZMr^C@dhZ-{Q
zdN^>dtEH}J+G6U1&@Hx)Ugx2L;w&u@jmPJuS}nPpbc2@|tzvpIaoCv~N0~OQ7INk}
z!gfGtSr8jH3DaY?^pWPL=VvETt&>+Lr@d2<eer_kuXS7``t8NL(K=WJtt};+l5Al+
zp$aI|yAno>zqV3|P&rCcl77B_S`1rx9z{t)(!4>MlyDKjlAcgYZ**4gvD_nvgK%=!
z<frMv{31ea-T@?iw1q2`>-oT?q+0k61C*iTxKo`09A0eUc)sT^@Zw8PdI~R6hB8#o
z>Bzu#9kyfpO8OpNLeF+=x$E5Tw`rrViWFBUyn~F#;o*NlXitP+zI?FviHpTlN?k_r
zv|-jmH9ab&gM;%t&NhkeZ|_H!_k6g=(Yx}5>&)Uh$aKEK$xhO^Y40QWvhB-${^tBO
z>YBE=ukLA!<IgSNp4#-?V;ZQUEQ7}UEJwYePtf`G<y%~=xg1vbt|N*!RfU_A?SMj2
zTk|*YLAK`%GPs6J@pF&)S2*q9@qo$DKNK+aoxu9hXoRg>`k~-C-wAI#{f!`wmVYRi
z+{7?id}DcbQ|!Eb{vSY|I7QwQZen8XCe(Ha#!O4nT9%d!ctCBx1LcSy<T+R1K8Q2R
z4TN&3{q{pgEIjfciZw91Q``3tl8S1JXI-1PKUuCqvxD|R3bmlw--jg7BdM{_WIb+f
ze21E8!ut4$m2{It^{pj)9wzYz;t%vsOQP2r!8JN6hB`c~972vGoCQf+iVzc?r5x_b
z7$X&XoLN$03dcQw*N$~G@Cr-WxG8K>fiEz3&TbCb$;djwrNDGi9a6J|(`ffJ+pI-d
z)J^?@{7`tz!JuP#t|MHZln;aITeo=m%Xo`9M#(KF1%FL@Z+{v99U>TtK;hvEZvlAl
zOu-lv858gegknMpNAZp8w?u$>SvVmRcru6Yc_#b-gyi7hr=L=|j6x%Z95aC}4JX78
zAF+^3xI%>>T*EP*NFj{}uD}l?<hm0>&TX+U9w6*|r?xsCu2c}x;oU&k1mAlKa-jOn
zu?47h53@U{etVz>%-%A50@dm{0`7up{WVBH^-us3REyH!M-<usn9zV~V*?GIPrI<0
zfP!ixYGMtl4Yo-GQyhhX;Fu0_jCHcbWcP{T!6z3wCMUhS0}>AK1f<Cc#7v4!PC%H@
z3{m5<aQPf^K-p1~lRnmyG>j88&YV&2wEJAQ+XDGu&s2wT#JJd{p|7Xb^@l?1E0DpC
zOVh-s3;wv6`zq&v_T|r!#?R2w;T}M8;bTT6jBP@OI3*}A$Akpch69A|#FdL5m)+un
zrjOHWqqhH>rYj9Z*LOLtv#$?rL!lgJy6&69&<Ei+1eNd2eTCN1b;G`SVZ=sno8<-V
zvck48DPPX*fwu?^PT8qFZQi#7;W^Z8;!bTp*ZfU?SHs;!(%swNJr}ujXm)XVc{+Hf
Xd2;(uMQNrhymkNWBJp~5`NsMOxKJ5!

literal 0
HcmV?d00001