core-methods-in-edm · YixiongXu · Oct 22, 2020 · YixiongXu · Oct 22, 2020 · YixiongXu
diff --git a/Assignment 3.Rmd b/Assignment 3.Rmd
@@ -103,8 +103,44 @@ plot(g,layout=layout.fruchterman.reingold, vertex.color=VERTEX$gender, edge.widt
 In Part II your task is to [look up](http://igraph.org/r/) in the igraph documentation and modify the graph above so that:
 
 * Ensure that sizing allows for an unobstructed view of the network features (For example, the arrow size is smaller)
+```{r}
+# Redraw the network with small vertices and small arrowheads, colouring
+# the vertices by major and pushing the labels slightly off the vertices.
+plot(
+  g,
+  layout = layout_with_fr,
+  vertex.size = 3,
+  vertex.label.dist = 0.6,
+  vertex.color = VERTEX$major,
+  edge.arrow.size = 0.3
+)
+
+```
+
 * The vertices are colored according to major
+```{r}
+# Colour the vertices of the class network by major.
+# `g` and `VERTEX` are only read here, never reassigned, so this chunk draws
+# the same network as the chunks above instead of a generated example graph.
+# NOTE(review): assumes VERTEX has one row per vertex of `g`, in vertex
+# order -- confirm against the chunk that builds `g`.
+major <- as.factor(VERTEX$major)
+# One explicit colour per distinct major, then one colour per vertex.
+major_palette <- rainbow(nlevels(major))
+plot(g, layout = layout_with_fr, vertex.size = 5,
+     vertex.color = major_palette[as.integer(major)], vertex.label = NA)
+
+```
+
 * The vertices are sized according to the number of comments they have received
+```{r}
+# Size each vertex by the number of comments that student received and
+# colour it by major, with a legend for the colours.
+# NOTE(review): assumes `g` is the directed class network built earlier in
+# this file, that its edges point from commenter to recipient, and that
+# VERTEX has one row per vertex in vertex order -- confirm.
+# If the edges carry a `count` attribute it is used as the edge weight;
+# otherwise strength() falls back to counting incoming edges.
+edge_counts <- if ("count" %in% edge_attr_names(g)) E(g)$count else NULL
+received <- strength(g, mode = "in", weights = edge_counts)
+
+major <- as.factor(VERTEX$major)
+# Recycle the four base colours if there are more than four majors.
+color_by_major <- rep_len(c("blue", "orange", "green", "red"), nlevels(major))
+
+plot(g, layout = layout_with_fr,
+     vertex.size = 4 + received,  # offset keeps zero-comment vertices visible
+     vertex.color = color_by_major[as.integer(major)],
+     vertex.label.dist = 0.6, edge.arrow.size = 0.3)
+# Legend entries come from the factor levels so labels and colours line up.
+legend("topright", pch = 21, col = "#99FFFF",
+       pt.bg = color_by_major, legend = levels(major))
+```
+
 
 ## Part III
 
@@ -117,6 +153,112 @@ Once you have done this, also [look up](http://igraph.org/r/) how to generate th
 * Betweenness centrality and degree centrality. **Who is the most central person in the network according to these two metrics? Write a sentence or two that describes your interpretation of these metrics**
 
 * Color the nodes according to interest. Are there any clusters of interest that correspond to clusters in the network? Write a sentence or two describing your interpretation.
+```{r}
+library(tidyr)
+library(dplyr)
+library(stringr)
+library(igraph)
+
+
+# Load the raw class survey, keeping text columns as character vectors.
+C1 <- read.csv(
+  file = "hudk4050-classes.csv",
+  header = TRUE,
+  stringsAsFactors = FALSE
+)
+# Work on a copy so the raw import stays available in C1.
+C2 <- C1
+```
+
+#Data Tidying
+```{r}
+# Tidy the raw export: promote the header row, keep the wanted rows, build a
+# single `name` column and normalise the six class columns.
+
+# Use the first data row as the column names; unlist() turns the one-row
+# data frame into a plain vector before it is assigned.
+colnames(C2) <- as.character(unlist(C2[1, ]))
+# Keep rows 3 to 49 only.
+# NOTE(review): the range is hard-coded for this export -- presumably the
+# student responses; confirm if the file changes.
+C2 <- slice(C2, 3:49)
+# Keep the first eight columns (drops the last column)
+C2 <- select(C2, 1:8)
+# Merge first and last name into one `name` column
+C2 <- unite(C2, "name", "First Name", "Last Name", sep = " ")
+# Strip every stray backtick from a name, not only the first one
+C2$name <- str_replace_all(C2$name, "`", "")
+# Capitalise the first letter of each word in the name
+C2$name <- str_to_title(C2$name)
+# Upper-case the class columns, then remove all spaces inside them
+C2 <- C2 %>%
+  mutate_at(2:7, toupper) %>%
+  mutate_at(2:7, str_replace_all, " ", "")
+```
+#Data Restructuring
+```{r}
+# Reshape to long form (one row per name/class pair), mark every pair with
+# a count of 1, drop blank classes and repeated pairs, then spread back out
+# to one column per class.
+C3 <- C2 %>%
+  gather(label, class, 2:7, na.rm = TRUE, convert = FALSE) %>%
+  select(name, class) %>%
+  mutate(count = 1) %>%
+  filter(class != "") %>%
+  unique() %>%
+  spread(class, count)
+
+# Student names become the row names
+rownames(C3) <- C3$name
+
+# Drop the name column and the HUDK4050 column
+C3 <- select(C3, -name, -HUDK4050)
+
+# Convert to a 0/1 matrix: 1 where a value is present, 0 where it is NA
+C3 <- (!is.na(C3)) * 1
+
+
+
+```
+
+#Matrix operations
+```{r}
+# Person-to-person matrix: the person-by-class matrix multiplied by its own
+# transpose, i.e. x %*% t(x), computed in one step.
+C4 <- tcrossprod(as.matrix(C3))
+```
+
+#Graphing
+```{r}
+# Build an undirected graph from the person-to-person matrix, ignoring the
+# diagonal, and draw it with the Fruchterman-Reingold layout.
+g <- graph.adjacency(C4, mode = "undirected", diag = FALSE)
+plot(
+  g,
+  layout = layout.fruchterman.reingold,
+  vertex.size = 4,  # fixed size; degree(g)*0.7 was tried as an alternative
+  vertex.color = "black",
+  vertex.label.color = "blue",
+  vertex.label.cex = 0.8
+)
+```
+
+#Centrality
+```{r}
+# Degree centrality of every node, largest first
+g %>% degree() %>% sort(decreasing = TRUE)
+
+# Betweenness centrality of every node, largest first
+g %>% betweenness() %>% sort(decreasing = TRUE)
+
+```
+Clearly, Yifei Zhang has the highest betweenness centrality.
+
+#Color the nodes according to interest. 
+```{r}
+# Colour the class-overlap graph `g` by each student's interest.
+# NOTE(review): assumes the interest answer is the last column of the raw
+# export C1 (the column dropped by select(C2, 1:8) during tidying) and that
+# C2 has one row per slice(C1, 3:49) row, in the same order -- confirm.
+interest_lookup <- setNames(slice(C1, 3:49)[[ncol(C1)]], C2$name)
+# Store the answer on the vertices, matched by vertex name.
+# NOTE(review): assumes V(g)$name holds the student names taken from the
+# dimnames of C4 -- confirm.
+V(g)$InterestedTopics <- unname(interest_lookup[V(g)$name])
+interest <- as.factor(V(g)$InterestedTopics)
+# One explicit colour per interest so the legend matches the vertices.
+interest_palette <- rainbow(nlevels(interest))
+# Plot the graph itself rather than the matrix C4, with a force-directed
+# layout instead of a star layout, which cannot show clusters.
+plot(g, layout = layout.fruchterman.reingold, vertex.size = 5,
+     vertex.label.cex = 0.6, vertex.label.color = "lightblue",
+     vertex.color = interest_palette[as.integer(interest)])
+legend("topright", pch = 21, pt.bg = interest_palette,
+       legend = levels(interest), cex = 0.6)
+#It was clear that the Clusters were divided into two groups, with some scattered nodes outside.
+```
+
+
+
+
 
 ### To Submit Your Assignment