Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 58 additions & 22 deletions sql-project.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "sql-workshop"
author: "Charles Lang"
author: "DANNY SHAN"
output: html_document
---

Expand All @@ -16,7 +16,7 @@ library(RMySQL)
# Connection settings for the workshop database.
# Each value can be overridden with an environment variable (for example in
# ~/.Renviron), so the endpoint and password do not have to be edited into this
# file. When a variable is unset, the value previously written here is used.
# NOTE(review): the fallback password is still stored in the file; consider
# rotating it and dropping the fallback once everyone sets OUDB_PASSWORD.
db_user <- Sys.getenv("OUDB_USER", unset = "admin")
db_password <- Sys.getenv("OUDB_PASSWORD", unset = "testsql!")
db_name <- Sys.getenv("OUDB_NAME", unset = "oudb")
db_host <- Sys.getenv(
  "OUDB_HOST",
  unset = "database-1.cgssnso4vlwh.us-east-2.rds.amazonaws.com"
)
# Sys.getenv() always returns a string, so convert the port back to a number.
db_port <- as.integer(Sys.getenv("OUDB_PORT", unset = "3306"))

# Open the connection that every later chunk uses as `mydb`.
mydb <- dbConnect(MySQL(), user = db_user, password = db_password, dbname = db_name, host = db_host, port = db_port)
Expand All @@ -26,6 +26,7 @@ summary(mydb)

## Load OU Data
```{r}
library(dplyr)
#Student demographic data
studentInfo <- read.csv("studentInfo.csv", header = TRUE)
#Student assessment data
Expand All @@ -51,12 +52,17 @@ dbListTables(mydb)

#Read a particular table
dbReadTable(mydb, 'studentInfo')

```
```{r}
#EXERCISE 1
#Make two toy data sets with at least three variables and at least 30 rows each in them. Have a mix of numeric and character variables. Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like.

# Both toy data sets are column subsets of the studentInfo data frame.
t1 <- select(studentInfo, id_student, gender, region)
t2 <- select(studentInfo, code_module, code_presentation, age_band)

# overwrite = TRUE lets this chunk be re-run: without it dbWriteTable() stops
# when the table already exists.
# row.names = FALSE keeps R's row names out of the database; RMySQL's default
# (row.names = TRUE) stores them in an extra column, so the tables would not
# have just the three selected variables.
dbWriteTable(mydb, "t1", t1, overwrite = TRUE, row.names = FALSE)
dbWriteTable(mydb, "t2", t2, overwrite = TRUE, row.names = FALSE)

# Confirm that both tables now exist.
dbListTables(mydb)
```

## Getting into SQL - READING
```{r}
#Query a portion of the database (always returns dataframe)
Expand All @@ -79,12 +85,13 @@ dbGetQuery(mydb, "SELECT COUNT(score) FROM studentAssessment WHERE score > 50;")

#Using an AND statement
dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment WHERE score > 50 AND id_assessment = '1752';")

```
```{R}
# EXERCISE 2
# Read one of your toy data tables: order the output in descending order,
# rename one of the variables, and limit the output to the first 20 rows.

# t1 sorted by id_student (largest first), with id_student shown as `id`.
t1_top20_sql <- "SELECT id_student AS id, gender, region FROM t1 ORDER BY id_student DESC LIMIT 20;"
dbGetQuery(conn = mydb, statement = t1_top20_sql)

# Read the other table according to a condition on one of the variables.
# Here: only the t2 rows whose code_module is 'AAA'.
t2_module_aaa_sql <- "SELECT code_module, code_presentation, age_band FROM t2 WHERE code_module = 'AAA';"
dbGetQuery(conn = mydb, statement = t2_module_aaa_sql)
```

## Getting into SQL - UPDATING
Expand Down Expand Up @@ -119,11 +126,16 @@ dbGetQuery(mydb, "UPDATE studentAssessment SET score = 'NULL' WHERE id_student =
dbGetQuery(mydb, "DELETE FROM studentAssessment WHERE id_student = 1;")

dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;")

```
```{R}
#EXERCISE 3
#Insert a new row in one of your toy data tables leaving one variable empty. Change one value in your other table. Display your new tables. Delete the row you edited and the row you inserted.

# INSERT / UPDATE / DELETE do not return a result set, so they are sent with
# dbExecute(), which returns the number of affected rows. dbGetQuery() is meant
# for statements that return rows.

# New row in t2; age_band is not listed, so it is left empty.
dbExecute(mydb, "INSERT INTO t2 (code_module, code_presentation) VALUES ('a', 'b');")

# Change one value in t1.
dbExecute(mydb, "UPDATE t1 SET gender = 'F' WHERE id_student = '11391';")

# Display both tables after the changes.
dbReadTable(mydb, 't1')
dbReadTable(mydb, 't2')

# Remove the inserted row from t2 and the edited row from t1.
dbExecute(mydb, "DELETE FROM t2 WHERE code_module = 'a' AND code_presentation = 'b';")
dbExecute(mydb, "DELETE FROM t1 WHERE id_student = '11391';")
```

## Adding/Deleting Tables
Expand Down Expand Up @@ -155,10 +167,14 @@ dbGetQuery(mydb, "SELECT * FROM test;") #This should produce an error since your

#Delete a table if it exists
dbGetQuery(mydb, "DROP TABLE IF EXISTS test;") #No error since it is only if it exists

```
```{R}
#EXERCISE 4
#Create a table that is exactly the same as your first toy data table but this time use SQL commands. Display your new table. Then delete the original table.

# Statements that return no rows (DROP / CREATE / INSERT) go through
# dbExecute(); only the SELECT uses dbGetQuery().

# Start from a clean slate so the chunk can be re-run: CREATE TABLE fails if
# toy1 is still around from an earlier run.
dbExecute(mydb, "DROP TABLE IF EXISTS toy1;")
dbExecute(mydb, "CREATE TABLE toy1 (id_student INTEGER, gender TEXT, region TEXT);")

# Copy every row of t1 into the new table.
dbExecute(mydb, "INSERT INTO toy1 (id_student, gender, region) SELECT id_student, gender, region FROM t1;")

# Display the new table.
dbGetQuery(mydb, "SELECT * FROM toy1;")

# Delete the original table, as the exercise asks.
dbExecute(mydb, "DROP TABLE t1;")
```

# NULL Value
Expand Down Expand Up @@ -208,10 +224,16 @@ dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES ('1', 'A');")
#NULL is exempt
dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');")
dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');")

```
```{R}
#EXERCISE 5
#Recreate one of your toy data tables with the constraint that for one of the integer variables the default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table.

# Statements that return no rows go through dbExecute(); only the SELECT uses
# dbGetQuery().

# t3 mirrors toy1, but its integer column `id` defaults to 0.
dbExecute(mydb, "CREATE TABLE t3 (id INTEGER DEFAULT 0, gender TEXT, region TEXT);")
dbExecute(mydb, "INSERT INTO t3 (id, gender, region) SELECT id_student, gender, region FROM toy1;")

# Test rows: the first leaves region empty, the second leaves id empty so the
# DEFAULT 0 constraint fills it in.
dbExecute(mydb, "INSERT INTO t3 (id, gender) VALUES (10131, 'F');")
dbExecute(mydb, "INSERT INTO t3 (gender, region) VALUES ('M', 'Wales');")

# Display the table, then delete it.
dbGetQuery(mydb, "SELECT * FROM t3;")
dbExecute(mydb, "DROP TABLE t3;")
```


Expand All @@ -224,9 +246,13 @@ dbGetQuery(mydb, "SELECT * FROM studentAssessment LIMIT 10;")

#Delete a column
dbGetQuery(mydb, "ALTER TABLE studentAssessment DROP COLUMN email;")

```
```{R}
#EXERCISE 6
#Add a column to one of your toy data tables with a default value of 3. Display your new table. Delete this column.

# ALTER TABLE returns no rows, so it is sent with dbExecute(); the SELECT that
# displays the result stays on dbGetQuery().

# Add an integer column `number`; existing rows receive the default value 3.
dbExecute(mydb, "ALTER TABLE toy1 ADD number INTEGER DEFAULT 3;")

# Show the first 10 rows including the new column.
dbGetQuery(mydb, "SELECT * FROM toy1 LIMIT 10;")

# Remove the column again.
dbExecute(mydb, "ALTER TABLE toy1 DROP COLUMN number;")
```


Expand All @@ -244,10 +270,14 @@ dbGetQuery(mydb,"INSERT INTO test3 (score, student) VALUES (5, 'B');")
dbGetQuery(mydb, "SELECT * FROM test3;")

dbGetQuery(mydb, "DROP TABLE IF EXISTS test3;")

```
```{R}
#EXERCISE 7
#Create a new table with four variables and a primary key that is a sequential id value.

# Statements that return no rows go through dbExecute(); only the SELECT uses
# dbGetQuery().

# t4 is not dropped at the end of this chunk, so remove any copy left by an
# earlier run; otherwise CREATE TABLE fails when the chunk is re-run.
dbExecute(mydb, "DROP TABLE IF EXISTS t4;")

# `id` is the primary key and is filled in sequentially by AUTO_INCREMENT.
dbExecute(mydb, "CREATE TABLE t4 (id INTEGER AUTO_INCREMENT PRIMARY KEY, gender TEXT, score INTEGER, age INTEGER);")

# Insert two rows without an id so the database assigns it.
dbExecute(mydb, "INSERT INTO t4 (gender, score, age) VALUES ('M', 80, 20);")
dbExecute(mydb, "INSERT INTO t4 (gender, score, age) VALUES ('F', 90, 21);")

# Display the table with the generated ids.
dbGetQuery(mydb, "SELECT * FROM t4;")
```

## Filtering (WHERE)
Expand All @@ -274,10 +304,12 @@ dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE regio

#IN
dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE region IN ('Wales','Ireland');")

```
```{R}
# EXERCISE 8
# Query one of your original toy data tables for two different conditions.

# Condition 1: pattern match, age_band of rows whose code_module starts with 'A'.
module_prefix_sql <- "SELECT age_band FROM t2 WHERE code_module LIKE 'A%';"
dbGetQuery(conn = mydb, statement = module_prefix_sql)

# Condition 2: exact match, code_presentation of rows in the '0-35' age band.
age_band_sql <- "SELECT code_presentation FROM t2 WHERE age_band = '0-35';"
dbGetQuery(conn = mydb, statement = age_band_sql)
```

## Removing Duplicates
Expand All @@ -288,7 +320,8 @@ dbGetQuery(mydb, "SELECT DISTINCT region, gender FROM studentInfo;")

#EXERCISE 9
#Insert a duplicate row into one of your toy data tables. Then query the table without including duplicates.

# Duplicate a row that is known to exist: the first row by id_student.
# The earlier version filtered on id_student = 11391, but that row was deleted
# from t1 in Exercise 3 before toy1 was copied from t1, so the SELECT matched
# nothing and no duplicate was inserted.
# INSERT returns no rows, so it is sent with dbExecute().
dbExecute(mydb, "INSERT INTO toy1 (id_student, gender, region) SELECT id_student, gender, region FROM toy1 ORDER BY id_student LIMIT 1;")

# DISTINCT collapses identical (id_student, gender, region) rows in the output.
dbGetQuery(mydb, "SELECT DISTINCT id_student, gender, region FROM toy1;")
```

## Conditional Expressions (non-standard)
Expand Down Expand Up @@ -356,10 +389,13 @@ dbGetQuery(mydb, "SELECT * FROM left_table
UNION
SELECT * FROM right_table;")


```
```{R}
#EXERCISE 10
# Create a common id variable in your two toy data tables. Then join those tables so that your query returns all the values from one table and only those that match from the other.

# Give both tables a sequential `id` column to join on. ALTER TABLE returns no
# rows, so it is sent with dbExecute().
dbExecute(mydb, "ALTER TABLE toy1 ADD id INTEGER AUTO_INCREMENT PRIMARY KEY;")
dbExecute(mydb, "ALTER TABLE t2 ADD id INTEGER AUTO_INCREMENT PRIMARY KEY;")

# LEFT JOIN returns every row of toy1 plus the t2 columns where the ids match
# (NULL where they do not). A plain JOIN is an inner join in MySQL and would
# drop the toy1 rows that have no match.
dbGetQuery(mydb, "SELECT * FROM toy1 LEFT JOIN t2 ON toy1.id = t2.id;")
```
```{r}
#Now disconnect from your database
Expand Down