Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions amsterdam-house.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2018,minimum_nights,number_of_reviews,reviews_per_month,availability_2019
0,2818,Oostelijk Havengebied - Indische Buurt,52.36575451,4.941419235,Private room,59,296,3,248,2.1,44
1,3209,Westerpark,52.39022505,4.873924095,Entire home/apt,160,309,4,42,1.03,47
2,20168,Centrum-Oost,52.36508703,4.893541008,Entire home/apt,80,224,1,233,2.18,198
3,25428,Centrum-West,52.3731144,4.883668196,Entire home/apt,125,208,14,1,0.09,141
4,27886,Centrum-West,52.38672732,4.89207807,Private room,150,318,2,171,2.03,199
5,28658,Bos en Lommer,52.37534218,4.85728935,Private room,65,64,3,434,4.16,295
6,28871,Centrum-West,52.36718681,4.890917797,Private room,75,184,2,215,2.13,137
7,29051,Centrum-West,52.36772541,4.891511737,Private room,55,131,2,383,4.07,188
8,31080,Zuid,52.35132054,4.848382899,Entire home/apt,219,,3,32,0.36,336
9,41125,Centrum-West,52.37891477,4.883205056,Entire home/apt,180,349,3,76,0.78,11
10,42970,Centrum-West,52.36781448,4.890012023,Private room,159,55,3,426,4.24,89
11,43980,Zuid,52.35745628,4.861242085,Entire home/apt,100,80,28,61,0.62,10
12,44129,Centrum-West,52.38071123,4.886104318,Entire home/apt,250,278,2,176,1.74,16
13,44391,Centrum-Oost,52.37016999,4.914377827,Entire home/apt,200,,3,31,0.31,0
14,45246,Westerpark,52.3776764,4.876178649,Entire home/apt,155,,3,79,0.79,253
15,46386,De Pijp - Rivierenbuurt,52.35247026,4.90825444,Entire home/apt,150,,3,3,0.03,0
16,47061,De Baarsjes - Oud-West,52.3679861,4.874469332,Entire home/apt,140,,2,167,1.67,0
17,48076,Centrum-West,52.38042002,4.894534735,Entire home/apt,350,92,5,159,1.82,276
18,49790,De Baarsjes - Oud-West,52.36266379,4.86103174,Entire home/apt,225,87,3,98,1.06,296
19,50515,Bos en Lommer,52.3772724,4.839252563,Entire home/apt,120,208,3,12,0.21,79
20,50518,Westerpark,52.38200613,4.878649621,Entire home/apt,125,,1,91,1.22,0
21,50523,Centrum-West,52.3684079,4.884133705,Private room,115,126,2,213,2.21,206
22,50570,Bos en Lommer,52.37773744,4.848911991,Entire home/apt,90,354,4,152,1.62,19
23,52490,Oostelijk Havengebied - Indische Buurt,52.37004589,4.938691236,Private room,72,337,3,81,0.82,11
24,53067,De Pijp - Rivierenbuurt,52.35339267,4.900637643,Private room,87,295,1,333,3.4,2
25,53671,Westerpark,52.38905337,4.885588535,Private room,75,31,3,261,2.8,289
26,53692,De Pijp - Rivierenbuurt,52.35348316,4.900490172,Private room,60,,3,219,2.4,47
27,55256,Centrum-Oost,52.37125689,4.903513145,Private room,86,352,1,120,1.26,129
28,55703,Bos en Lommer,52.37560816,4.858187468,Entire home/apt,250,1,3,3,0.07,0
29,55709,Centrum-Oost,52.3589565,4.897259341,Entire home/apt,159,355,5,53,0.54,32
30,55807,De Baarsjes - Oud-West,52.36965708,4.862025427,Private room,60,28,2,150,1.53,122
30 changes: 30 additions & 0 deletions sanfrancisco.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
id,neighbourhood,room_type,price,days_occupied_in_2018,minimum_nights,number_of_reviews,reviews_per_month,availability_2019
958,Western Addition,Entire home/apt,170,213,1,172,1.51,74
5858,Bernal Heights,Entire home/apt,235,0,30,112,0.96,365
7918,Haight Ashbury,Private room,65,0,32,17,0.15,365
8142,Haight Ashbury,Private room,65,0,32,8,0.15,365
8339,Western Addition,Entire home/apt,785,276,7,27,0.24,89
8567,Western Addition,Entire home/apt,255,206,2,31,0.27,20
8739,Mission,Private room,139,284,1,631,5.54,129
9225,Potrero Hill,Private room,135,26,1,434,3.91,342
10251,Mission,Entire home/apt,265,18,3,307,2.73,303
10578,Nob Hill,Entire home/apt,120,,30,18,0.21,365
10819,Marina,Entire home/apt,218,,30,22,0.32,0
10820,Haight Ashbury,Entire home/apt,177,,30,36,0.32,288
10824,Western Addition,Entire home/apt,194,,30,14,0.17,347
10832,Downtown/Civic Center,Entire home/apt,139,0,30,18,0.25,356
12041,Haight Ashbury,Private room,85,0,32,6,0.07,365
12042,Haight Ashbury,Private room,85,0,32,5,0.07,365
12522,Castro/Upper Market,Private room,79,325,3,383,3.54,23
12584,Inner Sunset,Entire home/apt,136,,30,16,0.21,365
14125,Mission,Entire home/apt,215,28,3,97,0.9,278
17132,South of Market,Entire home/apt,450,,35,14,0.14,219
18231,Noe Valley,Entire home/apt,107,,30,60,0.58,129
18904,Western Addition,Private room,110,288,3,350,4.1,74
19040,Mission,Entire home/apt,198,,5,226,2.12,4
21334,Nob Hill,Entire home/apt,125,302,30,118,1.13,325
21914,Haight Ashbury,Private room,65,0,32,14,0.16,365
23540,Mission,Entire home/apt,225,57,1,107,1.01,0
23611,Bernal Heights,Private room,100,12,3,232,2.21,364
23630,Castro/Upper Market,Entire home/apt,155,,3,340,3.29,235
24390,Mission,Entire home/apt,95,,30,65,0.67,135
164 changes: 156 additions & 8 deletions sql-project.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "sql-workshop"
author: "Charles Lang"
author: "Yifei Zhang"
output: html_document
---

Expand All @@ -16,7 +16,7 @@ library(RMySQL)
# Connection settings for the workshop database.
# Each value can be overridden with an environment variable (for example in
# ~/.Renviron) so the file does not have to be edited per machine.
# NOTE(review): the fallback password and host below are committed in plain
# text; rotate them and drop the fallbacks once the environment variables are
# in place.
db_user <- Sys.getenv("OUDB_USER", unset = "admin")
db_password <- Sys.getenv("OUDB_PASSWORD", unset = "testsql!")
db_name <- Sys.getenv("OUDB_NAME", unset = "oudb")
db_host <- Sys.getenv(
  "OUDB_HOST",
  unset = "database-3.cvsve6xftlcb.us-east-2.rds.amazonaws.com"
)
# Sys.getenv() returns text, so convert the port back to a number.
db_port <- as.integer(Sys.getenv("OUDB_PORT", unset = "3306"))

# Open the connection that every later chunk uses as `mydb`.
mydb <- dbConnect(
  MySQL(),
  user = db_user,
  password = db_password,
  dbname = db_name,
  host = db_host,
  port = db_port
)
Expand All @@ -33,6 +33,10 @@ studentAssessment <- read.csv("studentAssessment.csv", header = TRUE)
#Course data
courses <- read.csv("courses.csv", header = TRUE)
studentRegistration <- read.csv("studentRegistration.csv", header = TRUE)

## Load the two toy data sets used in the exercises
sanfrancisco <- read.csv("sanfrancisco.csv", header = TRUE)
# amsterdam-house.csv starts with an unnamed index column (its header begins
# with a comma). Use it as row names instead of importing it as a data
# column called "X".
amsterdam <- read.csv("amsterdam-house.csv", header = TRUE, row.names = 1)
```

## Write data to the DB using the DBI package
Expand All @@ -52,9 +56,14 @@ dbListTables(mydb)
#Read a particular table
dbReadTable(mydb, 'studentInfo')

```


```{r}
#EXERCISE 1
#Make two toy data sets with at least three variables and at least 30 rows each in them. Have a mix of numeric and character variables. Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like.

# Copy both toy data frames into the database.
# overwrite = TRUE lets this chunk be re-run when the tables already exist;
# row.names = FALSE keeps R's row names out of the tables so they contain only
# the data columns.
dbWriteTable(
  mydb, "sanfrancisco", sanfrancisco,
  overwrite = TRUE, row.names = FALSE
)
dbWriteTable(
  mydb, "amsterdam", amsterdam,
  overwrite = TRUE, row.names = FALSE
)
```

## Getting into SQL - READING
Expand All @@ -80,10 +89,16 @@ dbGetQuery(mydb, "SELECT COUNT(score) FROM studentAssessment WHERE score > 50;")
#Using an AND statement
dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment WHERE score > 50 AND id_assessment = '1752';")


```

```{r}
#EXERCISE 2
#Read one of your toy data tables, make sure the output is ordered in descending order, you rename one of the variables and the output is limited to the first 20 rows.

# The 20 highest availability_2019 values from amsterdam, returned under the
# alias availability_in_2019.
amsterdam_top20_sql <- "SELECT availability_2019 AS availability_in_2019 FROM amsterdam ORDER BY availability_2019 DESC LIMIT 20"
dbGetQuery(mydb, amsterdam_top20_sql)

# Read the other table with a condition on one variable: price above 500.
sf_price_filter_sql <- "SELECT * FROM sanfrancisco WHERE price > 500;"
dbGetQuery(mydb, sf_price_filter_sql)

```

Expand All @@ -101,7 +116,7 @@ dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment;")
#View inserted row
dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;")

#Add a row with missing values
#Add a row with missing values (columns not listed in the INSERT are stored as NULL unless they have a default)
dbGetQuery(mydb, "INSERT INTO studentAssessment (id_assessment, id_student, date_submitted) VALUES ('00001', '1', '20');")

#View inserted row
Expand All @@ -120,10 +135,29 @@ dbGetQuery(mydb, "DELETE FROM studentAssessment WHERE id_student = 1;")

dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;")


```


```{r}
#EXERCISE 3
#Insert a new row in one of your toy data tables leaving one variable empty. Change one value in your other table. Display your new tables. Delete the row you edited and the row you inserted.

# Add a listing to sanfrancisco. availability_2019 is not in the column list,
# so it is left empty for this row.
insert_listing_sql <- "INSERT INTO sanfrancisco (id, neighbourhood,room_type,price, days_occupied_in_2018, minimum_nights,number_of_reviews,reviews_per_month) VALUES ('40092','Mission','Entire home/apt','117','0','30', '96', '0.96');"
dbGetQuery(mydb, insert_listing_sql)

# Edit one value in amsterdam: availability_2019 of listing 2818
update_listing_sql <- "UPDATE amsterdam SET availability_2019 = '365' WHERE id = 2818;"
dbGetQuery(mydb, update_listing_sql)

# Show both tables after the changes
show_amsterdam_sql <- "SELECT * FROM amsterdam;"
show_sanfrancisco_sql <- "SELECT * FROM sanfrancisco;"
dbGetQuery(mydb, show_amsterdam_sql)
dbGetQuery(mydb, show_sanfrancisco_sql)

# Clean up: remove the row that was inserted into sanfrancisco ...
delete_inserted_sql <- "DELETE FROM sanfrancisco WHERE id = 40092;"
dbGetQuery(mydb, delete_inserted_sql)

# ... and the row that was edited in amsterdam
delete_edited_sql <- "DELETE FROM amsterdam WHERE id = 2818;"
dbGetQuery(mydb, delete_edited_sql)
```

## Add/Deleting Table
Expand Down Expand Up @@ -156,10 +190,44 @@ dbGetQuery(mydb, "SELECT * FROM test;") #This should produce an error since your
#Delete a table if it exists
dbGetQuery(mydb, "DROP TABLE IF EXISTS test;") #No error since it is only if it exists


```


```{r}
#EXERCISE 4
#Create a table that is exactly the same as your first toy data table but this time use SQL commands. Display your new table. Then delete the original table.

# Re-create the San Francisco toy table with SQL.
# number_of_reviews holds whole counts and reviews_per_month holds decimal
# rates (e.g. 1.51), so the former is INTEGER and the latter FLOAT. With the
# types the other way round, MySQL rounds the monthly rates to whole numbers.
dbGetQuery(mydb, "CREATE TABLE san_francisco (
  id INTEGER,
  neighbourhood TEXT,
  room_type TEXT,
  price INTEGER,
  days_occupied_in_2018 INTEGER,
  minimum_nights INTEGER,
  number_of_reviews INTEGER,
  reviews_per_month FLOAT
  );")

# Confirm the new table is listed
dbListTables(mydb)

```
```{r}
# Rows to load into san_francisco, one INSERT statement per listing
seed_rows <- c(
  "INSERT INTO san_francisco VALUES (958,'Western Addition','Entire home/apt', 170,213,1,172,1.51);",
  "INSERT INTO san_francisco VALUES (5858,'Bernal Heights','Entire home/apt',235,0,30,112,0.96);",
  "INSERT INTO san_francisco VALUES (7918,'Haight Ashbury','Private room',65,0,32, 17,0.15);",
  "INSERT INTO san_francisco VALUES (8142,'Haight Ashbury','Private room',65,0,32,8,0.15);"
)
dbGetQuery(mydb, seed_rows[[1]])
dbGetQuery(mydb, seed_rows[[2]])
dbGetQuery(mydb, seed_rows[[3]])
dbGetQuery(mydb, seed_rows[[4]])

# Show the SQL-built table
show_table_sql <- "SELECT * FROM san_francisco;"
dbGetQuery(mydb, show_table_sql)

# Remove the original table that was written from the data frame
drop_original_sql <- "DROP TABLE sanfrancisco;"
dbGetQuery(mydb, drop_original_sql)
```



# NULL Value
```{r}
Expand Down Expand Up @@ -209,9 +277,35 @@ dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES ('1', 'A');")
dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');")
dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');")

```


```{r}
#EXERCISE 5
#Recreate one of your toy data tables with the constraint that for one of the integer variablesthe default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table.
#Recreate one of your toy data tables with the constraint that for one of the integer variables the default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table.

# Recreate the San Francisco table. number_of_reviews is an integer column
# that falls back to 0 when an INSERT does not supply it; reviews_per_month
# holds decimal rates, so it is FLOAT.
dbGetQuery(mydb, "CREATE TABLE sanfrancisco_2 (
  id INTEGER,
  neighbourhood TEXT,
  room_type TEXT,
  price INTEGER,
  days_occupied_in_2018 INTEGER,
  minimum_nights INTEGER,
  number_of_reviews INTEGER DEFAULT 0,
  reviews_per_month FLOAT
  );")

# Test the default by leaving number_of_reviews out of the column list.
# The quoted string 'NULL' is not an empty value: it is text, which MySQL
# either rejects or coerces for a numeric column, and the DEFAULT never applies.
# The first row also omits reviews_per_month, which has no default and is
# therefore stored as NULL.
dbGetQuery(mydb, "INSERT INTO sanfrancisco_2 (id, neighbourhood, room_type, price, days_occupied_in_2018, minimum_nights) VALUES (1, 'Marina', 'Entire home/apt', 127, 0, 21);")

dbGetQuery(mydb, "INSERT INTO sanfrancisco_2 (id, neighbourhood, room_type, price, days_occupied_in_2018, minimum_nights, reviews_per_month) VALUES (40092, 'Mission', 'Entire home/apt', 117, 0, 30, 0.96);")

# Display: number_of_reviews should show 0 for both rows
dbGetQuery(mydb, "SELECT * FROM sanfrancisco_2;")

# Delete
dbGetQuery(mydb, "DROP TABLE IF EXISTS sanfrancisco_2;")
```


Expand All @@ -224,11 +318,22 @@ dbGetQuery(mydb, "SELECT * FROM studentAssessment LIMIT 10;")

#Delete a column
dbGetQuery(mydb, "ALTER TABLE studentAssessment DROP COLUMN email;")
```


```{r}
#EXERCISE 6
#Add a column to one of your toy data tables with a default value of 3. Display your new table. Delete this column.
```

# Append a column to san_francisco whose default value is 3
add_column_sql <- "ALTER TABLE san_francisco ADD minimum_nights_2021 INTEGER DEFAULT 3 "
dbGetQuery(mydb, add_column_sql)

# Show the table with the new column
show_table_sql <- "SELECT * FROM san_francisco;"
dbGetQuery(mydb, show_table_sql)

# Remove the column again
drop_column_sql <- "ALTER TABLE san_francisco DROP COLUMN minimum_nights_2021;"
dbGetQuery(mydb, drop_column_sql)
```

# ID Columns
```{r}
Expand All @@ -245,8 +350,18 @@ dbGetQuery(mydb, "SELECT * FROM test3;")

dbGetQuery(mydb, "DROP TABLE IF EXISTS test3;")

```


```{r}
#EXERCISE 7
#Create a new table with four variables and a primary key that is a sequential id value.
# id is an auto-incrementing primary key, so rows get sequential ids without
# supplying one. Column names are spelled to match the toy data
# (neighbourhood, latitude, longitude).
dbGetQuery(mydb, "CREATE TABLE sanfrancisco3 (
  id INTEGER AUTO_INCREMENT PRIMARY KEY,
  neighbourhood TEXT,
  latitude FLOAT,
  longitude FLOAT
  );")

```

Expand Down Expand Up @@ -275,9 +390,13 @@ dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE regio
#IN
dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE region IN ('Wales','Ireland');")

```

```{r}
#EXERCISE 8
#Query one of your original toy data tables, for two different conditions.

# Two conditions combined: more than 3 reviews per month AND a price below
# 100, listed from the most to the least expensive.
busy_and_cheap_sql <- "SELECT neighbourhood,reviews_per_month,price FROM amsterdam WHERE reviews_per_month > 3 and price < 100 ORDER BY price DESC;"
dbGetQuery(mydb, busy_and_cheap_sql)
```

## Removing Duplicates
Expand All @@ -286,9 +405,16 @@ dbGetQuery(mydb, "SELECT DISTINCT region FROM studentInfo;")

dbGetQuery(mydb, "SELECT DISTINCT region, gender FROM studentInfo;")

```


```{r}
#EXERCISE 9
#Insert a duplicate row into one of your toy data tables. Then query the table without including duplicates.

# Insert an exact copy of a row that is already in san_francisco (listing 958),
# so the table really contains a duplicate row.
dbGetQuery(mydb, "INSERT INTO san_francisco (id, neighbourhood, room_type, price, days_occupied_in_2018, minimum_nights, number_of_reviews, reviews_per_month) VALUES (958, 'Western Addition', 'Entire home/apt', 170, 213, 1, 172, 1.51);")

# DISTINCT over every column collapses the repeated row into a single row
dbGetQuery(mydb, "SELECT DISTINCT * FROM san_francisco;")
```

## Conditional Expressions (non-standard)
Expand Down Expand Up @@ -356,10 +482,32 @@ dbGetQuery(mydb, "SELECT * FROM left_table
UNION
SELECT * FROM right_table;")

```

```{r}
#EXERCISE 10
# Create a common id variable in your two toy data tables. Then join those tables so that your query returns all the values from one table and only those that match from the other.

# Second toy table for the join: listing id plus its 2021 availability
create_2021_sql <- "CREATE TABLE sanfrancisco_2021 (id INTEGER,availability_2021 INTEGER);"
dbGetQuery(mydb, create_2021_sql)

# One INSERT statement per (id, availability_2021) pair
rows_2021 <- c(
  "INSERT INTO sanfrancisco_2021 VALUES (958, 10);",
  "INSERT INTO sanfrancisco_2021 VALUES (5858, 20);",
  "INSERT INTO sanfrancisco_2021 VALUES (8142, 10);",
  "INSERT INTO sanfrancisco_2021 VALUES (8339, 20);",
  "INSERT INTO sanfrancisco_2021 VALUES (10251, 10);",
  "INSERT INTO sanfrancisco_2021 VALUES (17132, 20);",
  "INSERT INTO sanfrancisco_2021 VALUES (24723, 10);",
  "INSERT INTO sanfrancisco_2021 VALUES (23511, 20);"
)
dbGetQuery(mydb, rows_2021[[1]])
dbGetQuery(mydb, rows_2021[[2]])
dbGetQuery(mydb, rows_2021[[3]])
dbGetQuery(mydb, rows_2021[[4]])
dbGetQuery(mydb, rows_2021[[5]])
dbGetQuery(mydb, rows_2021[[6]])
dbGetQuery(mydb, rows_2021[[7]])
dbGetQuery(mydb, rows_2021[[8]])

# Check what was loaded
show_2021_sql <- "SELECT * FROM sanfrancisco_2021;"
dbGetQuery(mydb, show_2021_sql)

# LEFT JOIN: keep every row of san_francisco and attach availability_2021 only
# where sanfrancisco_2021 has a matching id. Listings without a match get NULL
# (shown as NA in R). An INNER JOIN would drop those listings, which is not
# what the exercise asks for.
dbGetQuery(mydb, "SELECT s1.neighbourhood AS neighbourhood, room_type, price, days_occupied_in_2018, minimum_nights, number_of_reviews, s2.availability_2021 AS a_2021
  FROM san_francisco AS s1
  LEFT JOIN sanfrancisco_2021 AS s2
  ON s1.id = s2.id;")
```
```{r}
#Now disconnect from your database
Expand Down