From 0cf313425f5efb3514aacf773cae7a10e06695ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=80=A1=E9=A3=9E?= Date: Thu, 28 Jan 2021 00:53:00 +0800 Subject: [PATCH] Assignment1-Yifei --- amsterdam-house.csv | 32 +++++++++ sanfrancisco.csv | 30 ++++++++ sql-project.Rmd | 164 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 218 insertions(+), 8 deletions(-) create mode 100644 amsterdam-house.csv create mode 100644 sanfrancisco.csv diff --git a/amsterdam-house.csv b/amsterdam-house.csv new file mode 100644 index 0000000..4faa03b --- /dev/null +++ b/amsterdam-house.csv @@ -0,0 +1,32 @@ +,id,neighbourhood,latitude,longitude,room_type,price,days_occupied_in_2018,minimum_nights,number_of_reviews,reviews_per_month,availability_2019 +0,2818,Oostelijk Havengebied - Indische Buurt,52.36575451,4.941419235,Private room,59,296,3,248,2.1,44 +1,3209,Westerpark,52.39022505,4.873924095,Entire home/apt,160,309,4,42,1.03,47 +2,20168,Centrum-Oost,52.36508703,4.893541008,Entire home/apt,80,224,1,233,2.18,198 +3,25428,Centrum-West,52.3731144,4.883668196,Entire home/apt,125,208,14,1,0.09,141 +4,27886,Centrum-West,52.38672732,4.89207807,Private room,150,318,2,171,2.03,199 +5,28658,Bos en Lommer,52.37534218,4.85728935,Private room,65,64,3,434,4.16,295 +6,28871,Centrum-West,52.36718681,4.890917797,Private room,75,184,2,215,2.13,137 +7,29051,Centrum-West,52.36772541,4.891511737,Private room,55,131,2,383,4.07,188 +8,31080,Zuid,52.35132054,4.848382899,Entire home/apt,219,,3,32,0.36,336 +9,41125,Centrum-West,52.37891477,4.883205056,Entire home/apt,180,349,3,76,0.78,11 +10,42970,Centrum-West,52.36781448,4.890012023,Private room,159,55,3,426,4.24,89 +11,43980,Zuid,52.35745628,4.861242085,Entire home/apt,100,80,28,61,0.62,10 +12,44129,Centrum-West,52.38071123,4.886104318,Entire home/apt,250,278,2,176,1.74,16 +13,44391,Centrum-Oost,52.37016999,4.914377827,Entire home/apt,200,,3,31,0.31,0 +14,45246,Westerpark,52.3776764,4.876178649,Entire home/apt,155,,3,79,0.79,253 +15,46386,De 
Pijp - Rivierenbuurt,52.35247026,4.90825444,Entire home/apt,150,,3,3,0.03,0 +16,47061,De Baarsjes - Oud-West,52.3679861,4.874469332,Entire home/apt,140,,2,167,1.67,0 +17,48076,Centrum-West,52.38042002,4.894534735,Entire home/apt,350,92,5,159,1.82,276 +18,49790,De Baarsjes - Oud-West,52.36266379,4.86103174,Entire home/apt,225,87,3,98,1.06,296 +19,50515,Bos en Lommer,52.3772724,4.839252563,Entire home/apt,120,208,3,12,0.21,79 +20,50518,Westerpark,52.38200613,4.878649621,Entire home/apt,125,,1,91,1.22,0 +21,50523,Centrum-West,52.3684079,4.884133705,Private room,115,126,2,213,2.21,206 +22,50570,Bos en Lommer,52.37773744,4.848911991,Entire home/apt,90,354,4,152,1.62,19 +23,52490,Oostelijk Havengebied - Indische Buurt,52.37004589,4.938691236,Private room,72,337,3,81,0.82,11 +24,53067,De Pijp - Rivierenbuurt,52.35339267,4.900637643,Private room,87,295,1,333,3.4,2 +25,53671,Westerpark,52.38905337,4.885588535,Private room,75,31,3,261,2.8,289 +26,53692,De Pijp - Rivierenbuurt,52.35348316,4.900490172,Private room,60,,3,219,2.4,47 +27,55256,Centrum-Oost,52.37125689,4.903513145,Private room,86,352,1,120,1.26,129 +28,55703,Bos en Lommer,52.37560816,4.858187468,Entire home/apt,250,1,3,3,0.07,0 +29,55709,Centrum-Oost,52.3589565,4.897259341,Entire home/apt,159,355,5,53,0.54,32 +30,55807,De Baarsjes - Oud-West,52.36965708,4.862025427,Private room,60,28,2,150,1.53,122 diff --git a/sanfrancisco.csv b/sanfrancisco.csv new file mode 100644 index 0000000..e54cd50 --- /dev/null +++ b/sanfrancisco.csv @@ -0,0 +1,30 @@ +id,neighbourhood,room_type,price,days_occupied_in_2018,minimum_nights,number_of_reviews,reviews_per_month,availability_2019 +958,Western Addition,Entire home/apt,170,213,1,172,1.51,74 +5858,Bernal Heights,Entire home/apt,235,0,30,112,0.96,365 +7918,Haight Ashbury,Private room,65,0,32,17,0.15,365 +8142,Haight Ashbury,Private room,65,0,32,8,0.15,365 +8339,Western Addition,Entire home/apt,785,276,7,27,0.24,89 +8567,Western Addition,Entire home/apt,255,206,2,31,0.27,20 
+8739,Mission,Private room,139,284,1,631,5.54,129 +9225,Potrero Hill,Private room,135,26,1,434,3.91,342 +10251,Mission,Entire home/apt,265,18,3,307,2.73,303 +10578,Nob Hill,Entire home/apt,120,,30,18,0.21,365 +10819,Marina,Entire home/apt,218,,30,22,0.32,0 +10820,Haight Ashbury,Entire home/apt,177,,30,36,0.32,288 +10824,Western Addition,Entire home/apt,194,,30,14,0.17,347 +10832,Downtown/Civic Center,Entire home/apt,139,0,30,18,0.25,356 +12041,Haight Ashbury,Private room,85,0,32,6,0.07,365 +12042,Haight Ashbury,Private room,85,0,32,5,0.07,365 +12522,Castro/Upper Market,Private room,79,325,3,383,3.54,23 +12584,Inner Sunset,Entire home/apt,136,,30,16,0.21,365 +14125,Mission,Entire home/apt,215,28,3,97,0.9,278 +17132,South of Market,Entire home/apt,450,,35,14,0.14,219 +18231,Noe Valley,Entire home/apt,107,,30,60,0.58,129 +18904,Western Addition,Private room,110,288,3,350,4.1,74 +19040,Mission,Entire home/apt,198,,5,226,2.12,4 +21334,Nob Hill,Entire home/apt,125,302,30,118,1.13,325 +21914,Haight Ashbury,Private room,65,0,32,14,0.16,365 +23540,Mission,Entire home/apt,225,57,1,107,1.01,0 +23611,Bernal Heights,Private room,100,12,3,232,2.21,364 +23630,Castro/Upper Market,Entire home/apt,155,,3,340,3.29,235 +24390,Mission,Entire home/apt,95,,30,65,0.67,135 diff --git a/sql-project.Rmd b/sql-project.Rmd index 99a7974..4fe6425 100644 --- a/sql-project.Rmd +++ b/sql-project.Rmd @@ -1,6 +1,6 @@ --- title: "sql-workshop" -author: "Charles Lang" +author: "Yifei Zhang" output: html_document --- @@ -16,7 +16,7 @@ library(RMySQL) db_user <- 'admin' db_password <- 'testsql!' 
db_name <- 'oudb' -db_host <- 'PASTE YOUR ENDPOINT HERE' +db_host <- 'database-3.cvsve6xftlcb.us-east-2.rds.amazonaws.com' db_port <- 3306 mydb <- dbConnect(MySQL(), user = db_user, password = db_password, dbname = db_name, host = db_host, port = db_port) @@ -33,6 +33,10 @@ studentAssessment <- read.csv("studentAssessment.csv", header = TRUE) #Course data courses <- read.csv("courses.csv", header = TRUE) studentRegistration <- read.csv("studentRegistration.csv", header = TRUE) + +##Load toy dataset +sanfrancisco <-read.csv("sanfrancisco.csv",header=TRUE) +amsterdam <- read.csv("amsterdam-house.csv",header=TRUE) ``` ## Write data to the DB using the DBI package @@ -52,9 +56,14 @@ dbListTables(mydb) #Read a particular table dbReadTable(mydb, 'studentInfo') +``` + + +```{r} #EXERCISE 1 #Make two toy data sets with at least three variables and at least 30 rows each in them. Have a mix of numeric and character variables. Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like. - +dbWriteTable(mydb, "sanfrancisco", sanfrancisco) +dbWriteTable(mydb, "amsterdam", amsterdam) ``` ## Getting into SQL - READING @@ -80,10 +89,16 @@ dbGetQuery(mydb, "SELECT COUNT(score) FROM studentAssessment WHERE score > 50;") #Using an AND statement dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment WHERE score > 50 AND id_assessment = '1752';") + +``` + +```{r} #EXERCISE 2 #Read one of your toy data tables, make sure the output is ordered in descending order, you rename one of the variables and the output is limited to the first 20 rows. - +dbGetQuery(mydb,"SELECT availability_2019 AS availability_in_2019 FROM amsterdam ORDER BY availability_2019 DESC LIMIT 20") + #Read the other table according to a condition of one of the variables. 
+dbGetQuery(mydb, "SELECT * FROM sanfrancisco WHERE price > 500;") ``` @@ -101,7 +116,7 @@ dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment;") #View inserted row dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;") -#Add a row with missing values +#Add a row with missing values ???? dbGetQuery(mydb, "INSERT INTO studentAssessment (id_assessment, id_student, date_submitted) VALUES ('00001', '1', '20');") #View inserted row @@ -120,10 +135,29 @@ dbGetQuery(mydb, "DELETE FROM studentAssessment WHERE id_student = 1;") dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;") + +``` + + +```{r} #EXERCISE 3 #Insert a new row in one of your toy data tables leaving one variable empty. Change one value in your other table. Display your new tables. Delete the row you edited and the row you inserted. +#Insert a new row to sanfrancisco leaving availability_2019 empty +dbGetQuery(mydb, "INSERT INTO sanfrancisco (id, neighbourhood,room_type,price, days_occupied_in_2018, minimum_nights,number_of_reviews,reviews_per_month) VALUES ('40092','Mission','Entire home/apt','117','0','30', '96', '0.96');") + +#Change one value in amsterdam +dbGetQuery(mydb, "UPDATE amsterdam SET availability_2019 = '365' WHERE id = 2818;") + +#Display new tables +dbGetQuery(mydb, "SELECT * FROM amsterdam;") +dbGetQuery(mydb, "SELECT * FROM sanfrancisco;") +#Delete the row inserted +dbGetQuery(mydb, "DELETE FROM sanfrancisco WHERE id = 40092;") + +#Delete the row edited +dbGetQuery(mydb, "DELETE FROM amsterdam WHERE id = 2818;") ``` ## Add/Deleting Table @@ -156,10 +190,44 @@ dbGetQuery(mydb, "SELECT * FROM test;") #This should produce an error since your #Delete a table if it exists dbGetQuery(mydb, "DROP TABLE IF EXISTS test;") #No error since it is only if it exists + +``` + + +```{r} #EXERCISE 4 #Create a table that is exactly the same as your first toy data table but this time use SQL commands. Display your new table. 
Then delete the original table. +#Create san_francisco in SQL (number_of_reviews is a whole count -> INTEGER; reviews_per_month is fractional -> FLOAT) +dbGetQuery(mydb,"CREATE TABLE san_francisco ( + id INTEGER, + neighbourhood TEXT, + room_type TEXT, + price INTEGER, + days_occupied_in_2018 INTEGER, + minimum_nights INTEGER, + number_of_reviews INTEGER, + reviews_per_month FLOAT + );") + +dbListTables(mydb) + ``` +```{r} +#Inserting data into the table (values follow the column order of the CREATE TABLE statement) +dbGetQuery(mydb, "INSERT INTO san_francisco VALUES (958,'Western Addition','Entire home/apt', 170,213,1,172,1.51);") +dbGetQuery(mydb, "INSERT INTO san_francisco VALUES (5858,'Bernal Heights','Entire home/apt',235,0,30,112,0.96);") +dbGetQuery(mydb, "INSERT INTO san_francisco VALUES (7918,'Haight Ashbury','Private room',65,0,32, 17,0.15);") +dbGetQuery(mydb, "INSERT INTO san_francisco VALUES (8142,'Haight Ashbury','Private room',65,0,32,8,0.15);") + +#Display +dbGetQuery(mydb, "SELECT * FROM san_francisco;") + +#Delete the original table (IF EXISTS keeps this chunk re-runnable once the table is gone) +dbGetQuery(mydb, "DROP TABLE IF EXISTS sanfrancisco;") +``` + + # NULL Value ```{r} @@ -209,9 +277,35 @@ dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES ('1', 'A');") dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');") dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');") +``` + + +```{r} #EXERCISE 5 -#Recreate one of your toy data tables with the constraint that for one of the integer variablesthe default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table. +#Recreate one of your toy data tables with the constraint that for one of the integer variables the default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table. 
+ +#Recreate with DEFAULT 0 on the integer column number_of_reviews +dbGetQuery(mydb,"CREATE TABLE sanfrancisco_2 ( + id INTEGER, + neighbourhood TEXT, + room_type TEXT, + price INTEGER, + days_occupied_in_2018 INTEGER, + minimum_nights INTEGER, + number_of_reviews INTEGER DEFAULT 0, + reviews_per_month FLOAT + );") +#Insert rows that omit number_of_reviews (so DEFAULT 0 is applied) and store a real NULL, not the string 'NULL', in reviews_per_month +dbGetQuery(mydb,"INSERT INTO sanfrancisco_2 (id, neighbourhood,room_type,price, days_occupied_in_2018, minimum_nights,reviews_per_month) VALUES ('1', 'Marina','Entire home/apt','127','0','21',NULL);") + +dbGetQuery(mydb,"INSERT INTO sanfrancisco_2 (id, neighbourhood,room_type,price, days_occupied_in_2018, minimum_nights,reviews_per_month) VALUES ('40092','Mission','Entire home/apt','117','0','30', NULL);") + +#Display (number_of_reviews should show 0 and reviews_per_month NULL for both rows) +dbGetQuery(mydb, "SELECT * FROM sanfrancisco_2;") + +#Delete +dbGetQuery(mydb, "DROP TABLE IF EXISTS sanfrancisco_2;") ``` @@ -224,11 +318,22 @@ dbGetQuery(mydb, "SELECT * FROM studentAssessment LIMIT 10;") #Delete a column dbGetQuery(mydb, "ALTER TABLE studentAssessment DROP COLUMN email;") +``` + +```{r} #EXERCISE 6 #Add a column to one of your toy data tables with a default value of 3. Display your new table. Delete this column. -``` +#Add a column with default value 3 +dbGetQuery(mydb, "ALTER TABLE san_francisco ADD minimum_nights_2021 INTEGER DEFAULT 3 ") + +#Display +dbGetQuery(mydb, "SELECT * FROM san_francisco;") + +#Delete a column +dbGetQuery(mydb, "ALTER TABLE san_francisco DROP COLUMN minimum_nights_2021;") +``` # ID Columns ```{r} @@ -245,8 +350,18 @@ dbGetQuery(mydb, "SELECT * FROM test3;") dbGetQuery(mydb, "DROP TABLE IF EXISTS test3;") +``` + + +```{r} #EXERCISE 7 #Create a new table with four variables and a primary key that is a sequential id value. 
+dbGetQuery(mydb,"CREATE TABLE sanfrancisco3 ( + id INTEGER AUTO_INCREMENT PRIMARY KEY, + neighbour TEXT, + latitude FLOAT, + longitude FLOAT + );") ``` @@ -275,9 +390,13 @@ dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE regio #IN dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE region IN ('Wales','Ireland');") +``` + +```{r} #EXERCISE 8 #Query one of your original toy data tables, for two different conditions. - +#Two conditions: reviews per month larger than 3 and price below 100 +dbGetQuery(mydb, "SELECT neighbourhood,reviews_per_month,price FROM amsterdam WHERE reviews_per_month > 3 and price < 100 ORDER BY price DESC;") ``` ## Removing Duplicates @@ -286,9 +405,16 @@ dbGetQuery(mydb, "SELECT DISTINCT region FROM studentInfo;") dbGetQuery(mydb, "SELECT DISTINCT region, gender FROM studentInfo;") +``` + + +```{r} #EXERCISE 9 #Insert a duplicate row into one of your toy data tables. Then query the table without including duplicates. +dbGetQuery(mydb,"INSERT INTO san_francisco (id, neighbourhood,room_type,price, days_occupied_in_2018, minimum_nights,number_of_reviews,reviews_per_month) VALUES (8142,'Haight Ashbury','Private room',65,0,32,8,0.15);") #repeats the id 8142 row inserted in Exercise 4 so the table holds a true duplicate + +dbGetQuery(mydb, "SELECT DISTINCT * FROM san_francisco;") #DISTINCT * collapses fully identical rows ``` ## Conditional Expressions (non-standard) @@ -356,10 +482,32 @@ dbGetQuery(mydb, "SELECT * FROM left_table UNION SELECT * FROM right_table;") +``` +```{r} #EXERCISE 10 # Create a common id variable in your two toy data tables. Then join those tables so that your query returns all the values from one table and only those that match from the other. 
+#create a new toy table keyed by the same id as san_francisco +dbGetQuery(mydb,"CREATE TABLE sanfrancisco_2021 (id INTEGER,availability_2021 INTEGER);") + + +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (958, 10);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (5858, 20);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (8142, 10);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (8339, 20);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (10251, 10);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (17132, 20);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (24723, 10);") +dbGetQuery(mydb, "INSERT INTO sanfrancisco_2021 VALUES (23511, 20);") + +dbGetQuery(mydb, "SELECT * FROM sanfrancisco_2021;") + +#LEFT JOIN: keep every san_francisco row; a_2021 is filled only where the id matches, otherwise NULL +dbGetQuery(mydb,"SELECT s1.neighbourhood AS neighbourhood, room_type, price, days_occupied_in_2018, minimum_nights,number_of_reviews,s2.availability_2021 AS a_2021 + FROM san_francisco AS s1 + LEFT JOIN sanfrancisco_2021 AS s2 + ON s1.id = s2.id") ``` ```{r} #Now disconnect from your database