From 86c49e9b34cf66ec6bd48d53775e1e88c523cee8 Mon Sep 17 00:00:00 2001 From: Zoe Meow Date: Wed, 27 Jan 2021 11:07:16 -0500 Subject: [PATCH 1/2] Project 01 Submit --- .gitignore | 4 +++ Project01.Rproj | 13 ++++++++ sql-project.Rmd | 79 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 Project01.Rproj diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/Project01.Rproj b/Project01.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/Project01.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/sql-project.Rmd b/sql-project.Rmd index 99a7974..bcd3499 100644 --- a/sql-project.Rmd +++ b/sql-project.Rmd @@ -1,6 +1,6 @@ --- title: "sql-workshop" -author: "Charles Lang" +author: "Zoe Wang" output: html_document --- @@ -16,7 +16,7 @@ library(RMySQL) db_user <- 'admin' db_password <- 'testsql!' db_name <- 'oudb' -db_host <- 'PASTE YOUR ENDPOINT HERE' +db_host <- Sys.getenv('OUDB_HOST', unset = 'database-1.c3y1cumw4ael.us-east-2.rds.amazonaws.com') db_port <- 3306 mydb <- dbConnect(MySQL(), user = db_user, password = db_password, dbname = db_name, host = db_host, port = db_port) @@ -53,7 +53,15 @@ dbListTables(mydb) dbReadTable(mydb, 'studentInfo') #EXERCISE 1 -#Make two toy data sets with at least three variables and at least 30 rows each in them. Have a mix of numeric and character variables. Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like. +#Make two toy data sets with at least three variables(columns) and at least 30 rows each in them. Have a mix of numeric and character variables. 
Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like. + +#toy data set 01: first 30 rows of studentAssessment, written through the already-open mydb connection +toy <- mydb +dbWriteTable(toy, "assessment", studentAssessment[1:30, ]) + +#toy data set 02: first 30 rows of studentRegistration, written through the already-open mydb connection +toytoy <- mydb +dbWriteTable(toytoy, "registration", studentRegistration[1:30, ]) ``` @@ -83,7 +91,11 @@ dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment WHERE score > 50 AND id #EXERCISE 2 #Read one of your toy data tables, make sure the output is ordered in descending order, you rename one of the variables and the output is limited to the first 20 rows. +dbGetQuery(toy, "SELECT id_student AS 'ID' FROM assessment ORDER BY id_student DESC LIMIT 20; ") + #Read the other table according to a condition of one of the variables. +dbGetQuery(toytoy, "SELECT COUNT(date_registration) FROM registration WHERE date_registration < -100;") + ``` @@ -123,6 +135,14 @@ dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;" #EXERCISE 3 #Insert a new row in one of your toy data tables leaving one variable empty. Change one value in your other table. Display your new tables. Delete the row you edited and the row you inserted. 
+dbGetQuery(toy, "INSERT INTO assessment (id_assessment, id_student, date_submitted, is_banked) VALUES ('1752', '1', '10', '0');") +dbGetQuery(toytoy, "UPDATE registration SET code_module = 'BBB' WHERE id_student = 11391;") + +dbReadTable(toy, 'assessment') +dbReadTable(toytoy, 'registration') + +dbGetQuery(toy, "DELETE FROM assessment WHERE id_student = 1;") +dbGetQuery(toytoy, "DELETE FROM registration WHERE code_module = 'BBB';") ``` @@ -158,6 +178,16 @@ dbGetQuery(mydb, "DROP TABLE IF EXISTS test;") #No error since it is only if it #EXERCISE 4 #Create a table that is exactly the same as your first toy data table but this time use SQL commands. Display your new table. Then delete the original table. +dbGetQuery(toy, "CREATE TABLE assessment_new ( + id_assessment INTEGER, + id_student INTEGER, + date_submitted INTEGER, + is_banked BOOLEAN, + score INTEGER +);") + +dbGetQuery(toy, "SELECT * FROM assessment_new;") +dbGetQuery(toy, "DROP TABLE assessment;") ``` @@ -210,7 +240,22 @@ dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');") dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');") #EXERCISE 5 -#Recreate one of your toy data tables with the constraint that for one of the integer variablesthe default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table. +#Recreate one of your toy data tables with the constraint that for one of the integer variables the default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table. 
+ +dbGetQuery(toy, "CREATE TABLE assessment_new2 ( + id_assessment INTEGER, + id_student INTEGER, + date_submitted INTEGER, + is_banked BOOLEAN, + score INTEGER DEFAULT 0 +);") + +dbGetQuery(toy,"INSERT INTO assessment_new2 (id_assessment, id_student, date_submitted, is_banked, score) VALUES ('1752', '11391', '18', '0', NULL);") +dbGetQuery(toy,"INSERT INTO assessment_new2 (id_assessment, id_student, date_submitted, is_banked, score) VALUES ('1752', '11391', '18', '0', NULL);") + +dbGetQuery(toy, "SELECT * FROM assessment_new2;") + +dbGetQuery(toy, "DROP TABLE IF EXISTS assessment_new2;") ``` @@ -227,6 +272,11 @@ dbGetQuery(mydb, "ALTER TABLE studentAssessment DROP COLUMN email;") #EXERCISE 6 #Add a column to one of your toy data tables with a default value of 3. Display your new table. Delete this column. + +dbGetQuery(toytoy, "ALTER TABLE registration ADD code_id INTEGER DEFAULT 3") +dbGetQuery(toytoy, "SELECT * FROM registration LIMIT 10;") +dbGetQuery(toytoy, "ALTER TABLE registration DROP COLUMN code_id;") + ``` @@ -248,6 +298,14 @@ dbGetQuery(mydb, "DROP TABLE IF EXISTS test3;") #EXERCISE 7 #Create a new table with four variables and a primary key that is a sequential id value. +dbGetQuery(toy,"CREATE TABLE test ( + id INTEGER AUTO_INCREMENT PRIMARY KEY, + name TEXT, + score INTEGER, + email TEXT, + phone TEXT +);") + ``` ## Filtering (WHERE) @@ -278,6 +336,10 @@ dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE regio #EXERCISE 8 #Query one of your original toy data tables, for two different conditions. +dbGetQuery(toy, "SELECT id_student, score FROM assessment_new WHERE score > 75 ORDER BY id_student DESC;") + +dbGetQuery(toy, "SELECT name, email, phone FROM test WHERE phone LIKE '919%';") + ``` ## Removing Duplicates @@ -289,6 +351,8 @@ dbGetQuery(mydb, "SELECT DISTINCT region, gender FROM studentInfo;") #EXERCISE 9 #Insert a duplicate row into one of your toy data tables. Then query the table without including duplicates. 
+dbGetQuery(toytoy, "INSERT INTO registration VALUES ('AAA', '2013J', '11391', '-159', NULL); ") +dbGetQuery(toytoy, "SELECT DISTINCT id_student FROM registration") ``` ## Conditional Expressions (non-standard) @@ -359,13 +423,18 @@ dbGetQuery(mydb, "SELECT * FROM left_table #EXERCISE 10 # Create a common id variable in your two toy data tables. Then join those tables so that your query returns all the values from one table and only those that match from the other. +dbGetQuery(toy, "ALTER TABLE assessment_new ADD id INTEGER DEFAULT 3;") +dbGetQuery(toytoy, "ALTER TABLE registration ADD common_id INTEGER DEFAULT 3;") +dbGetQuery(toy, "SELECT id FROM assessment_new LEFT JOIN registration ON id = common_id;") + ``` + ```{r} #Now disconnect from your database dbDisconnect(mydb) -#Then retunr to your AWS console and: +#Then return to your AWS console and: #1. Click on "Actions" and then "Stop" #2. Do NOT make a snapshot From 8e5dd4f92ca9e6f97e16c07c9b73026337929090 Mon Sep 17 00:00:00 2001 From: Zoe Wang <70659434+ZoeMeowww@users.noreply.github.com> Date: Tue, 15 Jun 2021 11:16:15 +0800 Subject: [PATCH 2/2] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f793fb6..2d29ef9 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +#testing my setup here# + # SQL & Relational Databases Relational databases are the backbone of data science and the language that we use to communicate with them is called SQL. The SQL test is a common component for data-adjacenemnt jobs within industry, government and the education sector. It is a useful tool that some argue [spawned the field of data science](https://www.kdnuggets.com/gpspubs/sigkdd-explorations-kdd-10-years.html). Before Big Data was a thing, Knowledge Discovery in Databases (KDD) used simple SQL queries to investigate and understand the nature of the large amounts of data that were being collected by governments and companies. 
The humble SQL test now torments the budding data scientist as a right of passage in the job search process.