17 changes: 14 additions & 3 deletions SETUP.md
@@ -36,8 +36,17 @@ This solution requires a version of Boto3 => 1.3
}
```

## Deploy Infrastructure with AWS CLI
This template requires use of an S3 bucket given its size.
## Deploying the CloudFormation Template

A [CloudFormation template](./cloudformation/sagemaker_studio.yml) deploys the resources required for this workshop, including databases and a SageMaker Studio domain in which a notebook is set up for use with the workshop. To deploy this template, first clone this repo in an environment of your choice (e.g., your laptop or another SageMaker notebook) and then follow the instructions below to deploy the stack.

If deploying with the AWS CLI, you will need AWS CLI v2 installed (see [here](https://docs.aws.amazon.com/cli/latest/userguide/cliv2-migration-instructions.html)).
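
A quick way to confirm which version is on your path (assuming the AWS CLI is already installed) is:

```
aws --version
```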

Note that the workshop notebooks must be run within the SageMaker Studio domain created by this template. Attempting to run them in a different environment (such as the one used to deploy the template) will fail because that environment lacks the IAM roles needed to access the databases and other resources.

### Deploy Infrastructure with AWS CLI
Because of its size, this template must be staged in an S3 bucket during deployment. For example, to deploy in us-west-2:

```
aws cloudformation deploy \
--stack-name txt2sql \
@@ -48,7 +57,9 @@ aws cloudformation deploy \
--s3-bucket bucket-to-hold-cfn-template
```
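
The bucket passed to `--s3-bucket` must already exist in the target region; `bucket-to-hold-cfn-template` above is only a placeholder name. A minimal sketch for creating such a bucket, assuming you have permission to create S3 buckets:

```
aws s3 mb s3://bucket-to-hold-cfn-template --region us-west-2
```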

## Deploy Infrastructure using the Console
When updating the parameters in `cloudformation/parameters/{region}.json`, make sure that the `DBPassword` value is at least 8 characters long.
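
For reference, one common structure for such a file is sketched below; the exact format and keys are defined by the template and repo, `DBPassword` is the only parameter called out above, and the value shown is just a placeholder:

```
[
  {
    "ParameterKey": "DBPassword",
    "ParameterValue": "replace-with-8-plus-characters"
  }
]
```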

### Deploy Infrastructure using the Console
To deploy this template using the AWS Console only, [follow the instructions here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-create-stack.html) by uploading the template found in the `cloudformation` folder named `sagemaker_studio.yml`.

Be sure to update the parameters for the template when deploying in the console [as described here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-using-console-create-stack-parameters.html). You will need to update the following:
108 changes: 78 additions & 30 deletions module_3/01_Fine_Tune_Amazon_Titan.ipynb
@@ -121,7 +121,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"spider_folder = '/home/sagemaker-user/text-to-sql-bedrock-workshop/module_3/spider'"
@@ -137,7 +139,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
@@ -159,9 +163,9 @@
"# print the full string, no truncation\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"\n",
"# set s3 bucket:\n",
"S3_BUCKET_NAME = \"<AthenaResultsS3Location>\" # Can be found in CloudFormation outputs\n",
"FINE_TUNING_JOB_ROLE_ARN = \"<BedrockFineTuningJobRole>\" # can be found in the cloudformation outputs under BedrockFineTuningJobRole"
"# set s3 bucket, this is found in the CloudFormation outputs (the bucket part of AthenaResultsS3Location):\n",
"S3_BUCKET_NAME = \"...\"\n",
"FINE_TUNING_JOB_ROLE_ARN = \"...\" # can be found in the cloudformation outputs under BedrockFineTuningJobRole"
]
},
{
@@ -175,7 +179,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def read_json_file(file_name):\n",
@@ -227,7 +233,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query_train_spider = construct_queries(\n",
@@ -256,7 +264,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def construct_schema(table):\n",
@@ -291,7 +301,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def construct_primary_keys(table):\n",
@@ -318,7 +330,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def construct_foreign_keys(table):\n",
@@ -355,7 +369,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def construct_table_df(tables_path):\n",
@@ -391,7 +407,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"\n",
@@ -409,7 +427,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"dev_df.head(1)"
@@ -440,7 +460,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def template_dataset_titan(\n",
@@ -491,7 +513,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"template_dataset_titan(train_df.iloc[0].to_dict(), return_jsonl=False)"
@@ -516,7 +540,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# constants\n",
@@ -532,7 +558,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def generate_jsonl_file(\n",
@@ -613,7 +641,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# create directory for output data sets\n",
@@ -667,7 +697,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# # Intermediary save to feather format\n",
@@ -690,7 +722,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def test_jsonl_file(savepath):\n",
@@ -739,7 +773,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Test the train dataset\n",
@@ -769,7 +805,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def get_s3_uri(bucket_name: str, local_path: str, s3_path: str) -> str:\n",
@@ -797,7 +835,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"session = boto3.session.Session()\n",
@@ -810,7 +850,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"bucket_name = S3_BUCKET_NAME\n",
@@ -848,7 +890,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"role_arn = FINE_TUNING_JOB_ROLE_ARN\n",
@@ -906,7 +950,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# check if tuning job has finished\n",
@@ -931,7 +977,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"provisionedModelName = f\"pvs-{custom_model_name}\"\n",
@@ -1653,9 +1701,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "conda_pytorch_p310",
"language": "python",
"name": "python3"
"name": "conda_pytorch_p310"
},
"language_info": {
"codemirror_mode": {
@@ -1667,7 +1715,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.14"
}
},
"nbformat": 4,
10 changes: 5 additions & 5 deletions module_4/01_prevent_SQL_injection.ipynb
@@ -157,8 +157,8 @@
},
"outputs": [],
"source": [
"\n",
"ATHENA_RESULTS_S3_LOCATION = \"<workshop bucket name>\" # available in cloudformation outputs\n",
"# available in cloudformation outputs, bucket name part of AthenaResultsS3Location:\n",
"ATHENA_RESULTS_S3_LOCATION = \"<workshop bucket name>\" \n",
"ATHENA_CATALOG_NAME = \"<athena catalog name>\" # available in cloudformation outputs\n",
"DB_NAME = \"tpcds1\""
]
@@ -1433,9 +1433,9 @@
],
"instance_type": "ml.m5.large",
"kernelspec": {
"display_name": "Python 3 (Data Science 3.0)",
"display_name": "conda_python3",
"language": "python",
"name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1"
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
@@ -1447,7 +1447,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.14"
}
},
"nbformat": 4,