@@ -1049,11 +1049,59 @@ def create_parallel_cluster_lambdas(self):
10491049 )
10501050 )
10511051
1052+ createParallelClusterConfigLambdaAsset = s3_assets .Asset (self , "CreateParallelClusterConfigAsset" , path = "resources/lambdas/CreateParallelClusterConfig" )
1053+ self .create_parallel_cluster_config_lambda = aws_lambda .Function (
1054+ self , "CreateParallelClusterConfigLambda" ,
1055+ function_name = f"{ self .stack_name } -CreateParallelClusterConfig" ,
1056+ description = "Create ParallelCluster config" ,
1057+ memory_size = 2048 ,
1058+ runtime = aws_lambda .Runtime .PYTHON_3_9 ,
1059+ architecture = aws_lambda .Architecture .X86_64 ,
1060+ timeout = Duration .minutes (15 ),
1061+ log_retention = logs .RetentionDays .INFINITE ,
1062+ handler = "CreateParallelClusterConfig.lambda_handler" ,
1063+ code = aws_lambda .Code .from_bucket (createParallelClusterConfigLambdaAsset .bucket , createParallelClusterConfigLambdaAsset .s3_object_key ),
1064+ layers = [self .parallel_cluster_lambda_layer ],
1065+ environment = {
1066+ 'ClusterName' : self .config ['slurm' ]['ClusterName' ],
1067+ 'ErrorSnsTopicArn' : self .config .get ('ErrorSnsTopicArn' , '' ),
1068+ 'ParallelClusterConfigS3Bucket' : self .assets_bucket ,
1069+ 'ParallelClusterConfigYamlTemplateS3Key' : self .parallel_cluster_config_template_yaml_s3_key ,
1070+ 'ParallelClusterConfigYamlS3Key' : self .parallel_cluster_config_yaml_s3_key ,
1071+ 'Region' : self .cluster_region
1072+ }
1073+ )
1074+ self .create_parallel_cluster_config_lambda .add_to_role_policy (
1075+ statement = iam .PolicyStatement (
1076+ effect = iam .Effect .ALLOW ,
1077+ actions = [
1078+ 's3:DeleteObject' ,
1079+ 's3:GetObject' ,
1080+ 's3:PutObject'
1081+ ],
1082+ resources = [
1083+ f"arn:{ Aws .PARTITION } :s3:::{ self .assets_bucket } /{ self .config ['slurm' ]['ClusterName' ]} /*" ,
1084+ f"arn:{ Aws .PARTITION } :s3:::{ self .assets_bucket } /{ self .config ['slurm' ]['ClusterName' ]} /{ self .parallel_cluster_config_template_yaml_s3_key } " ,
1085+ f"arn:{ Aws .PARTITION } :s3:::{ self .assets_bucket } /{ self .config ['slurm' ]['ClusterName' ]} /{ self .parallel_cluster_config_yaml_s3_key } "
1086+ ]
1087+ )
1088+ )
1089+ if 'ErrorSnsTopicArn' in self .config :
1090+ self .create_parallel_cluster_config_lambda .add_to_role_policy (
1091+ statement = iam .PolicyStatement (
1092+ effect = iam .Effect .ALLOW ,
1093+ actions = [
1094+ 'sns:Publish'
1095+ ],
1096+ resources = [self .config ['ErrorSnsTopicArn' ]]
1097+ )
1098+ )
1099+
10521100 createParallelClusterLambdaAsset = s3_assets .Asset (self , "CreateParallelClusterAsset" , path = "resources/lambdas/CreateParallelCluster" )
10531101 self .create_parallel_cluster_lambda = aws_lambda .Function (
10541102 self , "CreateParallelClusterLambda" ,
10551103 function_name = f"{ self .stack_name } -CreateParallelCluster" ,
1056- description = "Create ParallelCluster from json string " ,
1104+ description = "Create ParallelCluster" ,
10571105 memory_size = 2048 ,
10581106 runtime = aws_lambda .Runtime .PYTHON_3_9 ,
10591107 architecture = aws_lambda .Architecture .X86_64 ,
@@ -2380,7 +2428,7 @@ def create_parallel_cluster_config(self):
23802428 index = 0
23812429 for extra_mount_sg_name , extra_mount_sg in self .extra_mount_security_groups [fs_type ].items ():
23822430 template_var = f"ExtraMountSecurityGroupId{ index } "
2383- self .create_parallel_cluster_lambda .add_environment (
2431+ self .create_parallel_cluster_config_lambda .add_environment (
23842432 key = template_var ,
23852433 value = extra_mount_sg .security_group_id
23862434 )
@@ -2838,50 +2886,64 @@ def create_parallel_cluster_config(self):
28382886 self .parallel_cluster_config ['SharedStorage' ].append (parallel_cluster_storage_dict )
28392887
28402888 # Save the config template to s3.
2889+ self .parallel_cluster_config_template_yaml = yaml .dump (self .parallel_cluster_config )
2890+ self .parallel_cluster_config_template_yaml_hash = sha512 ()
2891+ self .parallel_cluster_config_template_yaml_hash .update (bytes (self .parallel_cluster_config_template_yaml , 'utf-8' ))
2892+ self .assets_hash .update (bytes (self .parallel_cluster_config_template_yaml , 'utf-8' ))
28412893 self .s3_client .put_object (
28422894 Bucket = self .assets_bucket ,
28432895 Key = self .parallel_cluster_config_template_yaml_s3_key ,
2844- Body = yaml . dump ( self .parallel_cluster_config )
2896+ Body = self .parallel_cluster_config_template_yaml
28452897 )
28462898
28472899 self .build_config_files = CustomResource (
28482900 self , "BuildConfigFiles" ,
28492901 service_token = self .create_build_files_lambda .function_arn
28502902 )
28512903
2852- self .create_parallel_cluster_lambda .add_environment (
2904+ self .create_parallel_cluster_config_lambda .add_environment (
28532905 key = 'ParallelClusterAssetReadPolicyArn' ,
28542906 value = self .parallel_cluster_asset_read_policy .managed_policy_arn
28552907 )
2856- self .create_parallel_cluster_lambda .add_environment (
2908+ self .create_parallel_cluster_config_lambda .add_environment (
28572909 key = 'ParallelClusterJwtWritePolicyArn' ,
28582910 value = self .parallel_cluster_jwt_write_policy .managed_policy_arn
28592911 )
2860- self .create_parallel_cluster_lambda .add_environment (
2912+ self .create_parallel_cluster_config_lambda .add_environment (
28612913 key = 'ParallelClusterMungeKeyWritePolicyArn' ,
28622914 value = self .parallel_cluster_munge_key_write_policy .managed_policy_arn
28632915 )
2864- self .create_parallel_cluster_lambda .add_environment (
2916+ self .create_parallel_cluster_config_lambda .add_environment (
28652917 key = 'ParallelClusterSnsPublishPolicyArn' ,
28662918 value = self .parallel_cluster_sns_publish_policy .managed_policy_arn
28672919 )
2868- self .create_parallel_cluster_lambda .add_environment (
2920+ self .create_parallel_cluster_config_lambda .add_environment (
28692921 key = 'SlurmCtlSecurityGroupId' ,
28702922 value = self .slurmctl_sg .security_group_id
28712923 )
2872- self .create_parallel_cluster_lambda .add_environment (
2924+ self .create_parallel_cluster_config_lambda .add_environment (
28732925 key = 'SlurmNodeSecurityGroupId' ,
28742926 value = self .slurmnode_sg .security_group_id
28752927 )
2928+ self .parallel_cluster_config = CustomResource (
2929+ self , "ParallelClusterConfig" ,
2930+ service_token = self .create_parallel_cluster_config_lambda .function_arn ,
2931+ properties = {
2932+ 'ParallelClusterConfigTemplateYamlHash' : self .parallel_cluster_config_template_yaml_hash .hexdigest ()
2933+ }
2934+ )
2935+ self .parallel_cluster_config_template_yaml_s3_url = self .parallel_cluster_config .get_att_string ('ConfigTemplateYamlS3Url' )
2936+ self .parallel_cluster_config_yaml_s3_url = self .parallel_cluster_config .get_att_string ('ConfigYamlS3Url' )
2937+ self .parallel_cluster_config_yaml_hash = self .parallel_cluster_config .get_att_string ('ConfigYamlHash' )
2938+ self .assets_hash .update (bytes (self .parallel_cluster_config_yaml_hash , 'utf-8' ))
2939+
28762940 self .parallel_cluster = CustomResource (
28772941 self , "ParallelCluster" ,
28782942 service_token = self .create_parallel_cluster_lambda .function_arn ,
28792943 properties = {
2880- 'ParallelClusterConfigHash' : self .assets_hash . hexdigest ()
2944+ 'ParallelClusterConfigHash' : self .parallel_cluster_config_yaml_hash
28812945 }
28822946 )
2883- self .parallel_cluster_config_template_yaml_s3_url = self .parallel_cluster .get_att_string ('ConfigTemplateYamlS3Url' )
2884- self .parallel_cluster_config_yaml_s3_url = self .parallel_cluster .get_att_string ('ConfigYamlS3Url' )
28852947 # The lambda to create an A record for the head node must be built before the parallel cluster.
28862948 self .parallel_cluster .node .add_dependency (self .create_head_node_a_record_lambda )
28872949 self .parallel_cluster .node .add_dependency (self .update_head_node_lambda )
@@ -2891,6 +2953,7 @@ def create_parallel_cluster_config(self):
28912953 self .parallel_cluster .node .add_dependency (self .configure_res_submitters_lambda )
28922954 # Build config files need to be created before cluster so that they can be downloaded as part of on_head_node_configures
28932955 self .parallel_cluster .node .add_dependency (self .build_config_files )
2956+ self .parallel_cluster .node .add_dependency (self .parallel_cluster_config )
28942957
28952958 self .call_slurm_rest_api_lambda .node .add_dependency (self .parallel_cluster )
28962959
@@ -2899,7 +2962,7 @@ def create_parallel_cluster_config(self):
28992962 self , "UpdateHeadNode" ,
29002963 service_token = self .update_head_node_lambda .function_arn ,
29012964 properties = {
2902- 'ParallelClusterConfigHash' : self .assets_hash . hexdigest () ,
2965+ 'ParallelClusterConfigHash' : self .parallel_cluster_config_yaml_hash ,
29032966 }
29042967 )
29052968 self .update_head_node .node .add_dependency (self .parallel_cluster )
@@ -2929,6 +2992,9 @@ def create_parallel_cluster_config(self):
29292992 CfnOutput (self , "ParallelClusterConfigYamlS3Url" ,
29302993 value = self .parallel_cluster_config_yaml_s3_url
29312994 )
2995+ CfnOutput (self , "ParallelClusterConfigHash" ,
2996+ value = self .parallel_cluster_config_yaml_hash
2997+ )
29322998 CfnOutput (self , "PlaybookS3Url" ,
29332999 value = self .playbooks_asset .s3_object_url
29343000 )
0 commit comments