From e16074795224e30857c1d6dc93fac5354045ec05 Mon Sep 17 00:00:00 2001 From: Oleg Kachur Date: Tue, 24 Feb 2026 12:32:50 +0000 Subject: [PATCH] Delete google provider deprecated items scheduled for Jan 2026 --- .../unit/always/test_project_structure.py | 38 - providers/google/docs/changelog.rst | 40 + .../integration-logos/Google-Data-Catalog.png | Bin 15195 -> 0 bytes .../docs/operators/cloud/datacatalog.rst | 526 ---- .../google/docs/operators/cloud/vertex_ai.rst | 91 - providers/google/provider.yaml | 6 - .../providers/google/cloud/hooks/bigquery.py | 9 - .../google/cloud/hooks/datacatalog.py | 1172 --------- .../cloud/hooks/vertex_ai/generative_model.py | 292 +- .../google/cloud/links/datacatalog.py | 84 - .../google/cloud/operators/datacatalog.py | 2338 ----------------- .../cloud/operators/vertex_ai/auto_ml.py | 2 +- .../operators/vertex_ai/generative_model.py | 568 +--- .../providers/google/get_provider_info.py | 7 - .../google/cloud/hooks/test_datacatalog.py | 1600 ----------- .../hooks/vertex_ai/test_generative_model.py | 111 - .../cloud/operators/test_datacatalog.py | 994 ------- .../vertex_ai/test_generative_model.py | 259 +- .../run_provider_yaml_files_check.py | 3 - 19 files changed, 44 insertions(+), 8096 deletions(-) delete mode 100644 providers/google/docs/integration-logos/Google-Data-Catalog.png delete mode 100644 providers/google/docs/operators/cloud/datacatalog.rst delete mode 100644 providers/google/src/airflow/providers/google/cloud/hooks/datacatalog.py delete mode 100644 providers/google/src/airflow/providers/google/cloud/links/datacatalog.py delete mode 100644 providers/google/src/airflow/providers/google/cloud/operators/datacatalog.py delete mode 100644 providers/google/tests/unit/google/cloud/hooks/test_datacatalog.py delete mode 100644 providers/google/tests/unit/google/cloud/operators/test_datacatalog.py diff --git a/airflow-core/tests/unit/always/test_project_structure.py b/airflow-core/tests/unit/always/test_project_structure.py index 
ffd7aa53c5c71..3c402ff27234f 100644 --- a/airflow-core/tests/unit/always/test_project_structure.py +++ b/airflow-core/tests/unit/always/test_project_structure.py @@ -427,37 +427,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360UploadLineItemsOperator", "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360DownloadLineItemsOperator", "airflow.providers.google.marketing_platform.sensors.GoogleDisplayVideo360RunQuerySensor", - "airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook", - "airflow.providers.google.cloud.links.datacatalog.DataCatalogEntryGroupLink", - "airflow.providers.google.cloud.links.datacatalog.DataCatalogEntryLink", - "airflow.providers.google.cloud.links.datacatalog.DataCatalogTagTemplateLink", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryGroupOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateFieldOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryGroupOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateFieldOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryGroupOperator", - 
"airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetTagTemplateOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogListTagsOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogLookupEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogRenameTagTemplateFieldOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogSearchCatalogOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateFieldOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerateFromCachedContentOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.CreateCachedContentOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.CountTokensOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerativeModelGenerateContentOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.TextEmbeddingModelGetEmbeddingsOperator", } BASE_CLASSES = { @@ -486,8 +455,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.vertex_ai.auto_ml.AutoMLTrainingJobBaseOperator", "airflow.providers.google.cloud.operators.vertex_ai.endpoint_service.UpdateEndpointOperator", 
"airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.GetBatchPredictionJobOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.DeleteExperimentRunOperator", } ASSETS_NOT_REQUIRED = { @@ -519,11 +486,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.cloud_storage_transfer_service." "CloudDataTransferServiceResumeOperationOperator", "airflow.providers.google.cloud.operators.compute.ComputeEngineBaseOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryGroupOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateFieldOperator", - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateOperator", "airflow.providers.google.cloud.operators.datafusion.CloudDataFusionDeleteInstanceOperator", "airflow.providers.google.cloud.operators.datafusion.CloudDataFusionDeletePipelineOperator", "airflow.providers.google.cloud.operators.dataproc.DataprocDeleteBatchOperator", diff --git a/providers/google/docs/changelog.rst b/providers/google/docs/changelog.rst index b66b95a3c13e1..e294a0b7b3b87 100644 --- a/providers/google/docs/changelog.rst +++ b/providers/google/docs/changelog.rst @@ -27,6 +27,46 @@ Changelog --------- +.. warning:: + Deprecated classes, parameters and features have been removed from the Google provider package. 
+ The following breaking changes were introduced: + +* Operators + + * ``Remove CloudDataCatalogCreateEntryOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator instead`` + * ``Remove CloudDataCatalogCreateEntryGroupOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryGroupOperator instead`` + * ``Remove CloudDataCatalogCreateTagOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator, airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator instead`` + * ``Remove CloudDataCatalogCreateTagTemplateOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator instead`` + * ``Remove CloudDataCatalogCreateTagTemplateFieldOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator, airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator instead`` + * ``Remove CloudDataCatalogDeleteEntryOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryOperator instead`` + * ``Remove CloudDataCatalogDeleteEntryGroupOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryGroupOperator instead`` + * ``Remove CloudDataCatalogDeleteTagOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator instead`` + * ``Remove CloudDataCatalogDeleteTagTemplateOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteAspectTypeOperator instead`` + * ``Remove CloudDataCatalogDeleteTagTemplateFieldOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator instead`` + * ``Remove CloudDataCatalogGetEntryOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator instead`` + * ``Remove CloudDataCatalogGetEntryGroupOperator use 
airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryGroupOperator instead`` + * ``Remove CloudDataCatalogGetTagTemplateOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetAspectTypeOperator instead`` + * ``Remove CloudDataCatalogListTagsOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator instead`` + * ``Remove CloudDataCatalogLookupEntryOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogLookupEntryOperator instead`` + * ``Remove CloudDataCatalogRenameTagTemplateFieldOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator instead`` + * ``Remove CloudDataCatalogSearchCatalogOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogSearchEntriesOperator instead`` + * ``Remove CloudDataCatalogUpdateEntryOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator instead`` + * ``Remove CloudDataCatalogUpdateTagOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator instead`` + * ``Remove CloudDataCatalogUpdateTagTemplateOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator instead`` + * ``Remove CloudDataCatalogUpdateTagTemplateFieldOperator use airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.TextEmbeddingModelGetEmbeddingsOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateEmbeddingsOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerativeModelGenerateContentOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator instead`` + * ``Remove 
airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAISupervisedFineTuningTrainOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.CountTokensOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAICountTokensOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.CreateCachedContentOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAICreateCachedContentOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerateFromCachedContentOperator use airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator instead`` + * ``Remove airflow.providers.google.cloud.operators.vertex_ai.generative_model.DeleteExperimentRunOperator use airflow.providers.google.cloud.operators.vertex_ai.experiment_service.DeleteExperimentRunOperator instead`` + +* Hooks + + * ``Remove CloudDataCatalogHook use airflow.providers.google.cloud.hooks.dataplex.DataplexHook instead`` + * ``Remove airflow.providers.google.cloud.hooks.vertex_ai.generative_model.ExperimentRunHook use airflow.providers.google.cloud.hooks.vertex_ai.experiment_service.ExperimentRunHook instead`` + 20.0.0 ...... 
diff --git a/providers/google/docs/integration-logos/Google-Data-Catalog.png b/providers/google/docs/integration-logos/Google-Data-Catalog.png deleted file mode 100644 index 93aab9f89436cc3fd2bef417f62b217c60209241..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15195 zcma*ObzB@z&^|~KEV#S76Wk$K@ZhkxySpqB2o^NBdw>9oEba~)Y_Z_(!QJil``+*K z-hX%Z$Mp8jY|YMebyq)C_0$iwuX5-pBq(rjaOev1G8%Aj@7Vr3kP%^L`~u^>U|+)R zq@~pqq@}4`-CeBh9IfEsm=XgM#pL_d@WNF!OEe6~j;XeO77FWO;k%`<5cEpfu{Io< zUQSEcPXGMXB8XCrfD}wniNOJF6K+8X-N1~>&Q30hN%1w&u*KdEhdc?M<@E#a-bvMu zXw}=iSK}L%XWTO9=w?AQ-jv1)x~660(p0r3s6PkQ01r#sVlLRux+GXZc*=RVHxdq6RexM~%itA{0ec z$h5WMF2i$1QTExP_Knb`yB$t6{DxGtdAm5O5PG`AP-J2+Ew%7)q7xEkhG|cVyK_vD zSG-nBckP}(;BO>a^6`|>t2ofJFYY+)rZ8 z)Al=`38A|cj}#9mHFBsLye;v2Ni3OK`2W-N;=p?U+rTSf!At&cgUu$11^>Uz|0DB1 zz5kW@AL;*<`A<4*3zAO#CsqqNBm`;3M>O6K0rgt%#^FPpbF9S zGosGKB~iLeMRm~;X!L;}YrgutU+dlh`1I7?M3sO)1WXj-GhtVlu^JAQ1`j1@vE`n+ zt)q>HALFJA@wr(4}{NGqk&9>%DBoFBSIoex7C#PtD+azlzMl!s*DSs}-P>z6al=ty7n zaz3mlb74E@Xdzpv13nFI&!6692x@o>V}r=R7e=EJ)#$C0TT%Bo#W;xn7=%9|I=3ra z86*IeDLFHxX#jfh(x6p`he$qZqrk5q7PHBgAOSZ|!;9{~Ih$7}8vRA{g@m2b{H;#k#BWmM(biigp(4gg)(=_bu307O&^;2ZeO3Qd&OBEKBCrP%l&K{8L+m-G+oc4}LM6nqjNI+9HizZeRqbx9#0+*V)2#9(9Ac(MLak zBg*4@U$69C!x(@b4Pd^2@twn6n4ztTi6uc;0M^xJXMI}SS*gHRN%*7aV{tpbeK7?L z{L6&1-k4>!GSb2SZunj$H(px=y;C8QvAW==jVOARQ9NN2Nm58`B=VeGEk+YL_*mF_ z5FZaEA`0o~&R|W9HK-7FrDfsJHz=m}if)nv4?{5o98X!rb4_O@DpULDXBD~Yn4HXu zSa?p5A8`i-%pIrES!1(ptn$RqdlMIFI2bUgiK2He*bA<+yt_AJ2h8;*%n?7nXN+?S z3-w0N79nR9kY{rMpw4B9Mfgk*xgmrXnLdGkl0#%0B1%lXu7&fKEqAOt)W~AgB;^D; zS_ZM?1s(1`&W*%HC^>!2%0U_2Cb1U`fL><7Z$ze>l8c@B?*CL-Crr6x#Y;||MRhr; zH?&WOd#=R-$-qsxaPj&G9?&!4k*iZ;rqa?ns@l?J}gq?5ZG*Iz3j1{|Qw4N0eUGP0|i7+0u0 zS8rwm(IcTSp`=i+(&39oXCJ);24-%ORTI; zg!4Gcj%Df<6SdV6nB5>1p^7PbEb#y78|FM5iBnv2PP$(sn2pIi| zBVFl>;4u5O|A$i26bBmSZGMBJD3gwkI?7xo17vjhmuKuJQ8kn#>W-H2$rbugZO`Xp zCuxU-Wzv za-pmx6J&?o&k%k6J9{L$K>`M5cdaQxWg}|dqU}H(PS_Dv*ue<>uo^rory$aVBeLC5 zT3&tZ 
zY7xW1=R?N*U{oAi!Z5>4y9Hn$cyo1H-s4ZUX>N<9pItjOLv;bwj(4Ji@1B#ihwcT^ z#uW_!+B>n<C@)BVf!tADKq4 z@XRsnZOvnqDJ1_y8Z0cLI@}5_`ODN+E0}KVlS(bM{UVVr-}p93bsC5b8;k24vm0nF zQ~MXpOw$uV>qW0Vb5IUi+WSHU!q{v<4K5ap@}Fk2ll2*^eKK%I>t3>c6Rz@g1xrA( zN*`kV_Qd2~^X3Xe{hY%}eAZK~6@i&zfdgNM{N3C;wm{~cEbS27o6Q~iQPL-Nfq7tx ztZc3L5X>Y6o*A^R7{1p@)p31e8>jW)^I6>r^i|r7&lV&2%Gc30T6^UCcpKVr_TKnj zIB>>t6K^obAZA~7*RU4KD>8S=cU}4ubW77zkwiU~17~8m!)22s*KxTW6sS$imVMv` z8P7VALx)+@9ES*7I-^q=CN|OBQ<4|hw5zhfyNKTMRT>dW=aJ**t#!BfBhhUYbcD*QP!Xexh|O*3jbpz)!j5@ zQzk39U#pS*UCH(=kj&G2sMNb&gW+dYMop0TL!C{^id;r4LzJKk+-GxIwN{|&CH=wW z1k>-9ynpca`k)A4h+rvea6jjsU&DEY5+pDTQ=GWjgQ^6&>(^I1==tR< zVoWdnCxW3kHMWeR`)Qnnsm9hoJGlviAn;=CT}olACiX>Qr9OhG5oc!M!%G^0;CY-ydLS7Citw}*VhMMl*aD2+OKhd^Q6U*w9lW^h{niL1d-^JrRG388DT`_>Fky4NlbZ9y z)zDejJaHe}!R&B6OX9&lEbFo_UHS+dHUZAX*ilt0Mm#kz2(y`3u&NTFHSeiw<}?#lmz(t!#mZkMjj)JgbF z{{EU2Zo&U=ooj_vuTwa_)>yPDpVxnz%4*=ju`RWSB(XX|_g%_F3*wFXw)r_jLj<`T z?(@sxHyI*nW+OcQFGLJ>zwP#X-ev_E;;-xU+rF?ayan}`bioh#nR^)Zj)O=5+}VAs5=1AcTr+bJ;H;L9fizWpS(w-MZ9`_zkYAJi_cRPutSR-s7ACm ziX(rg$_~w45`inLm1H!##Nj`NC)7ZbYi&(QbP-jP4xbOLf9DQ*`JtjEsX7g|PcJO5 zcg=H93!Tjl6S#%#cESpeqyy2(zQg{1b@7T}CxDhlFfoh>%8?jca?+!QxiG2K_Raqr7k?wFaG1D0wpabH$?HW8$ zZy)mHFM3IVDlhLjif+{Z1AXR#>-t0-SKkmSJ0TaB-qJRl#OdVN(_C4<%kB@kRoIde zWQ#1_-WMMh1^DTSnN7-N*dk(AseYU*;fJ6ap!AQB{QL_*g6%xs6pb3f*57_f`TE9N z9}04*UbVsDv9%Ouh5KkFD&Ar+%{Jx|StFk@feqQC|v_LArhDxfG>|u6^$J5VLy4>F}tZNO`i8$OpEVC0W=PMn) z@3X`WYIwJ+i{>zOiQnKYWV}Mg{Sbpa64~N#jgC5}w2WRxT17PU!4m=Bb)G#7$jv4J z?4lp!6lmTAlJNE54w?trC`$4cAu(WL;4nixNcA)M%0mw?Nt^8@x*Ouz&%E3&%_~fH z@LS-L?T0jPm}<=LzwUWTqI<)Du>Zv6hLJSsSXZH{h<`75vtWggjA>5NsbniAMs%bo z6MdupNWW!;pkTH}LyYHzIh-%)BH?}b)}Ksx2On?CYd_kGz&t~Jxq*FJ=a#-!xMatn zOydX-<EC%}m9=z+VsY+`)H8#}~~0%7GqPW6PWh z9S#Vr*}N|)#I16G{)Dc1@mbz z*)tto_22UbCRI9`s(bJyoCNj{+ru=H?30img7-2gxGaua!vx^Jf1H(6o@65?oxD_ul5i-$D{3QH<`R1BN*)gZA+80EWKf2nd zk<;?i@+FTW6m43hK_|e z1gSvbZHm6`;BZ=9TBib4PK?bc6K5~N5P^+v{x*1g_?eEN{E{Kgu{klL(0zSIP7%&qN!&2Cs-N<8 
zgDtqbA~BHh(KW3@Ut1SvtJo$Pw78g<_-mk2lp>R|jQZ5#xKVcWmp)h$Qk(sx-+w69 zdP4F7E=fe0nI($F-XunbhPWRn7U9ZSl?uyE+*~}y$LH+OSXtb~K1QcOuMc+5-Zt1I zf{K5vCFC&hHzC0ugYaP{-_Z0ak)jRAJ|%CqW^&zNzS~BSCGtHs=q&GGmMu%iD>r7R z+Z|J7feXaOUQ$qx6jaCI^ZpY2^RcoWt&F6qCG>l3_1;Q_p6K}KKTAG*;h>h%%pSE# zS!}T?*UuKfKMC_q=g<(2u2)krC!>heBLh2UOxNEzrRJ6nW?r78L66ov-B*KPfoCAL z>c#o9;FIc&d!bOz-QQVR)Y@VG6pw%MC$;)WNE1Z-3$KMxUm6n@-D=-J2g!u&+W+N0Dzmrm=rRhx0qSl<$g$aFX5I3Z$^d`y@mZ?Hi zv^9P79lR9b5z*%($n&c#u{HXZ@QSxw&zO{!Uo#PKEYbZd-t7ka7~S}?L6VfgmQsfq z6T+e2L^0l#XZuYCWsb6z?tS^trwjyB$ED6uG{`S5FlpYil=u3g)Ak6Wsq8P!D;_@! ze%mubc7w~J621mcVSdVyuGEruxA{#Co{CkKy&DdKzH?yz=J7v|cMV(1330zTPHB_) z!ZVR^X|{))}`n7iCnL9+t{xOXdXR16_MYb(oI};AI zq0uuMJwV~fGYYD9*x6NKaTG`)+a34c)sK*JrFg28ORn$qhy4d%Ah2-Vnx(bKsI`wtP<-#g`VLc zr{Sgb&dEdDvs6^5o(H;^uSNd2uT6Q?<-_9(x=a9{Ax6sO4vH{C?2n5Qs~h6C(}=*_ zW6>ogPqoa>mm+*`^JMlG9PzYymeswkMn)j6zKN}u=gkjOs~No0uRW*+=uX79F6|x~ z2niff$!G(7W4Wfi%@n_Fw4AJUh2nEn&!^M8L8C*xTETfb3*qQl%(FWhs?ru^qxO{2 zf#vK*`FdZG>W@gACH5Bl>3BAo6YMAq)JE^sx!lk_KC`&8X<(ATRsubj)ip1$+w|yT@R}y^1 z)@+-5`u7U92?@E@C2+D=ZdV(y);s}gPHb%^F4j6$UtLQHlO1IsUy8I6`E^|Fz|q!V zqTeRE=(}J~Jrg~mRs&u_eewyHj}LeG-H*#6LDaB7Hh=v-K(*~9Rmc3rdU)jIwBAwZ z?|6F|u+>M0Z#|$kBqEEoJ7%cq+dh9&B8nk}1CSY{u#psZKc!v}quK!a?n|$gYIElD zC@{-xKY|~}%ov?IIzH4}y`455i5_!ye-HMTOEBAgwAOKPC=)oSO>;1Qx!?GE^j)d6 z?*_8-Ve{Z0S4UIlci8fMxV%Kn0}#Cd+Co3*RJ-gcTiE{*c`{s_R>Wc8+)j^+ps8;F zg(tfGt{hntJj_1CpWU_N!*h=;+jJM{Xj%^Pov39r^9!^g?S9ZbyIP6h_s2PvjyWE| z?b#&zs|}2;xVaGh^zz1^6FvfvqrZ}~}YF}N5hk?W@CAZ@GrLHtaMm4s*1o=Az`{-kTj{-umjS3l@g=4PP)V1it+H7s8B@Bx=F%^ED`B8|Cv@} z|BOt7LWOq6X zgZn>an(gaI>Q9ZX{2z@$w&wbUA@no6F?9eIJ)!-T`ZJ=STW@(5JVM$#>$;)}(Sn-x z@Vqq&8gau>Ld-r-&h7);Q9nmX*a`{m5WfdseGCGF!*k_;tt%8*=BXU!E+ zcD%{UEiQK2*U>uDTxa*^vV3^nCJ|A)JYjg}SjDr9QdCm!O~*scBCA;1hWPUBQ9b_3 zEC}J8hJn{j?oICZTo~R?&XsFBP+AH-?n*|#<0KY5+@`owuG|8*Z;xep-r#o6q$h*c zrl1ioMmY}3*m1@NV-l`asK6O-Ci4Cmz5-8>X+Ae`&#?ht@Oi^T6x&d3%8`rJ6NL3c z69xEMHyA$xKD<6KqX3uCJDAD(M~ana?u7K!)`|opG=-FHdwJGop`|-!_GNLg3vq%q 
zu9-NaVL4#pnuQ{d(q{PjkhEH%ao-oy!P~nwD40+zFrv;D9MdXYPGk(C*i9wODcO~8 zZU@?fA$3kEQtHrq({DQ8c8P3_7>yS7=7>0{CxQ;S&TO?!b`kFMLD4=H&c>tsd6C#HeIh6FY-M8>%H!!!kK80)xAn>ryk5CPpqF{$L+XillgW>p4Hgzt=<7I zt}^HzLaZ7+VTRk!kxoEh=j?Y^73+?iLpGqjGFe6~mb6KlxmyBD;u?nAp4Uy{>&|ai z&~v(`Bi96^xjc%X_9dUG1O2(JKwDtH2Yl9VB8=K~&rthV4?i&zcZ?$J!EK-UJ#7$` z(`I0hIytSw3(d(ul`F@GA4*B%thFiB( z=H&Wb|FhhtnVF~(3)^jbCW?);@+7xqkrnx0?S9>}@lMjx3VHA!MUbi8F2@*`MThl6?(5Hi z=W6C3GefLt9Z)An`$9&zxJI4e2>pcksvA*ft|PihI7=0-ep zz{6ZC`Js!p+T2pev{-y4dP-EbJD`RYZPXBY4A3V42&>d@+!l7?cu42#V1MUiyZn_> zM}^mEt>;+5&aPj*ZZ-C^rw2wep!Mja?(XyHr);*Zuzqz)dgMSJjO#O-J~dldqug(G zn}UtJI4onqbyHfX8U&T6>~2~qZqg9OMwJ4`5CD`X<3BDvXm=Nqx+0J(laxG+JwI#7 z{rRT}nP(OEX^zy&#}326Vz&WHiZIx2e>o)&GBpa^q~NT~b#-R|z^SsGw>ZTn0n(^a zqDrhZ$uqQ;(N|OR#1!1l#-3;MM(&dk1X19DQActX`kigs%ykr-w}+rM-$5@-h; z1?AgPNH2a!8fj;+OIt0c%hbYBM2tii0S~9KwnfhMZeQ^>$BnyKO}q>Am&JqWP}Qro zPw}I#Boi^{ppVW#yi{+l?{~}I@Zf{XhOlXC4zK>jB0|<@7{5eOsiBXfPwOg@8I$0C ziCe|U>NVe~Clo)X=cQh>Gvi7R&Zlh^&c*aSkKu0o?^v5Gp)&q> zC;@BLzeDtYr%Lz(SDx3UD%%07gOkF-{;1pVWyQ;r=?zQ=!Cz?*Sgrk)^8uepmrdV# zqF4s|>-lIaW_E+aD{-gdc~s6NjBRcDUvE3aAg!f%!7qJQbEl_c_TAkfdVU+AtxN%f z`>7>{Hvg;otCIJlO4&>Ik+UidKG~t*`%7yh;?du2?i5B?Wu+aih})V*9*Ea<()U+{ z(re-wK~r@CM&w6$49CU7(?dDK*#Eh!$zL#4K8#sz_+I1^7_YAN4$gg>Y{|KO84aWI zOo8w=$3|a<17~vcBJCMY#OKC&jtupSymzHL+qE0>2v zpS|ezD^lxc&}_J~m|*(X4;3skqJo@I-L!;)E{3GW{Zj=Z^3o%&m z7NaDv%m2+b=w&mj&wy_c4+P71oxM_Rm}e&^r;+00C-?j!0h>k5ML%e=+z(09wmKOS zPf79$I-Mc|I*W5n936{`=Qjv9qdmWI#k-+ncin#epk%G3smbNa+8|)I=u?+YS#7tq z6Hdpp2$0y#B87m48=vRbiIij;;F=#CQ3!gDfM{=Ltc58B!~VA`F*O!e1j`Ur9Ik5wQ}bG zDEH|l4TZ&A;Itxa$n7{FlyKe7YPE|aR<@5omw3M}W5OlSj4T1l$@#}-tj&}dHTXXH zTPI_cax~hCG~q4G`{)T5ER+Am)*Kge(K<*A5bcbf@=A0#i!HR6xS;rN{DwXl3AR{& zUTyW9nT$$54cNM*(#ebP4VP2xo1V3alD+maXN|x7y)}j4MvckS&C{6(ez-|Q5c#4<$o5UzhCKMb_}ecxZXs@L;7oE?z@gM< z>?}{hEd(E}%kvdMB^zx`?{L|`f1aE!rw2RZtGNwcq(zAiHrNP-(heCuKzg55eQ4#f zNXuWG4fM?)W(#MBFUt_QsU<5wV^wbP{^O+n_K|}@*l-VbkXB0M-t2i#j^MUj&=QK% 
z3Cdz^P4c{t*ZFC=I89d?`n7PHw;u0E{u?K5YoOt#4gvIssnEum3Fb3aUchpnLf<%D?njh#XGqg(GfdjpZG+`Tp= z%tALo)1d)wiX77YO+Po55mC_h8=wfFzViDj#9uxfjfFrk2ma__*W#f${|Di<2P`WK z2Wg*IY)ykrDT-}IUe2}>+DQ|#oLbsr=<sE?(A&<(yI} z@?anhmM^%kOqZg;7K$5QnLd0?u??*KP1{AY#q94wl|Ld>3kCaH^O+c^PgGf0=*Hq# ztuOZm18;F8GnR3U_o)cfpL^$>%`ajPSzjeq10#G?Z!Wh5^aXt@IN3$#Nm#SLdV`|u zaLPTiLK*!~1e8aZUhhp*O&7;5LxbmXc57J>eAe?$QQEIs*nc}Nnty)M-k2WeoH5#X zG|5P>mP~7ypU`NAa=B^6x`=TnwU_B=P;@??`mKXV)|S?X_r8k>ts4?dfXt_^4=Z2q zD%=yoXDTxJ3q$$Zg?(>N2=v?gDvjs9niQ*FmrOIp=h@DpRdXv=8Ai)JmK8_B>xZ zkm)u2%Ym*t$?a@Op8%~XrO^@H7N!Hv9Vu-~c@~RImDWxO5VkO`+CqG&dFEGy6JQT*HPRxl zF81`3cro9ofOx&&m-U>fu}%ZV$$EyZU6t(#vfpMde;YS*e3|L%UzXE+BNdv|-1Jlw z_WaJW(+KVv2}M{c29|WQJ3=XmmeMhjgyp^~0p7Y+Qze0DnWn3}QWt;7^6P#B^ghTH z7B~JQ^~4-C&#F4}s=qMR{X!2KEzW|4uC_}fkOP^Ou-e|eUDKiYD9aOuj?#t9=tE#h zo<*;Q^t#{hHFe>Q@mbNxnB$dJE66qf@`B})(J8Xa{PutD!5E%nPw(0y6zw`}$y+=r zTaOfTG@q}>t@qZAhkq}T3cQ^WnLukKxRb^s+mKS3onyB{{M?)=GW7~xq?DplT^ggT zQmv$lGyELzS*>;P_S14J2c{~)S1r4$`RMWp8ppw((g>iY8teO!hIf`Q z8ekc`5hZ};}J%$f_|*H&%r zA@f;uQ-Y%aq#Hi2*Hv8F=q0s3O1vJPxmySHc{xPZ2l)Ol*XisrZ6=W?5VW+UGr7K( z={0U>i$P~4c4UyfbrFuv`}a3Z{>j6=8Up6Tta7UYt~|FL0Z1qAt4+Dt#Qu(TzIg-1aT16V0n@qZA^p<@REOWzgulSp z+6gmp)N>;P<5?vo7QW!b3>jKz$^CB=tT-l&oMWq(*RNf59n_rK!prT_#*0f|Jmlor zXgI|T0Q;YxL_0z8*`yswkIo31E%%#?Sw;|i;JWJF2yPv%z9IzXzjF;U)aGNcn_OL% z@PY+)N|%4W zoJ#l#&)5&(ztoa5iV9?R1_S@FAueZ2A=MR4&AM_&gni#tew~X+N7Xb^KpYk;t4&&(Nl1J=))Wgh+oz_&(6ZAij)OD3 zpP|~C@74|XwsQt_-0Ag~YGmY&)J6!WSp1JrMvBDW9t#oeS(Sj}c*lAxeImT5&grH2 zLV*uKe8T!jikI@yz$X~t$^qbSqjxZ|C>2G#(WXX!QDCTaZR+%GDjZS+mlCysv4m|4e`rw&Ov zu+e~2x|XLXR5Q3@d}mww{o zCPR?H(sw4;?TucX?XQafs4ph_sA#aY7;QU%>mo+PuFYvDgHmH2<=ezV2#SZviL7+mC8&5`xZjA_P=n6|Ale}iSA+-pjk(^o&PdbP6A5VK{U7>Dv@Lpx*KPsqZu2^^M7PYHve^S% zPXw^6B40=YBB}Y*<$00r83+4LUt2hS{ZVG1gXKa(ILBW4{k6G@v|)u1+RMVzfyStD z$x8Z2BkR9{(z;L=F*+ePS#|)NfPigR!vf<^RuSpW*v}8XQ~nJm97U;0K3Id@4`WP+ zmGsZ**P$hNFc6c;r%)lZWA*HXM_wiOU?s22iezp@t_gv(-a#|azA(f5)ZhAp`vm?n zqHgF{W_HZvN&=0OT-a401F(9~k;jeJ&_Ejtaqs&K9tE&x8~nx_*D=J~+nTp?7M-|ubsMB|7 
zXSt_^za~v6WGlPXE8}6~)Q8Ip-5xG!R3>Ae(;i0A8PF8vU$>C@PLpz7&{4&IlDaKR z`dP-|TXn&9^{`voMD;*&7gY%>0516pv(DIbirnDHNM*@x>bBgGx}pp#F33wuN9Mmg z7TBvuho1BQ4$oMnq~Z2=#ONU8d8R|7dTe1xpue-fpt}HBM`Cst7G-ea+_y2*C33r)h;Zi{ro_quS-s^wq$X;Bk19h z{Q0RCm#u1Jp5lC0zU6aT@onYwYDp^F5UUyc%3n2TGi4Pi{|f?)mD|f+JRSb}>lGLR z&`-N*$4o7He^`7g5RsyippY9jD zN-vuR`H$jauTNvLte?(%RuBo46Z-;a054ht8&{}&Ua1=6g#`@8bb0Z1=(>yE@xI#l zCMN3a$*>ZV?XjHSe!lT?8X!<#R=|{?v8P~k8ZEJZz|p3^von^B8vGHij3^+e43Yv? zM0vsgRiZ5d>hiE9t7n+6v;6WS>P21}tIgIu1DQ#BX98gIY#^AZA?B~9G8R%@ zC$#Z2{Hq9mWIrm^8?6_eiQDjZj74TDIu7P#(7&l97jA8~HjcHbM3YCG8bxw7MkCqy zX{FJs08po=D>&_@P+~1CjqqWtsM0Uf)vzj>u-$rI4+cN?$||ZV)3|v40;O8@N)K4* zf+EjkG@$jrrzswwIiGzJ{W8k}u0zpi&eMP&|Y_W(Mj`SQ> zY>=deE<*Q9%fyL*f^1}8ZegOQ@@Neci_+*jj6u-ymn#vI-$fdAM0NG=1R2$HJlNyA zgXm4)o-j&&(%6R!sPvX1C#wi%7ff=h7@#g)0R#hi-{#XDekbCb&;S-o%V#bUY9bv2 zCd}FH-3Kc-WJYgd2oPBGp+ZuXObN2EH1t%!K3H+2_!8z0yUVCfUu@~X7>(#KzHrxj z_C9SukyTQLB`%a6Rk0z~i>R{2&y!DeT_B0^G+xm}5*t)#cIP&#B!UK1)o;Vax)@Yp zW1CVm)fgAd4xXxmCZZKIRpf;8*!#NM7+wu|76_@U5D|+Ka2ihwFJYTf931w<86$~&te1_c5W36DE;^COIT8W_& zukPIvnSo1M{lKtcYu9qJn6&;j&zGA?IF`NvFg{y*eKd?Mk>TYKYyS}xV~UScj>XX| z!qL<_tdypi&R#}B&TcfB?n$RH?Zrt6i3c2ql@w@K-DI@&`__. - Mapping between entities from Data Catalog and Dataplex Universal Catalog presented in table - `Mapping between Data Catalog and Dataplex Universal Catalog `__ - under `Learn more about simultaneous availability of Data Catalog metadata in Dataplex Universal Catalog` block. - -The `Data Catalog `__ is a fully managed and scalable metadata -management service that allows organizations to quickly discover, manage and understand all their data in -Google Cloud. 
It offers: - -* A simple and easy to use search interface for data discovery, powered by the same Google search technology that - supports Gmail and Drive -* A flexible and powerful cataloging system for capturing technical and business metadata -* An auto-tagging mechanism for sensitive data with DLP API integration - -Prerequisite Tasks -^^^^^^^^^^^^^^^^^^ - -.. include:: /operators/_partials/prerequisite_tasks.rst - - -.. _howto/operator:CloudDataCatalogEntryOperators: - -Managing an entries -^^^^^^^^^^^^^^^^^^^ - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogLookupEntryOperator`. - For more information please check this :ref:`section `. - -Operators uses a :class:`~google.cloud.datacatalog_v1beta1.types.Entry` for representing entry - -.. contents:: - :depth: 1 - :local: - -.. _howto/operator:CloudDataCatalogLookupEntryOperator: -.. _howto/operator:CloudDataCatalogGetEntryOperator: - -Getting an entry -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogLookupEntryOperator`. - For more information please check this :ref:`section `. - -Getting an entry is performed with the -:class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryOperator` and -:class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogLookupEntryOperator` -operators. - -The ``CloudDataCatalogGetEntryOperator`` use Project ID, Entry Group ID, Entry ID to get the entry. 
- -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -The ``CloudDataCatalogLookupEntryOperator`` use the resource name to get the entry. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogLookupEntryOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -.. _howto/operator:CloudDataCatalogCreateEntryOperator: - -Creating an entry -""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryOperator` -operator create the entry. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -The newly created entry ID can be read with the ``entry_id`` key. - -.. _howto/operator:CloudDataCatalogUpdateEntryOperator: - -Updating an entry -""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator`. - For more information please check this :ref:`section `. 
- -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateEntryOperator` -operator update the entry. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateEntryOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogDeleteEntryOperator: - -Deleting a entry -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryOperator` -operator delete the entry. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogEntryGroupOperators: - -Managing a entry groups -^^^^^^^^^^^^^^^^^^^^^^^ - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryGroupOperator`. - For more information please check this :ref:`section `. - -Operators uses a :class:`~google.cloud.datacatalog_v1beta1.types.Entry` for representing a entry groups. - -.. contents:: - :depth: 1 - :local: - -.. _howto/operator:CloudDataCatalogCreateEntryGroupOperator: - -Creating an entry group -""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryGroupOperator`. 
- For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryGroupOperator` -operator create the entry group. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateEntryGroupOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -The newly created entry group ID can be read with the ``entry_group_id`` key. - -.. _howto/operator:CloudDataCatalogGetEntryGroupOperator: - -Getting an entry group -"""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryGroupOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryGroupOperator` -operator get the entry group. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetEntryGroupOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -.. _howto/operator:CloudDataCatalogDeleteEntryGroupOperator: - -Deleting an entry group -""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryGroupOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryGroupOperator` -operator delete the entry group. 
- -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteEntryGroupOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogTagTemplateOperators: - -Managing tag templates -^^^^^^^^^^^^^^^^^^^^^^ - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator`. - For more information please check this :ref:`section `. - -Operators uses a :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` for representing a tag templates. - -.. contents:: - :depth: 1 - :local: - -.. _howto/operator:CloudDataCatalogCreateTagTemplateOperator: - -Creating a tag template -""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateOperator` -operator get the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -The newly created tag template ID can be read with the ``tag_template_id`` key. - -.. _howto/operator:CloudDataCatalogDeleteTagTemplateOperator: - -Deleting a tag template -""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. 
Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateOperator` -operator delete the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateOperator` -parameters which allows you to dynamically determine values. - - -.. _howto/operator:CloudDataCatalogGetTagTemplateOperator: - -Getting a tag template -"""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetTagTemplateOperator` -operator get the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogGetTagTemplateOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -.. _howto/operator:CloudDataCatalogUpdateTagTemplateOperator: - -Updating a tag template -""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateOperator` -operator update the tag template. 
- -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogTagOperators: - -Managing tags -^^^^^^^^^^^^^ - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator`. - For more information please check this :ref:`section `. - -Operators uses a :class:`~google.cloud.datacatalog_v1beta1.types.Tag` for representing a tag. - -.. contents:: - :depth: 1 - :local: - -.. _howto/operator:CloudDataCatalogCreateTagOperator: - -Creating a tag on an entry -"""""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagOperator` -operator get the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -The newly created tag ID can be read with the ``tag_id`` key. - -.. _howto/operator:CloudDataCatalogUpdateTagOperator: - -Updating a tag -"""""""""""""" - -.. 
warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagOperator` -operator update the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogDeleteTagOperator: - -Deleting a tag -"""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagOperator` -operator delete the tag template. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogListTagsOperator: - -Listing tags on an entry -"""""""""""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogListTagsOperator` -operator get list of the tags on the entry. 
- -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogListTagsOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -.. _howto/operator:CloudDataCatalogTagTemplateFieldssOperators: - -Managing a tag template fields -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator`. - For more information please check this :ref:`section `. - -Operators uses a :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` for representing a tag template fields. - -.. contents:: - :depth: 1 - :local: - -.. _howto/operator:CloudDataCatalogCreateTagTemplateFieldOperator: - -Creating a field -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator` or - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateFieldOperator` -operator get the tag template field. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogCreateTagTemplateFieldOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. 
- -The newly created field ID can be read with the ``tag_template_field_id`` key. - -.. _howto/operator:CloudDataCatalogRenameTagTemplateFieldOperator: - -Renaming a field -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogRenameTagTemplateFieldOperator` -operator rename the tag template field. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogRenameTagTemplateFieldOperator` -parameters which allows you to dynamically determine values. - -.. _howto/operator:CloudDataCatalogUpdateTagTemplateFieldOperator: - -Updating a field -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateFieldOperator` -operator get the tag template field. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogUpdateTagTemplateFieldOperator` -parameters which allows you to dynamically determine values. - - -.. _howto/operator:CloudDataCatalogDeleteTagTemplateFieldOperator: - -Deleting a field -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator`. 
- For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateFieldOperator` -operator delete the tag template field. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogDeleteTagTemplateFieldOperator` -parameters which allows you to dynamically determine values. - - -.. _howto/operator:CloudDataCatalogSearchCatalogOperator: - -Search resources -"""""""""""""""" - -.. warning:: - The Data Catalog will be discontinued on January 30, 2026 in favor of Dataplex Universal Catalog. Please use - :class:`~airflow.providers.google.cloud.operators.dataplex.DataplexCatalogSearchEntriesOperator`. - For more information please check this :ref:`section `. - -The :class:`~airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogSearchCatalogOperator` -operator searches Data Catalog for multiple resources like entries, tags that match a query. - -The ``query`` parameters should defined using `search syntax `__. - -You can use :ref:`Jinja templating ` with -:template-fields:`airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogSearchCatalogOperator` -parameters which allows you to dynamically determine values. - -The result is saved to :ref:`XCom `, which allows it to be used by other operators. - -Reference -^^^^^^^^^ - -For further information, look at: - -* `Client Library Documentation `__ -* `Product Documentation `__ diff --git a/providers/google/docs/operators/cloud/vertex_ai.rst b/providers/google/docs/operators/cloud/vertex_ai.rst index 8b8d06e385a8c..dd840ec6750c3 100644 --- a/providers/google/docs/operators/cloud/vertex_ai.rst +++ b/providers/google/docs/operators/cloud/vertex_ai.rst @@ -576,70 +576,6 @@ To get a pipeline job list you can use Interacting with Generative AI ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. 
warning:: - This operator is deprecated and will be removed after January 3, 2026. Please use - :class:`~airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateEmbeddingsOperator`. - -To generate text embeddings you can use -:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.TextEmbeddingModelGetEmbeddingsOperator`. -The operator returns the model's response in :ref:`XCom ` under ``model_response`` key. - -.. exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_generate_embeddings_task] - :end-before: [END how_to_cloud_gen_ai_generate_embeddings_task] - -.. warning:: - This operator is deprecated and will be removed after January 3, 2026. Please use - :class:`~airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator`. - -To generate content with a generative model you can use -:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerativeModelGenerateContentOperator`. -The operator returns the model's response in :ref:`XCom ` under ``model_response`` key. - -.. exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_generate_content_operator] - :end-before: [END how_to_cloud_gen_ai_generate_content_operator] - -.. warning:: - This operator is deprecated and will be removed after January 3, 2026. Please use - :class:`~airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAISupervisedFineTuningTrainOperator`. - -To run a supervised fine tuning job you can use -:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator`. -The operator returns the tuned model's endpoint name in :ref:`XCom ` under ``tuned_model_endpoint_name`` key. - -.. 
exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model_tuning.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_supervised_fine_tuning_train_operator] - :end-before: [END how_to_cloud_gen_ai_supervised_fine_tuning_train_operator] - -You can also use supervised fine tuning job for video tasks: training and tracking - -.. exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model_tuning.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_supervised_fine_tuning_train_operator_for_video] - :end-before: [END how_to_cloud_gen_ai_supervised_fine_tuning_train_operator_for_video] - -.. warning:: - This operator is deprecated and will be removed after January 3, 2026. Please use - :class:`~airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAICountTokensOperator`. - -To calculates the number of input tokens before sending a request to the Gemini API you can use: -:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.CountTokensOperator`. -The operator returns the total tokens in :ref:`XCom ` under ``total_tokens`` key. - -.. exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_count_tokens_operator] - :end-before: [END how_to_cloud_gen_ai_count_tokens_operator] - To evaluate a model you can use :class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.RunEvaluationOperator`. The operator returns the evaluation summary metrics in :ref:`XCom ` under ``summary_metrics`` key. @@ -650,33 +586,6 @@ The operator returns the evaluation summary metrics in :ref:`XCom ` under ``return_value`` key. - -.. 
exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_create_cached_content_operator] - :end-before: [END how_to_cloud_gen_ai_create_cached_content_operator] - -.. warning:: - This operator is deprecated and will be removed after January 3, 2026. Please use - :class:`~airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator`. - -To generate a response from cached content you can use -:class:`~airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerateFromCachedContentOperator`. -The operator returns the cached content response in :ref:`XCom ` under ``return_value`` key. - -.. exampleinclude:: /../../google/tests/system/google/cloud/gen_ai/example_gen_ai_generative_model.py - :language: python - :dedent: 4 - :start-after: [START how_to_cloud_gen_ai_generate_from_cached_content_operator] - :end-before: [END how_to_cloud_gen_ai_generate_from_cached_content_operator] Interacting with Vertex AI Feature Store ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/providers/google/provider.yaml b/providers/google/provider.yaml index d73c4ce1dd7d4..4f0d1d90c81c9 100644 --- a/providers/google/provider.yaml +++ b/providers/google/provider.yaml @@ -310,12 +310,6 @@ integrations: external-doc-url: https://cloud.google.com/dataproc/ logo: /docs/integration-logos/Google-Data-Proc.png tags: [gcp] - - integration-name: Google Data Catalog - external-doc-url: https://cloud.google.com/data-catalog/ - how-to-guide: - - /docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst - logo: /docs/integration-logos/Google-Data-Catalog.png - tags: [gcp] - integration-name: Google Dataflow external-doc-url: https://cloud.google.com/dataflow/ how-to-guide: diff --git a/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py b/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py 
index c882fa26dd782..1ee60b524ee19 100644 --- a/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py +++ b/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py @@ -68,7 +68,6 @@ from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes from airflow.providers.google.cloud.utils.lineage import send_hook_lineage_for_bq_job from airflow.providers.google.common.consts import CLIENT_INFO -from airflow.providers.google.common.deprecated import deprecated from airflow.providers.google.common.hooks.base_google import ( _UNSET, PROVIDE_PROJECT_ID, @@ -392,14 +391,6 @@ def get_df( ) return result - @deprecated( - planned_removal_date="November 30, 2025", - use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_df", - category=AirflowProviderDeprecationWarning, - ) - def get_pandas_df(self, sql, parameters=None, dialect=None, **kwargs): - return self._get_pandas_df(sql, parameters, dialect, **kwargs) - @GoogleBaseHook.fallback_to_default_project_id def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool: """ diff --git a/providers/google/src/airflow/providers/google/cloud/hooks/datacatalog.py b/providers/google/src/airflow/providers/google/cloud/hooks/datacatalog.py deleted file mode 100644 index 77e5787a9117d..0000000000000 --- a/providers/google/src/airflow/providers/google/cloud/hooks/datacatalog.py +++ /dev/null @@ -1,1172 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING - -from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault -from google.cloud import datacatalog -from google.cloud.datacatalog import ( - CreateTagRequest, - DataCatalogClient, - Entry, - EntryGroup, - SearchCatalogRequest, - Tag, - TagTemplate, - TagTemplateField, -) - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.compat.sdk import AirflowException -from airflow.providers.google.common.consts import CLIENT_INFO -from airflow.providers.google.common.deprecated import deprecated -from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook - -if TYPE_CHECKING: - from google.api_core.retry import Retry - from google.protobuf.field_mask_pb2 import FieldMask - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.hooks.dataplex.DataplexHook", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogHook(GoogleBaseHook): - """ - Hook for Google Cloud Data Catalog Service. - - :param gcp_conn_id: The connection ID to use when fetching connection info. 
- :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account. - """ - - def __init__( - self, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__( - gcp_conn_id=gcp_conn_id, - impersonation_chain=impersonation_chain, - **kwargs, - ) - self._client: DataCatalogClient | None = None - - def get_conn(self) -> DataCatalogClient: - """Retrieve client library object that allow access to Cloud Data Catalog service.""" - if not self._client: - self._client = DataCatalogClient(credentials=self.get_credentials(), client_info=CLIENT_INFO) - return self._client - - @GoogleBaseHook.fallback_to_default_project_id - def create_entry( - self, - location: str, - entry_group: str, - entry_id: str, - entry: dict | Entry, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Entry: - """ - Create an entry. - - Currently only entries of 'FILESET' type can be created. - - :param location: Required. The location of the entry to create. - :param entry_group: Required. Entry group ID under which the entry is created. - :param entry_id: Required. The id of the entry to create. - :param entry: Required. The entry to create. 
- If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :param project_id: The ID of the Google Cloud project that owns the entry. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If set to ``None`` or missing, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - parent = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" - self.log.info("Creating a new entry: parent=%s", parent) - result = client.create_entry( - request={"parent": parent, "entry_id": entry_id, "entry": entry}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Created a entry: name=%s", result.name) - return result - - @GoogleBaseHook.fallback_to_default_project_id - def create_entry_group( - self, - location: str, - entry_group_id: str, - entry_group: dict | EntryGroup, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> EntryGroup: - """ - Create an EntryGroup. - - :param location: Required. The location of the entry group to create. - :param entry_group_id: Required. The id of the entry group to create. The id must begin with a letter - or underscore, contain only English letters, numbers and underscores, and be at most 64 - characters. - :param entry_group: The entry group to create. Defaults to an empty entry group. 
- - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.EntryGroup` - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - parent = f"projects/{project_id}/locations/{location}" - self.log.info("Creating a new entry group: parent=%s", parent) - - result = client.create_entry_group( - request={"parent": parent, "entry_group_id": entry_group_id, "entry_group": entry_group}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Created a entry group: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def create_tag( - self, - location: str, - entry_group: str, - entry: str, - tag: dict | Tag, - project_id: str = PROVIDE_PROJECT_ID, - template_id: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Tag: - """ - Create a tag on an entry. - - :param location: Required. The location of the tag to create. - :param entry_group: Required. Entry group ID under which the tag is created. - :param entry: Required. Entry group ID under which the tag is created. - :param tag: Required. The tag to create. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :param template_id: Required. 
Template ID used to create tag - :param project_id: The ID of the Google Cloud project that owns the tag. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - if template_id: - template_path = f"projects/{project_id}/locations/{location}/tagTemplates/{template_id}" - if isinstance(tag, Tag): - tag.template = template_path - else: - tag["template"] = template_path - parent = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" - - self.log.info("Creating a new tag: parent=%s", parent) - # HACK: google-cloud-datacatalog has problems with mapping messages where the value is not a - # primitive type, so we need to convert it manually. 
- # See: https://github.com/googleapis/python-datacatalog/issues/84 - if isinstance(tag, dict): - tag = Tag( - name=tag.get("name"), - template=tag.get("template"), - template_display_name=tag.get("template_display_name"), - column=tag.get("column"), - fields={ - k: datacatalog.TagField(**v) if isinstance(v, dict) else v - for k, v in tag.get("fields", {}).items() - }, - ) - request = CreateTagRequest( - parent=parent, - tag=tag, - ) - - result = client.create_tag(request=request, retry=retry, timeout=timeout, metadata=metadata or ()) - self.log.info("Created a tag: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def create_tag_template( - self, - location, - tag_template_id: str, - tag_template: dict | TagTemplate, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> TagTemplate: - """ - Create a tag template. - - :param location: Required. The location of the tag template to create. - :param tag_template_id: Required. The id of the tag template to create. - :param tag_template: Required. The tag template to create. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :param project_id: The ID of the Google Cloud project that owns the tag template. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - parent = f"projects/{project_id}/locations/{location}" - - self.log.info("Creating a new tag template: parent=%s", parent) - # HACK: google-cloud-datacatalog has problems with mapping messages where the value is not a - # primitive type, so we need to convert it manually. - # See: https://github.com/googleapis/python-datacatalog/issues/84 - if isinstance(tag_template, dict): - tag_template = datacatalog.TagTemplate( - name=tag_template.get("name"), - display_name=tag_template.get("display_name"), - fields={ - k: datacatalog.TagTemplateField(**v) if isinstance(v, dict) else v - for k, v in tag_template.get("fields", {}).items() - }, - ) - - request = datacatalog.CreateTagTemplateRequest( - parent=parent, tag_template_id=tag_template_id, tag_template=tag_template - ) - result = client.create_tag_template( - request=request, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Created a tag template: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def create_tag_template_field( - self, - location: str, - tag_template: str, - tag_template_field_id: str, - tag_template_field: dict | TagTemplateField, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> TagTemplateField: - r""" - Create a field in a tag template. - - :param location: Required. The location of the tag template field to create. - :param tag_template: Required. The id of the tag template to create. - :param tag_template_field_id: Required. The ID of the tag template field to create. Field ids can - contain letters (both uppercase and lowercase), numbers (0-9), underscores (\_) and dashes (-). - Field IDs must be at least 1 character long and at most 128 characters long. Field IDs must also - be unique within their template. - :param tag_template_field: Required. 
The tag template field to create. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :param project_id: The ID of the Google Cloud project that owns the tag template field. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - parent = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" - - self.log.info("Creating a new tag template field: parent=%s", parent) - - result = client.create_tag_template_field( - request={ - "parent": parent, - "tag_template_field_id": tag_template_field_id, - "tag_template_field": tag_template_field, - }, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Created a tag template field: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def delete_entry( - self, - location: str, - entry_group: str, - entry: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> None: - """ - Delete an existing entry. - - :param location: Required. The location of the entry to delete. - :param entry_group: Required. Entry group ID for entries that is deleted. - :param entry: Entry ID that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. 
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" - self.log.info("Deleting a entry: name=%s", name) - client.delete_entry(request={"name": name}, retry=retry, timeout=timeout, metadata=metadata or ()) - self.log.info("Deleted a entry: name=%s", name) - - @GoogleBaseHook.fallback_to_default_project_id - def delete_entry_group( - self, - location, - entry_group, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> None: - """ - Delete an EntryGroup. - - Only entry groups that do not contain entries can be deleted. - - :param location: Required. The location of the entry group to delete. - :param entry_group: Entry group ID that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" - - self.log.info("Deleting a entry group: name=%s", name) - client.delete_entry_group( - request={"name": name}, retry=retry, timeout=timeout, metadata=metadata or () - ) - self.log.info("Deleted a entry group: name=%s", name) - - @GoogleBaseHook.fallback_to_default_project_id - def delete_tag( - self, - location: str, - entry_group: str, - entry: str, - tag: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> None: - """ - Delete a tag. - - :param location: Required. The location of the tag to delete. - :param entry_group: Entry group ID for tag that is deleted. - :param entry: Entry ID for tag that is deleted. - :param tag: Identifier for TAG that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = ( - f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}/tags/{tag}" - ) - - self.log.info("Deleting a tag: name=%s", name) - client.delete_tag(request={"name": name}, retry=retry, timeout=timeout, metadata=metadata or ()) - self.log.info("Deleted a tag: name=%s", name) - - @GoogleBaseHook.fallback_to_default_project_id - def delete_tag_template( - self, - location, - tag_template, - force: bool, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> None: - """ - Delete a tag template and all tags using the template. - - :param location: Required. The location of the tag template to delete. - :param tag_template: ID for tag template that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param force: Required. Currently, this field must always be set to ``true``. This confirms the - deletion of any possible tags using this template. ``force = false`` will be supported in the - future. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" - - self.log.info("Deleting a tag template: name=%s", name) - client.delete_tag_template( - request={"name": name, "force": force}, retry=retry, timeout=timeout, metadata=metadata or () - ) - self.log.info("Deleted a tag template: name=%s", name) - - @GoogleBaseHook.fallback_to_default_project_id - def delete_tag_template_field( - self, - location: str, - tag_template: str, - field: str, - force: bool, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> None: - """ - Delete a field in a tag template and all uses of that field. - - :param location: Required. The location of the tag template to delete. - :param tag_template: Tag Template ID for tag template field that is deleted. - :param field: Name of field that is deleted. - :param force: Required. This confirms the deletion of this field from any tags using this field. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}/fields/{field}" - - self.log.info("Deleting a tag template field: name=%s", name) - client.delete_tag_template_field( - request={"name": name, "force": force}, retry=retry, timeout=timeout, metadata=metadata or () - ) - self.log.info("Deleted a tag template field: name=%s", name) - - @GoogleBaseHook.fallback_to_default_project_id - def get_entry( - self, - location: str, - entry_group: str, - entry: str, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Entry: - """ - Get an entry. - - :param location: Required. The location of the entry to get. - :param entry_group: Required. The entry group of the entry to get. - :param entry: The ID of the entry to get. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" - - self.log.info("Getting a entry: name=%s", name) - result = client.get_entry( - request={"name": name}, retry=retry, timeout=timeout, metadata=metadata or () - ) - self.log.info("Received a entry: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def get_entry_group( - self, - location: str, - entry_group: str, - project_id: str, - read_mask: FieldMask | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> EntryGroup: - """ - Get an entry group. - - :param location: Required. The location of the entry group to get. - :param entry_group: The ID of the entry group to get. - :param read_mask: The fields to return. If not set or empty, all fields are returned. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" - - self.log.info("Getting a entry group: name=%s", name) - - result = client.get_entry_group( - request={"name": name, "read_mask": read_mask}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Received a entry group: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def get_tag_template( - self, - location: str, - tag_template: str, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> TagTemplate: - """ - Get a tag template. - - :param location: Required. The location of the tag template to get. - :param tag_template: Required. The ID of the tag template to get. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" - - self.log.info("Getting a tag template: name=%s", name) - - result = client.get_tag_template( - request={"name": name}, retry=retry, timeout=timeout, metadata=metadata or () - ) - - self.log.info("Received a tag template: name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def list_tags( - self, - location: str, - entry_group: str, - entry: str, - project_id: str, - page_size: int = 100, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ): - """ - List the tags on an Entry. - - :param location: Required. The location of the tags to get. - :param entry_group: Required. The entry group of the tags to get. - :param entry_group: Required. The entry of the tags to get. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per- resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - parent = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" - - self.log.info("Listing tag on entry: entry_name=%s", parent) - - result = client.list_tags( - request={"parent": parent, "page_size": page_size}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Received tags.") - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def get_tag_for_template_name( - self, - location: str, - entry_group: str, - entry: str, - template_name: str, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Tag: - """ - Get for a tag with a specific template for a specific entry. - - :param location: Required. The location which contains the entry to search for. - :param entry_group: The entry group ID which contains the entry to search for. - :param entry: The name of the entry to search for. - :param template_name: The name of the template that will be the search criterion. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - tags_list = self.list_tags( - location=location, - entry_group=entry_group, - entry=entry, - project_id=project_id, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - tag = next(t for t in tags_list if t.template == template_name) - return tag - - def lookup_entry( - self, - linked_resource: str | None = None, - sql_resource: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Entry: - r""" - Get an entry by target resource name. - - This method allows clients to use the resource name from the source Google Cloud service - to get the Data Catalog Entry. - - :param linked_resource: The full name of the Google Cloud resource the Data Catalog entry - represents. See: https://cloud.google.com/apis/design/resource\_names#full\_resource\_name. Full - names are case-sensitive. - - :param sql_resource: The SQL name of the entry. SQL names are case-sensitive. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - if linked_resource and sql_resource: - raise AirflowException("Only one of linked_resource, sql_resource should be set.") - - if not linked_resource and not sql_resource: - raise AirflowException("At least one of linked_resource, sql_resource should be set.") - - if linked_resource: - self.log.info("Getting entry: linked_resource=%s", linked_resource) - result = client.lookup_entry( - request={"linked_resource": linked_resource}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - else: - self.log.info("Getting entry: sql_resource=%s", sql_resource) - result = client.lookup_entry( - request={"sql_resource": sql_resource}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Received entry. name=%s", result.name) - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def rename_tag_template_field( - self, - location: str, - tag_template: str, - field: str, - new_tag_template_field_id: str, - project_id: str, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> TagTemplateField: - """ - Rename a field in a tag template. - - :param location: Required. The location of the tag template field to rename. - :param tag_template: The tag template ID for field that is renamed. - :param field: Required. The old ID of this tag template field. For example, - ``my_old_field``. - :param new_tag_template_field_id: Required. The new ID of this tag template field. For example, - ``my_new_field``. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}/fields/{field}" - - self.log.info( - "Renaming field: old_name=%s, new_tag_template_field_id=%s", name, new_tag_template_field_id - ) - - result = client.rename_tag_template_field( - request={"name": name, "new_tag_template_field_id": new_tag_template_field_id}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Renamed tag template field.") - - return result - - def search_catalog( - self, - scope: dict | SearchCatalogRequest.Scope, - query: str, - page_size: int = 100, - order_by: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ): - r""" - Search Data Catalog for multiple resources like entries, tags that match a query. - - This does not return the complete resource, only the resource identifier and high level fields. - Clients can subsequently call ``Get`` methods. - - Note that searches do not have full recall. There may be results that match your query but are not - returned, even in subsequent pages of results. These missing results may vary across repeated calls to - search. Do not rely on this method if you need to guarantee full recall. - - :param scope: Required. The scope of this search request. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Scope` - :param query: Required. The query string in search query syntax. The query must be non-empty. - - Query strings can be simple as "x" or more qualified as: - - - name:x - - column:x - - description:y - - Note: Query tokens need to have a minimum of 3 characters for substring matching to work - correctly. 
See `Data Catalog Search Syntax `__ for more information. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per-resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - :param order_by: Specifies the ordering of results, currently supported case-sensitive choices are: - - - ``relevance``, only supports descending - - ``last_access_timestamp [asc|desc]``, defaults to descending if not specified - - ``last_modified_timestamp [asc|desc]``, defaults to descending if not specified - - If not specified, defaults to ``relevance`` descending. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - - self.log.info( - "Searching catalog: scope=%s, query=%s, page_size=%s, order_by=%s", - scope, - query, - page_size, - order_by, - ) - result = client.search_catalog( - request={"scope": scope, "query": query, "page_size": page_size, "order_by": order_by}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Received items.") - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def update_entry( - self, - entry: dict | Entry, - update_mask: dict | FieldMask, - project_id: str, - location: str | None = None, - entry_group: str | None = None, - entry_id: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Entry: - """ - Update an existing entry. - - :param entry: Required. The updated entry. 
The "name" field must be set. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :param update_mask: The fields to update on the entry. If absent or empty, all modifiable fields are - updated. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param location: Required. The location of the entry to update. - :param entry_group: The entry group ID for the entry that is being updated. - :param entry_id: The entry ID that is being updated. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - if project_id and location and entry_group and entry_id: - full_entry_name = ( - f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry_id}" - ) - if isinstance(entry, Entry): - entry.name = full_entry_name - elif isinstance(entry, dict): - entry["name"] = full_entry_name - else: - raise AirflowException("Unable to set entry's name.") - elif location and entry_group and entry_id: - raise AirflowException( - "You must provide all the parameters (project_id, location, entry_group, entry_id) " - "contained in the name, or do not specify any parameters and pass the name on the object " - ) - name = entry.name if isinstance(entry, Entry) else entry["name"] - self.log.info("Updating entry: name=%s", name) - - # HACK: google-cloud-datacatalog has a problem with dictionaries for update methods. - if isinstance(entry, dict): - entry = Entry(**entry) - result = client.update_entry( - request={"entry": entry, "update_mask": update_mask}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - - self.log.info("Updated entry.") - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def update_tag( - self, - tag: dict | Tag, - update_mask: dict | FieldMask, - project_id: str, - location: str | None = None, - entry_group: str | None = None, - entry: str | None = None, - tag_id: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> Tag: - """ - Update an existing tag. - - :param tag: Required. The updated tag. The "name" field must be set. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :param update_mask: The fields to update on the Tag. If absent or empty, all modifiable fields are - updated. Currently the only modifiable field is the field ``fields``. 
- - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.FieldMask` - :param location: Required. The location of the tag to rename. - :param entry_group: The entry group ID for the tag that is being updated. - :param entry: The entry ID for the tag that is being updated. - :param tag_id: The tag ID that is being updated. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - if project_id and location and entry_group and entry and tag_id: - full_tag_name = ( - f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" - f"/tags/{tag_id}" - ) - if isinstance(tag, Tag): - tag.name = full_tag_name - elif isinstance(tag, dict): - tag["name"] = full_tag_name - else: - raise AirflowException("Unable to set tag's name.") - elif location and entry_group and entry and tag_id: - raise AirflowException( - "You must provide all the parameters (project_id, location, entry_group, entry, tag_id) " - "contained in the name, or do not specify any parameters and pass the name on the object " - ) - - name = tag.name if isinstance(tag, Tag) else tag["name"] - self.log.info("Updating tag: name=%s", name) - - # HACK: google-cloud-datacatalog has a problem with dictionaries for update methods. 
- if isinstance(tag, dict): - tag = Tag(**tag) - result = client.update_tag( - request={"tag": tag, "update_mask": update_mask}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Updated tag.") - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def update_tag_template( - self, - tag_template: dict | TagTemplate, - update_mask: dict | FieldMask, - project_id: str, - location: str | None = None, - tag_template_id: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ) -> TagTemplate: - """ - Update a tag template. - - This method cannot be used to update the fields of a template. The tag - template fields are represented as separate resources and should be updated using their own - create/update/delete methods. - - :param tag_template: Required. The template to update. The "name" field must be set. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :param update_mask: The field mask specifies the parts of the template to overwrite. - - If absent or empty, all of the allowed fields above will be updated. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param location: Required. The location of the tag template to rename. - :param tag_template_id: Optional. The tag template ID for the entry that is being updated. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - """ - client = self.get_conn() - if project_id and location and tag_template: - full_tag_template_name = ( - f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template_id}" - ) - if isinstance(tag_template, TagTemplate): - tag_template.name = full_tag_template_name - elif isinstance(tag_template, dict): - tag_template["name"] = full_tag_template_name - else: - raise AirflowException("Unable to set name of tag template.") - elif location and tag_template: - raise AirflowException( - "You must provide all the parameters (project_id, location, tag_template_id) " - "contained in the name, or do not specify any parameters and pass the name on the object " - ) - - name = tag_template.name if isinstance(tag_template, TagTemplate) else tag_template["name"] - self.log.info("Updating tag template: name=%s", name) - - # HACK: google-cloud-datacatalog has a problem with dictionaries for update methods. - if isinstance(tag_template, dict): - tag_template = TagTemplate(**tag_template) - result = client.update_tag_template( - request={"tag_template": tag_template, "update_mask": update_mask}, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Updated tag template.") - - return result - - @GoogleBaseHook.fallback_to_default_project_id - def update_tag_template_field( - self, - tag_template_field: dict | TagTemplateField, - update_mask: dict | FieldMask, - project_id: str, - tag_template_field_name: str | None = None, - location: str | None = None, - tag_template: str | None = None, - tag_template_field_id: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - ): - """ - Update a field in a tag template. This method cannot be used to update the field type. - - :param tag_template_field: Required. 
The template to update. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :param update_mask: The field mask specifies the parts of the template to be updated. Allowed fields: - - - ``display_name`` - - ``type.enum_type`` - - If ``update_mask`` is not set or empty, all of the allowed fields above will be updated. - - When updating an enum type, the provided values will be merged with the existing values. - Therefore, enum values can only be added, existing enum values cannot be deleted nor renamed. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param tag_template_field_name: Optional. The name of the tag template field to rename. - :param location: Optional. The location of the tag to rename. - :param tag_template: Optional. The tag template ID for tag template field to rename. - :param tag_template_field_id: Optional. The ID of tag template field to rename. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. 
- """ - client = self.get_conn() - if project_id and location and tag_template and tag_template_field_id: - tag_template_field_name = ( - f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" - f"/fields/{tag_template_field_id}" - ) - - self.log.info("Updating tag template field: name=%s", tag_template_field_name) - - result = client.update_tag_template_field( - request={ - "name": tag_template_field_name, - "tag_template_field": tag_template_field, - "update_mask": update_mask, - }, - retry=retry, - timeout=timeout, - metadata=metadata, - ) - self.log.info("Updated tag template field.") - - return result diff --git a/providers/google/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py b/providers/google/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py index 2e846ccd2524e..06854d313248a 100644 --- a/providers/google/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +++ b/providers/google/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py @@ -19,26 +19,17 @@ from __future__ import annotations -import time -from datetime import timedelta -from typing import TYPE_CHECKING, Any, Literal +from typing import Any import vertexai -from google.cloud import aiplatform from vertexai.generative_models import GenerativeModel from vertexai.language_models import TextEmbeddingModel from vertexai.preview import generative_models as preview_generative_model from vertexai.preview.caching import CachedContent from vertexai.preview.evaluation import EvalResult, EvalTask -from vertexai.preview.tuning import sft -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.google.common.deprecated import deprecated from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook -if TYPE_CHECKING: - from google.cloud.aiplatform_v1beta1 import types as types_v1beta1 - class GenerativeModelHook(GoogleBaseHook): """Hook for Google 
Cloud Vertex AI Generative Model APIs.""" @@ -90,174 +81,6 @@ def get_cached_context_model( cached_context_model = preview_generative_model.GenerativeModel.from_cached_content(cached_content) return cached_context_model - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.embed_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def text_embedding_model_get_embeddings( - self, - prompt: str, - pretrained_model: str, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> list: - """ - Use the Vertex AI PaLM API to generate text embeddings. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response. - :param pretrained_model: A pre-trained model optimized for generating text embeddings. 
- """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - model = self.get_text_embedding_model(pretrained_model) - - response = model.get_embeddings([prompt])[0] # single prompt - - return response.values - - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def generative_model_generate_content( - self, - contents: list, - location: str, - pretrained_model: str, - tools: list | None = None, - generation_config: dict | None = None, - safety_settings: dict | None = None, - system_instruction: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param tools: Optional. A list of tools available to the model during evaluation, such as a data store. - :param system_instruction: Optional. An instruction given to the model to guide its behavior. - :param pretrained_model: Required. Model, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. 
- """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model( - pretrained_model=pretrained_model, system_instruction=system_instruction - ) - response = model.generate_content( - contents=contents, - tools=tools, - generation_config=generation_config, - safety_settings=safety_settings, - ) - - return response.text - - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.supervised_fine_tuning_train", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def supervised_fine_tuning_train( - self, - source_model: str, - train_dataset: str, - location: str, - tuned_model_display_name: str | None = None, - validation_dataset: str | None = None, - epochs: int | None = None, - adapter_size: Literal[1, 4, 8, 16] | None = None, - learning_rate_multiplier: float | None = None, - project_id: str = PROVIDE_PROJECT_ID, - ) -> Any: - """ - Use the Supervised Fine Tuning API to create a tuning job. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param source_model: Required. A pre-trained model optimized for performing natural - language tasks such as classification, summarization, extraction, content - creation, and ideation. - :param train_dataset: Required. Cloud Storage URI of your training dataset. The dataset - must be formatted as a JSONL file. For best results, provide at least 100 to 500 examples. - :param tuned_model_display_name: Optional. Display name of the TunedModel. The name can be up - to 128 characters long and can consist of any UTF-8 characters. - :param validation_dataset: Optional. Cloud Storage URI of your training dataset. The dataset must be - formatted as a JSONL file. 
For best results, provide at least 100 to 500 examples. - :param epochs: Optional. To optimize performance on a specific dataset, try using a higher - epoch value. Increasing the number of epochs might improve results. However, be cautious - about over-fitting, especially when dealing with small datasets. If over-fitting occurs, - consider lowering the epoch number. - :param adapter_size: Optional. Adapter size for tuning. - :param learning_rate_multiplier: Optional. Multiplier for adjusting the default learning rate. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - sft_tuning_job = sft.train( - source_model=source_model, - train_dataset=train_dataset, - validation_dataset=validation_dataset, - epochs=epochs, - adapter_size=adapter_size, - learning_rate_multiplier=learning_rate_multiplier, - tuned_model_display_name=tuned_model_display_name, - ) - - # Polling for job completion - while not sft_tuning_job.has_ended: - time.sleep(60) - sft_tuning_job.refresh() - - return sft_tuning_job - - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.count_tokens", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def count_tokens( - self, - contents: list, - location: str, - pretrained_model: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> types_v1beta1.CountTokensResponse: - """ - Use the Vertex AI Count Tokens API to calculate the number of input tokens before sending a request to the Gemini API. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. 
- :param pretrained_model: Required. Model, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model(pretrained_model=pretrained_model) - response = model.count_tokens( - contents=contents, - ) - - return response - @GoogleBaseHook.fallback_to_default_project_id def run_evaluation( self, @@ -315,116 +138,3 @@ def run_evaluation( ) return eval_result - - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.create_cached_content", - category=AirflowProviderDeprecationWarning, - ) - def create_cached_content( - self, - model_name: str, - location: str, - ttl_hours: float = 1, - system_instruction: Any | None = None, - contents: list[Any] | None = None, - display_name: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Create CachedContent to reduce the cost of requests that contain repeat content with high input token counts. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param model_name: Required. The name of the publisher model to use for cached content. - :param system_instruction: Developer set system instruction. - :param contents: The content to cache. - :param ttl_hours: The TTL for this resource in hours. The expiration time is computed: now + TTL. - Defaults to one hour. 
- :param display_name: The user-generated meaningful display name of the cached content - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - response = CachedContent.create( - model_name=model_name, - system_instruction=system_instruction, - contents=contents, - ttl=timedelta(hours=ttl_hours), - display_name=display_name, - ) - - return response.name - - @deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content", - category=AirflowProviderDeprecationWarning, - ) - def generate_from_cached_content( - self, - location: str, - cached_content_name: str, - contents: list, - generation_config: dict | None = None, - safety_settings: dict | None = None, - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Generate a response from CachedContent. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param cached_content_name: Required. The name of the cached content resource. - :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - """ - # During run of the system test it was found out that names from xcom, e.g. 
3402922389 can be - # treated as int and throw an error TypeError: expected string or bytes-like object, got 'int' - cached_content_name = str(cached_content_name) - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - cached_context_model = self.get_cached_context_model(cached_content_name=cached_content_name) - - response = cached_context_model.generate_content( - contents=contents, - generation_config=generation_config, - safety_settings=safety_settings, - ) - - return response.text - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.hooks.vertex_ai.experiment_service.ExperimentRunHook", - category=AirflowProviderDeprecationWarning, -) -class ExperimentRunHook(GoogleBaseHook): - """Use the Vertex AI SDK for Python to create and manage your experiment runs.""" - - @GoogleBaseHook.fallback_to_default_project_id - def delete_experiment_run( - self, - experiment_run_name: str, - experiment_name: str, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - delete_backing_tensorboard_run: bool = False, - ) -> None: - """ - Delete experiment run from the experiment. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param experiment_name: Required. The name of the evaluation experiment. - :param experiment_run_name: Required. The specific run name or ID for this experiment. - :param delete_backing_tensorboard_run: Whether to delete the backing Vertex AI TensorBoard run - that stores time series metrics for this run. 
- """ - self.log.info("Next experiment run will be deleted: %s", experiment_run_name) - experiment_run = aiplatform.ExperimentRun( - run_name=experiment_run_name, experiment=experiment_name, project=project_id, location=location - ) - experiment_run.delete(delete_backing_tensorboard_run=delete_backing_tensorboard_run) diff --git a/providers/google/src/airflow/providers/google/cloud/links/datacatalog.py b/providers/google/src/airflow/providers/google/cloud/links/datacatalog.py deleted file mode 100644 index 00b9071d77dcc..0000000000000 --- a/providers/google/src/airflow/providers/google/cloud/links/datacatalog.py +++ /dev/null @@ -1,84 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""This module contains Google Data Catalog links.""" - -from __future__ import annotations - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.google.cloud.links.base import BaseGoogleLink -from airflow.providers.google.common.deprecated import deprecated - -DATACATALOG_BASE_LINK = "/datacatalog" -ENTRY_GROUP_LINK = ( - DATACATALOG_BASE_LINK - + "/groups/{entry_group_id};container={project_id};location={location_id}?project={project_id}" -) -ENTRY_LINK = ( - DATACATALOG_BASE_LINK - + "/projects/{project_id}/locations/{location_id}/entryGroups/{entry_group_id}/entries/{entry_id}\ - ?project={project_id}" -) -TAG_TEMPLATE_LINK = ( - DATACATALOG_BASE_LINK - + "/projects/{project_id}/locations/{location_id}/tagTemplates/{tag_template_id}?project={project_id}" -) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.links.dataplex.DataplexCatalogEntryGroupLink", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class DataCatalogEntryGroupLink(BaseGoogleLink): - """Helper class for constructing Data Catalog Entry Group Link.""" - - name = "Data Catalog Entry Group" - key = "data_catalog_entry_group" - format_str = ENTRY_GROUP_LINK - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.links.dataplex.DataplexCatalogEntryLink", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class DataCatalogEntryLink(BaseGoogleLink): - """Helper class for constructing Data Catalog Entry Link.""" - - name = "Data Catalog Entry" - key = "data_catalog_entry" - format_str = ENTRY_LINK - - -@deprecated( - planned_removal_date="January 30, 2026", - 
use_instead="airflow.providers.google.cloud.links.dataplex.DataplexCatalogAspectTypeLink", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class DataCatalogTagTemplateLink(BaseGoogleLink): - """Helper class for constructing Data Catalog Tag Template Link.""" - - name = "Data Catalog Tag Template" - key = "data_catalog_tag_template" - format_str = TAG_TEMPLATE_LINK diff --git a/providers/google/src/airflow/providers/google/cloud/operators/datacatalog.py b/providers/google/src/airflow/providers/google/cloud/operators/datacatalog.py deleted file mode 100644 index 86dced30cc8d7..0000000000000 --- a/providers/google/src/airflow/providers/google/cloud/operators/datacatalog.py +++ /dev/null @@ -1,2338 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING - -from google.api_core.exceptions import AlreadyExists, NotFound -from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault -from google.cloud.datacatalog import ( - DataCatalogClient, - Entry, - EntryGroup, - SearchCatalogRequest, - SearchCatalogResult, - Tag, - TagTemplate, - TagTemplateField, -) - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook -from airflow.providers.google.cloud.links.datacatalog import ( - DataCatalogEntryGroupLink, - DataCatalogEntryLink, - DataCatalogTagTemplateLink, -) -from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator -from airflow.providers.google.common.deprecated import deprecated -from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID - -if TYPE_CHECKING: - from google.api_core.retry import Retry - from google.protobuf.field_mask_pb2 import FieldMask - - from airflow.providers.common.compat.sdk import Context - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogCreateEntryOperator(GoogleCloudBaseOperator): - """ - Creates an entry. - - Currently only entries of 'FILESET' type can be created. - - The newly created entry ID are saved under the ``entry_id`` key in XCOM. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogCreateEntryOperator` - - :param location: Required. The location of the entry to create. - :param entry_group: Required. 
Entry group ID under which the entry is created. - :param entry_id: Required. The id of the entry to create. - :param entry: Required. The entry to create. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :param project_id: The ID of the Google Cloud project that owns the entry. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If set to ``None`` or missing, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry_id", - "entry", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry_id: str, - entry: dict | Entry, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry_id = entry_id - self.entry = entry - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - result = hook.create_entry( - location=self.location, - entry_group=self.entry_group, - entry_id=self.entry_id, - entry=self.entry, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except AlreadyExists: - self.log.info("Entry already exists. 
Skipping create operation.") - result = hook.get_entry( - location=self.location, - entry_group=self.entry_group, - entry=self.entry_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - _, _, entry_id = result.name.rpartition("/") - self.log.info("Current entry_id ID: %s", entry_id) - context["ti"].xcom_push(key="entry_id", value=entry_id) - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry_id, - entry_group_id=self.entry_group, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return Entry.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryGroupOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogCreateEntryGroupOperator(GoogleCloudBaseOperator): - """ - Creates an EntryGroup. - - The newly created entry group ID are saved under the ``entry_group_id`` key in XCOM. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogCreateEntryGroupOperator` - - :param location: Required. The location of the entry group to create. - :param entry_group_id: Required. The id of the entry group to create. The id must begin with a letter - or underscore, contain only English letters, numbers and underscores, and be at most 64 - characters. - :param entry_group: The entry group to create. Defaults to an empty entry group. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.EntryGroup` - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. 
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group_id", - "entry_group", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryGroupLink(),) - - def __init__( - self, - *, - location: str, - entry_group_id: str, - entry_group: dict | EntryGroup, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group_id = entry_group_id - self.entry_group = entry_group - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - result = hook.create_entry_group( - location=self.location, - entry_group_id=self.entry_group_id, - entry_group=self.entry_group, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except AlreadyExists: - self.log.info("Entry already exists. 
Skipping create operation.") - result = hook.get_entry_group( - location=self.location, - entry_group=self.entry_group_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - _, _, entry_group_id = result.name.rpartition("/") - self.log.info("Current entry group ID: %s", entry_group_id) - context["ti"].xcom_push(key="entry_group_id", value=entry_group_id) - DataCatalogEntryGroupLink.persist( - context=context, - entry_group_id=self.entry_group_id, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return EntryGroup.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateEntryOperator, " - "airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogCreateTagOperator(GoogleCloudBaseOperator): - """ - Creates a tag on an entry. - - The newly created tag ID are saved under the ``tag_id`` key in XCOM. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogCreateTagOperator` - - :param location: Required. The location of the tag to create. - :param entry_group: Required. Entry group ID under which the tag is created. - :param entry: Required. Entry group ID under which the tag is created. - :param tag: Required. The tag to create. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :param template_id: Required. Template ID used to create tag - :param project_id: The ID of the Google Cloud project that owns the tag. 
- If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry", - "tag", - "template_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry: str, - tag: dict | Tag, - template_id: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry = entry - self.tag = tag - self.template_id = template_id - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - tag = hook.create_tag( - location=self.location, - entry_group=self.entry_group, - entry=self.entry, - tag=self.tag, - template_id=self.template_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except AlreadyExists: - self.log.info("Tag already exists. 
Skipping create operation.") - project_id = self.project_id or hook.project_id - if project_id is None: - raise RuntimeError("The project id must be set here") - if self.template_id: - template_name = DataCatalogClient.tag_template_path( - project_id, self.location, self.template_id - ) - else: - if isinstance(self.tag, Tag): - template_name = self.tag.template - else: - template_name = self.tag["template"] - - tag = hook.get_tag_for_template_name( - location=self.location, - entry_group=self.entry_group, - template_name=template_name, - entry=self.entry, - project_id=project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - _, _, tag_id = tag.name.rpartition("/") - self.log.info("Current Tag ID: %s", tag_id) - context["ti"].xcom_push(key="tag_id", value=tag_id) - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry, - entry_group_id=self.entry_group, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return Tag.to_dict(tag) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogCreateTagTemplateOperator(GoogleCloudBaseOperator): - """ - Creates a tag template. - - The newly created tag template are saved under the ``tag_template_id`` key in XCOM. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogCreateTagTemplateOperator` - - :param location: Required. The location of the tag template to create. - :param tag_template_id: Required. The id of the tag template to create. - :param tag_template: Required. The tag template to create. 
- - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :param project_id: The ID of the Google Cloud project that owns the tag template. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template_id", - "tag_template", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - location: str, - tag_template_id: str, - tag_template: dict | TagTemplate, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template_id = tag_template_id - self.tag_template = tag_template - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - result = hook.create_tag_template( - location=self.location, - tag_template_id=self.tag_template_id, - tag_template=self.tag_template, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except AlreadyExists: - self.log.info("Tag Template already exists. 
Skipping create operation.") - result = hook.get_tag_template( - location=self.location, - tag_template=self.tag_template_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - _, _, tag_template = result.name.rpartition("/") - self.log.info("Current Tag ID: %s", tag_template) - context["ti"].xcom_push(key="tag_template_id", value=tag_template) - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template_id, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return TagTemplate.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator, " - "airflow.providers.google.cloud.operators.dataplex.DataplexCatalogCreateAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogCreateTagTemplateFieldOperator(GoogleCloudBaseOperator): - r""" - Creates a field in a tag template. - - The newly created tag template field are saved under the ``tag_template_field_id`` key in XCOM. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogCreateTagTemplateFieldOperator` - - :param location: Required. The location of the tag template field to create. - :param tag_template: Required. The id of the tag template to create. - :param tag_template_field_id: Required. The ID of the tag template field to create. Field ids can - contain letters (both uppercase and lowercase), numbers (0-9), underscores (\_) and dashes (-). - Field IDs must be at least 1 character long and at most 128 characters long. Field IDs must also - be unique within their template. - :param tag_template_field: Required. The tag template field to create. 
- - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :param project_id: The ID of the Google Cloud project that owns the tag template field. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template", - "tag_template_field_id", - "tag_template_field", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - location: str, - tag_template: str, - tag_template_field_id: str, - tag_template_field: dict | TagTemplateField, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template = tag_template - self.tag_template_field_id = tag_template_field_id - self.tag_template_field = tag_template_field - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - result = hook.create_tag_template_field( - location=self.location, - tag_template=self.tag_template, - tag_template_field_id=self.tag_template_field_id, - tag_template_field=self.tag_template_field, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except AlreadyExists: - self.log.info("Tag template field already exists. 
Skipping create operation.") - tag_template = hook.get_tag_template( - location=self.location, - tag_template=self.tag_template, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - result = tag_template.fields[self.tag_template_field_id] - - self.log.info("Current Tag ID: %s", self.tag_template_field_id) - context["ti"].xcom_push(key="tag_template_field_id", value=self.tag_template_field_id) - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return TagTemplateField.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogDeleteEntryOperator(GoogleCloudBaseOperator): - """ - Deletes an existing entry. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogDeleteEntryOperator` - - :param location: Required. The location of the entry to delete. - :param entry_group: Required. Entry group ID for entries that is deleted. - :param entry: Entry ID that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. 
- :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry = entry - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - hook.delete_entry( - location=self.location, - entry_group=self.entry_group, - entry=self.entry, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - 
metadata=self.metadata, - ) - except NotFound: - self.log.info("Entry doesn't exists. Skipping.") - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteEntryGroupOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogDeleteEntryGroupOperator(GoogleCloudBaseOperator): - """ - Deletes an EntryGroup. - - Only entry groups that do not contain entries can be deleted. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogDeleteEntryGroupOperator` - - :param location: Required. The location of the entry group to delete. - :param entry_group: Entry group ID that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. 
- If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - location: str, - entry_group: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - hook.delete_entry_group( - location=self.location, - entry_group=self.entry_group, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except NotFound: - self.log.info("Entry doesn't exists. skipping") - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogDeleteTagOperator(GoogleCloudBaseOperator): - """ - Deletes a tag. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogDeleteTagOperator` - - :param location: Required. The location of the tag to delete. - :param entry_group: Entry group ID for tag that is deleted. - :param entry: Entry ID for tag that is deleted. - :param tag: Identifier for TAG that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry", - "tag", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry: str, - tag: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry = entry - self.tag = tag - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - hook.delete_tag( - location=self.location, - entry_group=self.entry_group, - entry=self.entry, - tag=self.tag, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except NotFound: - self.log.info("Entry doesn't exists. skipping") - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogDeleteAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogDeleteTagTemplateOperator(GoogleCloudBaseOperator): - """ - Deletes a tag template and all tags using the template. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogDeleteTagTemplateOperator` - - :param location: Required. The location of the tag template to delete. - :param tag_template: ID for tag template that is deleted. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param force: Required. Currently, this field must always be set to ``true``. This confirms the - deletion of any possible tags using this template. ``force = false`` will be supported in the - future. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template", - "force", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - location: str, - tag_template: str, - force: bool, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template = tag_template - self.force = force - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - hook.delete_tag_template( - location=self.location, - tag_template=self.tag_template, - force=self.force, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except NotFound: - self.log.info("Tag Template doesn't exists. skipping") - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogDeleteTagTemplateFieldOperator(GoogleCloudBaseOperator): - """ - Deletes a field in a tag template and all uses of that field. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogDeleteTagTemplateFieldOperator` - - :param location: Required. 
The location of the tag template to delete. - :param tag_template: Tag Template ID for tag template field that is deleted. - :param field: Name of field that is deleted. - :param force: Required. This confirms the deletion of this field from any tags using this field. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template", - "field", - "force", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - location: str, - tag_template: str, - field: str, - force: bool, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template = tag_template - self.field = field - self.force = force - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - try: - hook.delete_tag_template_field( - location=self.location, - tag_template=self.tag_template, - field=self.field, - force=self.force, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - except NotFound: - self.log.info("Tag Template field doesn't exists. skipping") - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogGetEntryOperator(GoogleCloudBaseOperator): - """ - Gets an entry. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogGetEntryOperator` - - :param location: Required. 
The location of the entry to get. - :param entry_group: Required. The entry group of the entry to get. - :param entry: The ID of the entry to get. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry = entry - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> dict: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.get_entry( - location=self.location, - entry_group=self.entry_group, - entry=self.entry, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry, - entry_group_id=self.entry_group, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return Entry.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryGroupOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogGetEntryGroupOperator(GoogleCloudBaseOperator): - """ - Gets an entry group. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogGetEntryGroupOperator` - - :param location: Required. The location of the entry group to get. - :param entry_group: The ID of the entry group to get. - :param read_mask: The fields to return. If not set or empty, all fields are returned. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "read_mask", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryGroupLink(),) - - def __init__( - self, - *, - location: str, - entry_group: str, - read_mask: FieldMask, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.read_mask = read_mask - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> dict: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.get_entry_group( - location=self.location, - entry_group=self.entry_group, - read_mask=self.read_mask, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - DataCatalogEntryGroupLink.persist( - context=context, - entry_group_id=self.entry_group, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return EntryGroup.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogGetTagTemplateOperator(GoogleCloudBaseOperator): - """ - Gets a tag template. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogGetTagTemplateOperator` - - :param location: Required. The location of the tag template to get. - :param tag_template: Required. The ID of the tag template to get. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - location: str, - tag_template: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template = tag_template - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> dict: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.get_tag_template( - location=self.location, - tag_template=self.tag_template, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return TagTemplate.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogGetEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogListTagsOperator(GoogleCloudBaseOperator): - """ - Lists the tags on an Entry. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogListTagsOperator` - - :param location: Required. The location of the tags to get. - :param entry_group: Required. The entry group of the tags to get. - :param entry: Required. The entry of the tags to get. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per- resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - (Default: 100) - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "entry_group", - "entry", - "page_size", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - location: str, - entry_group: str, - entry: str, - page_size: int = 100, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.entry_group = entry_group - self.entry = entry - self.page_size = page_size - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> list: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.list_tags( - location=self.location, - entry_group=self.entry_group, - entry=self.entry, - page_size=self.page_size, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry, - entry_group_id=self.entry_group, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - return [Tag.to_dict(item) for item in result] - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogLookupEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class 
CloudDataCatalogLookupEntryOperator(GoogleCloudBaseOperator): - r""" - Get an entry by target resource name. - - This method allows clients to use the resource name from the source Google Cloud service - to get the Data Catalog Entry. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogLookupEntryOperator` - - :param linked_resource: The full name of the Google Cloud resource the Data Catalog entry - represents. See: https://cloud.google.com/apis/design/resource\_names#full\_resource\_name. Full - names are case-sensitive. - :param sql_resource: The SQL name of the entry. SQL names are case-sensitive. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "linked_resource", - "sql_resource", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - linked_resource: str | None = None, - sql_resource: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.linked_resource = linked_resource - self.sql_resource = sql_resource - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> dict: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.lookup_entry( - linked_resource=self.linked_resource, - sql_resource=self.sql_resource, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - project_id, location_id, entry_group_id, entry_id = result.name.split("/")[1::2] - DataCatalogEntryLink.persist( - context=context, - entry_id=entry_id, - entry_group_id=entry_group_id, - location_id=location_id, - project_id=project_id, - ) - return Entry.to_dict(result) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogRenameTagTemplateFieldOperator(GoogleCloudBaseOperator): - """ - Renames a field in a tag template. - - .. 
seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogRenameTagTemplateFieldOperator` - - :param location: Required. The location of the tag template field to rename. - :param tag_template: The tag template ID for field that is renamed. - :param field: Required. The old ID of this tag template field. For example, - ``my_old_field``. - :param new_tag_template_field_id: Required. The new ID of this tag template field. For example, - ``my_new_field``. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "location", - "tag_template", - "field", - "new_tag_template_field_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - location: str, - tag_template: str, - field: str, - new_tag_template_field_id: str, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.location = location - self.tag_template = tag_template - self.field = field - self.new_tag_template_field_id = new_tag_template_field_id - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - hook.rename_tag_template_field( - location=self.location, - tag_template=self.tag_template, - field=self.field, - new_tag_template_field_id=self.new_tag_template_field_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template, - location_id=self.location, - project_id=self.project_id or hook.project_id, - ) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogSearchEntriesOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class 
CloudDataCatalogSearchCatalogOperator(GoogleCloudBaseOperator): - r""" - Searches Data Catalog for multiple resources like entries, tags that match a query. - - This does not return the complete resource, only the resource identifier and high level fields. - Clients can subsequently call ``Get`` methods. - - Note that searches do not have full recall. There may be results that match your query but are not - returned, even in subsequent pages of results. These missing results may vary across repeated calls to - search. Do not rely on this method if you need to guarantee full recall. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogSearchCatalogOperator` - - :param scope: Required. The scope of this search request. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Scope` - :param query: Required. The query string in search query syntax. The query must be non-empty. - - Query strings can be simple as "x" or more qualified as: - - - name:x - - column:x - - description:y - - Note: Query tokens need to have a minimum of 3 characters for substring matching to work - correctly. See `Data Catalog Search Syntax `__ for more information. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per-resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - :param order_by: Specifies the ordering of results, currently supported case-sensitive choices are: - - - ``relevance``, only supports descending - - ``last_access_timestamp [asc|desc]``, defaults to descending if not specified - - ``last_modified_timestamp [asc|desc]``, defaults to descending if not specified - - If not specified, defaults to ``relevance`` descending. 
- :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "scope", - "query", - "page_size", - "order_by", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - - def __init__( - self, - *, - scope: dict | SearchCatalogRequest.Scope, - query: str, - page_size: int = 100, - order_by: str | None = None, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.scope = scope - self.query = query - self.page_size = page_size - self.order_by = order_by - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> list: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.search_catalog( - scope=self.scope, - query=self.query, - page_size=self.page_size, - order_by=self.order_by, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - return [SearchCatalogResult.to_dict(item) for item in result] - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogUpdateEntryOperator(GoogleCloudBaseOperator): - """ - Updates an existing entry. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogUpdateEntryOperator` - - :param entry: Required. The updated entry. The "name" field must be set. 
- - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :param update_mask: The fields to update on the entry. If absent or empty, all modifiable fields are - updated. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param location: Required. The location of the entry to update. - :param entry_group: The entry group ID for the entry that is being updated. - :param entry_id: The entry ID that is being updated. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "entry", - "update_mask", - "location", - "entry_group", - "entry_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - entry: dict | Entry, - update_mask: dict | FieldMask, - location: str | None = None, - entry_group: str | None = None, - entry_id: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.entry = entry - self.update_mask = update_mask - self.location = location - self.entry_group = entry_group - self.entry_id = entry_id - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.update_entry( - entry=self.entry, - update_mask=self.update_mask, - location=self.location, - entry_group=self.entry_group, - entry_id=self.entry_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - location_id, entry_group_id, entry_id = result.name.split("/")[3::2] - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry_id or entry_id, - entry_group_id=self.entry_group or entry_group_id, - location_id=self.location or location_id, - project_id=self.project_id or hook.project_id, - ) - - -@deprecated( - planned_removal_date="January 30, 2026", - 
use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateEntryOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogUpdateTagOperator(GoogleCloudBaseOperator): - """ - Updates an existing tag. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogUpdateTagOperator` - - :param tag: Required. The updated tag. The "name" field must be set. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :param update_mask: The fields to update on the Tag. If absent or empty, all modifiable fields are - updated. Currently the only modifiable field is the field ``fields``. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param location: Required. The location of the tag to rename. - :param entry_group: The entry group ID for the tag that is being updated. - :param entry: The entry ID for the tag that is being updated. - :param tag_id: The tag ID that is being updated. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. 
- :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "tag", - "update_mask", - "location", - "entry_group", - "entry", - "tag_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogEntryLink(),) - - def __init__( - self, - *, - tag: dict | Tag, - update_mask: dict | FieldMask, - location: str | None = None, - entry_group: str | None = None, - entry: str | None = None, - tag_id: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.tag = tag - self.update_mask = update_mask - self.location = location - self.entry_group = entry_group - self.entry = entry - self.tag_id = tag_id - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.update_tag( - tag=self.tag, - update_mask=self.update_mask, - location=self.location, - 
entry_group=self.entry_group, - entry=self.entry, - tag_id=self.tag_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - location_id, entry_group_id, entry_id = result.name.split("/")[3:8:2] - DataCatalogEntryLink.persist( - context=context, - entry_id=self.entry or entry_id, - entry_group_id=self.entry_group or entry_group_id, - location_id=self.location or location_id, - project_id=self.project_id or hook.project_id, - ) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator", - reason="The Data Catalog will be discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogUpdateTagTemplateOperator(GoogleCloudBaseOperator): - """ - Updates a tag template. - - This method cannot be used to update the fields of a template. The tag - template fields are represented as separate resources and should be updated using their own - create/update/delete methods. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogUpdateTagTemplateOperator` - - :param tag_template: Required. The template to update. The "name" field must be set. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :param update_mask: The field mask specifies the parts of the template to overwrite. - - If absent or empty, all of the allowed fields above will be updated. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param location: Required. The location of the tag template to rename. - :param tag_template_id: Optional. The tag template ID for the entry that is being updated. 
- :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "tag_template", - "update_mask", - "location", - "tag_template_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - tag_template: dict | TagTemplate, - update_mask: dict | FieldMask, - location: str | None = None, - tag_template_id: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.tag_template = tag_template - self.update_mask = update_mask - self.location = location - self.tag_template_id = tag_template_id - self.project_id = project_id - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.update_tag_template( - tag_template=self.tag_template, - update_mask=self.update_mask, - location=self.location, - tag_template_id=self.tag_template_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - location_id, tag_template_id = result.name.split("/")[3::2] - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template_id or tag_template_id, - location_id=self.location or location_id, - project_id=self.project_id or hook.project_id, - ) - - -@deprecated( - planned_removal_date="January 30, 2026", - use_instead="airflow.providers.google.cloud.operators.dataplex.DataplexCatalogUpdateAspectTypeOperator", - reason="The Data Catalog will be 
discontinued on January 30, 2026 " - "in favor of Dataplex Universal Catalog.", - category=AirflowProviderDeprecationWarning, -) -class CloudDataCatalogUpdateTagTemplateFieldOperator(GoogleCloudBaseOperator): - """ - Updates a field in a tag template. This method cannot be used to update the field type. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:CloudDataCatalogUpdateTagTemplateFieldOperator` - - :param tag_template_field: Required. The template to update. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :param update_mask: The field mask specifies the parts of the template to be updated. Allowed fields: - - - ``display_name`` - - ``type.enum_type`` - - If ``update_mask`` is not set or empty, all of the allowed fields above will be updated. - - When updating an enum type, the provided values will be merged with the existing values. - Therefore, enum values can only be added, existing enum values cannot be deleted nor renamed. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.protobuf.field_mask_pb2.FieldMask` - :param tag_template_field_name: Optional. The name of the tag template field to rename. - :param location: Optional. The location of the tag to rename. - :param tag_template: Optional. The tag template ID for tag template field to rename. - :param tag_template_field_id: Optional. The ID of tag template field to rename. - :param project_id: The ID of the Google Cloud project that owns the entry group. - If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be - retried using a default configuration. - :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if - ``retry`` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. - Defaults to 'google_cloud_default'. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields: Sequence[str] = ( - "tag_template_field", - "update_mask", - "tag_template_field_name", - "location", - "tag_template", - "tag_template_field_id", - "project_id", - "retry", - "timeout", - "metadata", - "gcp_conn_id", - "impersonation_chain", - ) - operator_extra_links = (DataCatalogTagTemplateLink(),) - - def __init__( - self, - *, - tag_template_field: dict | TagTemplateField, - update_mask: dict | FieldMask, - tag_template_field_name: str | None = None, - location: str | None = None, - tag_template: str | None = None, - tag_template_field_id: str | None = None, - project_id: str = PROVIDE_PROJECT_ID, - retry: Retry | _MethodDefault = DEFAULT, - timeout: float | None = None, - metadata: Sequence[tuple[str, str]] = (), - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.tag_template_field_name = tag_template_field_name - self.location = location - self.tag_template = tag_template - self.tag_template_field_id = tag_template_field_id - self.project_id = project_id - 
self.tag_template_field = tag_template_field - self.update_mask = update_mask - self.retry = retry - self.timeout = timeout - self.metadata = metadata - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - hook = CloudDataCatalogHook( - gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain - ) - result = hook.update_tag_template_field( - tag_template_field=self.tag_template_field, - update_mask=self.update_mask, - tag_template_field_name=self.tag_template_field_name, - location=self.location, - tag_template=self.tag_template, - tag_template_field_id=self.tag_template_field_id, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - location_id, tag_template_id = result.name.split("/")[3:6:2] - DataCatalogTagTemplateLink.persist( - context=context, - tag_template_id=self.tag_template or tag_template_id, - location_id=self.location or location_id, - project_id=self.project_id or hook.project_id, - ) diff --git a/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py b/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py index 07780a2754034..7884bac0c00fd 100644 --- a/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +++ b/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py @@ -477,7 +477,7 @@ def execute(self, context: Context): @deprecated( planned_removal_date="March 24, 2026", - use_instead="airflow.providers.google.cloud.operators.vertex_ai.generative_model.SupervisedFineTuningTrainOperator", + use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAISupervisedFineTuningTrainOperator", category=AirflowProviderDeprecationWarning, ) class CreateAutoMLVideoTrainingJobOperator(AutoMLTrainingJobBaseOperator): diff --git 
a/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py b/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py index 94c9f01e8b215..de500e3388939 100644 --- a/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +++ b/providers/google/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py @@ -20,351 +20,17 @@ from __future__ import annotations from collections.abc import Sequence -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING -from google.api_core import exceptions - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.compat.sdk import AirflowException from airflow.providers.google.cloud.hooks.vertex_ai.generative_model import ( - ExperimentRunHook, GenerativeModelHook, ) from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator -from airflow.providers.google.common.deprecated import deprecated if TYPE_CHECKING: from airflow.providers.common.compat.sdk import Context -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateEmbeddingsOperator", - category=AirflowProviderDeprecationWarning, -) -class TextEmbeddingModelGetEmbeddingsOperator(GoogleCloudBaseOperator): - """ - Uses the Vertex AI Embeddings API to generate embeddings based on prompt. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI Generative Model API, in order to elicit a specific response (templated). - :param pretrained_model: Required. Model, optimized for performing text embeddings. 
- :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt", "pretrained_model") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - pretrained_model: str, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - self.log.info("Generating text embeddings") - response = self.hook.text_embedding_model_get_embeddings( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - context["ti"].xcom_push(key="model_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class 
GenerativeModelGenerateContentOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Gemini Pro foundation model to generate content. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param tools: Optional. A list of tools available to the model during evaluation, such as a data store. - :param system_instruction: Optional. An instruction given to the model to guide its behavior. - :param pretrained_model: Required. The name of the model to use for content generation, - which can be a text-only or multimodal model. For example, `gemini-pro` or - `gemini-pro-vision`. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "contents", "pretrained_model") - - def __init__( - self, - *, - project_id: str, - location: str, - contents: list, - tools: list | None = None, - generation_config: dict | None = None, - safety_settings: dict | None = None, - system_instruction: str | None = None, - pretrained_model: str, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.contents = contents - self.tools = tools - self.generation_config = generation_config - self.safety_settings = safety_settings - self.system_instruction = system_instruction - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.generative_model_generate_content( - project_id=self.project_id, - location=self.location, - contents=self.contents, - tools=self.tools, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - system_instruction=self.system_instruction, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - context["ti"].xcom_push(key="model_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAISupervisedFineTuningTrainOperator", - category=AirflowProviderDeprecationWarning, -) -class SupervisedFineTuningTrainOperator(GoogleCloudBaseOperator): - """ - Use the Supervised Fine Tuning API to create a tuning job. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to. 
- :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param source_model: Required. A pre-trained model optimized for performing natural - language tasks such as classification, summarization, extraction, content - creation, and ideation. - :param train_dataset: Required. Cloud Storage URI of your training dataset. The dataset - must be formatted as a JSONL file. For best results, provide at least 100 to 500 examples. - :param tuned_model_display_name: Optional. Display name of the TunedModel. The name can be up - to 128 characters long and can consist of any UTF-8 characters. - :param validation_dataset: Optional. Cloud Storage URI of your training dataset. The dataset must be - formatted as a JSONL file. For best results, provide at least 100 to 500 examples. - :param epochs: Optional. To optimize performance on a specific dataset, try using a higher - epoch value. Increasing the number of epochs might improve results. However, be cautious - about over-fitting, especially when dealing with small datasets. If over-fitting occurs, - consider lowering the epoch number. - :param adapter_size: Optional. Adapter size for tuning. - :param learning_multiplier_rate: Optional. Multiplier for adjusting the default learning rate. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ( - "location", - "project_id", - "impersonation_chain", - "train_dataset", - "validation_dataset", - "source_model", - ) - - def __init__( - self, - *, - project_id: str, - location: str, - source_model: str, - train_dataset: str, - tuned_model_display_name: str | None = None, - validation_dataset: str | None = None, - epochs: int | None = None, - adapter_size: Literal[1, 4, 8, 16] | None = None, - learning_rate_multiplier: float | None = None, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.source_model = source_model - self.train_dataset = train_dataset - self.tuned_model_display_name = tuned_model_display_name - self.validation_dataset = validation_dataset - self.epochs = epochs - self.adapter_size = adapter_size - self.learning_rate_multiplier = learning_rate_multiplier - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.supervised_fine_tuning_train( - project_id=self.project_id, - location=self.location, - source_model=self.source_model, - train_dataset=self.train_dataset, - validation_dataset=self.validation_dataset, - epochs=self.epochs, - adapter_size=self.adapter_size, - learning_rate_multiplier=self.learning_rate_multiplier, - tuned_model_display_name=self.tuned_model_display_name, - ) - - self.log.info("Tuned Model Name: %s", response.tuned_model_name) - self.log.info("Tuned Model Endpoint Name: %s", response.tuned_model_endpoint_name) - - context["ti"].xcom_push(key="tuned_model_name", value=response.tuned_model_name) - context["ti"].xcom_push(key="tuned_model_endpoint_name", value=response.tuned_model_endpoint_name) - - result = { - 
"tuned_model_name": response.tuned_model_name, - "tuned_model_endpoint_name": response.tuned_model_endpoint_name, - } - - return result - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAICountTokensOperator", - category=AirflowProviderDeprecationWarning, -) -class CountTokensOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Count Tokens API to calculate the number of input tokens before sending a request to the Gemini API. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. - :param pretrained_model: Required. Model, supporting prompts with text-only input, - including natural language tasks, multi-turn text and code chat, - and code generation. It can output text and code. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "contents", "pretrained_model") - - def __init__( - self, - *, - project_id: str, - location: str, - contents: list, - pretrained_model: str, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.contents = contents - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.count_tokens( - project_id=self.project_id, - location=self.location, - contents=self.contents, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Total tokens: %s", response.total_tokens) - self.log.info("Total billable characters: %s", response.total_billable_characters) - - context["ti"].xcom_push(key="total_tokens", value=response.total_tokens) - context["ti"].xcom_push(key="total_billable_characters", value=response.total_billable_characters) - - class RunEvaluationOperator(GoogleCloudBaseOperator): """ Use the Rapid Evaluation API to evaluate a model. @@ -462,235 +128,3 @@ def execute(self, context: Context): ) return response.summary_metrics - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAICreateCachedContentOperator", - category=AirflowProviderDeprecationWarning, -) -class CreateCachedContentOperator(GoogleCloudBaseOperator): - """ - Create CachedContent to reduce the cost of requests that contain repeat content with high input token counts. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. 
The ID of the Google Cloud location that the service belongs to. - :param model_name: Required. The name of the publisher model to use for cached content. - :param system_instruction: Developer set system instruction. - :param contents: The content to cache. - :param ttl_hours: The TTL for this resource in hours. The expiration time is computed: now + TTL. - Defaults to one hour. - :param display_name: The user-generated meaningful display name of the cached content - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ( - "location", - "project_id", - "impersonation_chain", - "model_name", - "contents", - "system_instruction", - ) - - def __init__( - self, - *, - project_id: str, - location: str, - model_name: str, - system_instruction: Any | None = None, - contents: list[Any] | None = None, - ttl_hours: float = 1, - display_name: str | None = None, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.project_id = project_id - self.location = location - self.model_name = model_name - self.system_instruction = system_instruction - self.contents = contents - self.ttl_hours = ttl_hours - self.display_name = display_name - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - cached_content_name = self.hook.create_cached_content( - project_id=self.project_id, - location=self.location, - model_name=self.model_name, - system_instruction=self.system_instruction, - contents=self.contents, - ttl_hours=self.ttl_hours, - display_name=self.display_name, - ) - - self.log.info("Cached Content Name: %s", cached_content_name) - - return cached_content_name - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.gen_ai.generative_model.GenAIGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class GenerateFromCachedContentOperator(GoogleCloudBaseOperator): - """ - Generate a response from CachedContent. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param cached_content_name: Required. The name of the cached content resource. 
- :param contents: Required. The multi-part content of a message that a user or a program - gives to the generative model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ( - "location", - "project_id", - "impersonation_chain", - "cached_content_name", - "contents", - ) - - def __init__( - self, - *, - project_id: str, - location: str, - cached_content_name: str, - contents: list, - generation_config: dict | None = None, - safety_settings: dict | None = None, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.project_id = project_id - self.location = location - self.cached_content_name = cached_content_name - self.contents = contents - self.generation_config = generation_config - self.safety_settings = safety_settings - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - cached_content_text = self.hook.generate_from_cached_content( - 
project_id=self.project_id, - location=self.location, - cached_content_name=self.cached_content_name, - contents=self.contents, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - ) - - self.log.info("Cached Content Response: %s", cached_content_text) - - return cached_content_text - - -@deprecated( - planned_removal_date="January 3, 2026", - use_instead="airflow.providers.google.cloud.operators.vertex_ai.experiment_service.DeleteExperimentRunOperator", - category=AirflowProviderDeprecationWarning, -) -class DeleteExperimentRunOperator(GoogleCloudBaseOperator): - """ - Use the Rapid Evaluation API to evaluate a model. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param experiment_name: Required. The name of the evaluation experiment. - :param experiment_run_name: Required. The specific run name or ID for this experiment. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ( - "location", - "project_id", - "impersonation_chain", - "experiment_name", - "experiment_run_name", - ) - - def __init__( - self, - *, - project_id: str, - location: str, - experiment_name: str, - experiment_run_name: str, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.experiment_name = experiment_name - self.experiment_run_name = experiment_run_name - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context) -> None: - self.hook = ExperimentRunHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - try: - self.hook.delete_experiment_run( - project_id=self.project_id, - location=self.location, - experiment_name=self.experiment_name, - experiment_run_name=self.experiment_run_name, - ) - except exceptions.NotFound: - raise AirflowException(f"Experiment Run with name {self.experiment_run_name} not found") - - self.log.info("Deleted experiment run: %s", self.experiment_run_name) diff --git a/providers/google/src/airflow/providers/google/get_provider_info.py b/providers/google/src/airflow/providers/google/get_provider_info.py index 4f45d95ad46bb..cab93aa57c9e1 100644 --- a/providers/google/src/airflow/providers/google/get_provider_info.py +++ b/providers/google/src/airflow/providers/google/get_provider_info.py @@ -268,13 +268,6 @@ def get_provider_info(): "logo": "/docs/integration-logos/Google-Data-Proc.png", "tags": ["gcp"], }, - { - "integration-name": "Google Data Catalog", - "external-doc-url": "https://cloud.google.com/data-catalog/", - "how-to-guide": ["/docs/apache-airflow-providers-google/operators/cloud/datacatalog.rst"], - "logo": "/docs/integration-logos/Google-Data-Catalog.png", - "tags": ["gcp"], - }, { "integration-name": "Google Dataflow", 
"external-doc-url": "https://cloud.google.com/dataflow/", diff --git a/providers/google/tests/unit/google/cloud/hooks/test_datacatalog.py b/providers/google/tests/unit/google/cloud/hooks/test_datacatalog.py deleted file mode 100644 index f1a93a9aa8178..0000000000000 --- a/providers/google/tests/unit/google/cloud/hooks/test_datacatalog.py +++ /dev/null @@ -1,1600 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import re -from collections.abc import Sequence -from copy import deepcopy -from typing import TYPE_CHECKING -from unittest import mock - -import pytest -from google.api_core.retry import Retry -from google.cloud.datacatalog import CreateTagRequest, CreateTagTemplateRequest, Entry, Tag, TagTemplate -from google.protobuf.field_mask_pb2 import FieldMask - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.compat.sdk import AirflowException -from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook - -from unit.google.cloud.utils.base_gcp_mock import ( - mock_base_gcp_hook_default_project_id, - mock_base_gcp_hook_no_default_project_id, -) - -if TYPE_CHECKING: - from google.api_core.gapic_v1.method import _MethodDefault - -TEST_GCP_CONN_ID: str = "test-gcp-conn-id" -TEST_LOCATION: str = "europe-west-3b" -TEST_ENTRY_ID: str = "test-entry-id" -TEST_ENTRY: dict = {} -TEST_RETRY: Retry | _MethodDefault = Retry() -TEST_TIMEOUT: float = 4 -TEST_METADATA: Sequence[tuple[str, str]] = () -TEST_ENTRY_GROUP_ID: str = "test-entry-group-id" -TEST_ENTRY_GROUP: dict = {} -TEST_TAG: dict = {} -TEST_TAG_TEMPLATE_ID: str = "test-tag-template-id" -TEST_TAG_TEMPLATE: dict = {"name": TEST_TAG_TEMPLATE_ID} -TEST_TAG_TEMPLATE_FIELD_ID: str = "test-tag-template-field-id" -TEST_TAG_TEMPLATE_FIELD: dict = {} -TEST_FORCE: bool = False -TEST_READ_MASK: FieldMask = FieldMask(paths=["name"]) -TEST_RESOURCE: str = "test-resource" -TEST_PAGE_SIZE: int = 50 -TEST_LINKED_RESOURCE: str = "test-linked-resource" -TEST_SQL_RESOURCE: str = "test-sql-resource" -TEST_NEW_TAG_TEMPLATE_FIELD_ID: str = "test-new-tag-template-field-id" -TEST_SCOPE: dict = {"include_project_ids": ["example-scope-project"]} -TEST_QUERY: str = "test-query" -TEST_ORDER_BY: str = "test-order-by" -TEST_UPDATE_MASK: dict = {"fields": ["name"]} -TEST_PARENT: str = "test-parent" -TEST_NAME: str = "test-name" -TEST_TAG_ID: str = 
"test-tag-id" -TEST_LOCATION_PATH: str = f"projects/{{}}/locations/{TEST_LOCATION}" -TEST_ENTRY_PATH: str = ( - f"projects/{{}}/locations/{TEST_LOCATION}/entryGroups/{TEST_ENTRY_GROUP_ID}/entries/{TEST_ENTRY_ID}" -) -TEST_ENTRY_GROUP_PATH: str = f"projects/{{}}/locations/{TEST_LOCATION}/entryGroups/{TEST_ENTRY_GROUP_ID}" -TEST_TAG_TEMPLATE_PATH: str = f"projects/{{}}/locations/{TEST_LOCATION}/tagTemplates/{TEST_TAG_TEMPLATE_ID}" -TEST_TAG_TEMPLATE_FIELD_PATH: str = ( - f"projects/{{}}/locations/{TEST_LOCATION}/tagTemplates/" - f"{TEST_TAG_TEMPLATE_ID}/fields/{TEST_TAG_TEMPLATE_FIELD_ID}" -) -TEST_TAG_PATH: str = ( - f"projects/{{}}/locations/{TEST_LOCATION}/entryGroups/{TEST_ENTRY_GROUP_ID}" - f"/entries/{TEST_ENTRY_ID}/tags/{TEST_TAG_ID}" -) -TEST_PROJECT_ID_1 = "example-project-1" -TEST_PROJECT_ID_2 = "example-project-2" -TEST_CREDENTIALS = mock.MagicMock() - - -class TestCloudDataCatalog: - def setup_method(self): - with pytest.warns(AirflowProviderDeprecationWarning): - with mock.patch( - "airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.__init__", - new=mock_base_gcp_hook_default_project_id, - ): - self.hook = CloudDataCatalogHook(gcp_conn_id="test") - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_lookup_entry_with_linked_resource(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.lookup_entry( - linked_resource=TEST_LINKED_RESOURCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.lookup_entry.assert_called_once_with( - request=dict(linked_resource=TEST_LINKED_RESOURCE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - 
"airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_lookup_entry_with_sql_resource(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.lookup_entry( - sql_resource=TEST_SQL_RESOURCE, retry=TEST_RETRY, timeout=TEST_TIMEOUT, metadata=TEST_METADATA - ) - mock_get_conn.return_value.lookup_entry.assert_called_once_with( - request=dict(sql_resource=TEST_SQL_RESOURCE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_lookup_entry_without_resource(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises( - AirflowException, match=re.escape("At least one of linked_resource, sql_resource should be set.") - ): - self.hook.lookup_entry(retry=TEST_RETRY, timeout=TEST_TIMEOUT, metadata=TEST_METADATA) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_search_catalog(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.search_catalog( - scope=TEST_SCOPE, - query=TEST_QUERY, - page_size=TEST_PAGE_SIZE, - order_by=TEST_ORDER_BY, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.search_catalog.assert_called_once_with( - request=dict( - scope=TEST_SCOPE, query=TEST_QUERY, page_size=TEST_PAGE_SIZE, order_by=TEST_ORDER_BY - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, 
- metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogWithDefaultProjectIdHook: - def setup_method(self): - with pytest.warns(AirflowProviderDeprecationWarning): - with mock.patch( - "airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.__init__", - new=mock_base_gcp_hook_default_project_id, - ): - self.hook = CloudDataCatalogHook(gcp_conn_id="test") - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_entry.assert_called_once_with( - request=dict( - parent=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_1), - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_entry_group( - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_entry_group.assert_called_once_with( - request=dict( - parent=TEST_LOCATION_PATH.format(TEST_PROJECT_ID_1), - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - ), - 
retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=deepcopy(TEST_TAG), - template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag.assert_called_once_with( - request=CreateTagRequest( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - tag=Tag(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1)), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_protobuff(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=Tag(), - template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag.assert_called_once_with( - request=CreateTagRequest( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - tag=Tag(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1)), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - 
return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag_template( - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag_template.assert_called_once_with( - request=CreateTagTemplateRequest( - parent=TEST_LOCATION_PATH.format(TEST_PROJECT_ID_1), - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag_template_field.assert_called_once_with( - request=dict( - parent=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1), - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - 
@mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_entry.assert_called_once_with( - request=dict( - name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_entry_group.assert_called_once_with( - request=dict( - name=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_1), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag.assert_called_once_with( - request=dict( - 
name=TEST_TAG_PATH.format(TEST_PROJECT_ID_1), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - force=TEST_FORCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag_template.assert_called_once_with( - request=dict(name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1), force=TEST_FORCE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - force=TEST_FORCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag_template_field.assert_called_once_with( - request=dict( - name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_1), - force=TEST_FORCE, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - 
@mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_entry.assert_called_once_with( - request=dict( - name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - read_mask=TEST_READ_MASK, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_entry_group.assert_called_once_with( - request=dict( - name=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_1), - read_mask=TEST_READ_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_tag_template.assert_called_once_with( - request=dict( - 
name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_list_tags(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.list_tags( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - page_size=TEST_PAGE_SIZE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.list_tags.assert_called_once_with( - request=dict( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - page_size=TEST_PAGE_SIZE, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_for_template_name(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - tag_1 = mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format("invalid-project")) - tag_2 = mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1)) - - mock_get_conn.return_value.list_tags.return_value = [tag_1, tag_2] - result = self.hook.get_tag_for_template_name( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - template_name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.list_tags.assert_called_once_with( - request=dict( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1), - page_size=100, - ), - retry=TEST_RETRY, - 
timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - assert result == tag_2 - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_rename_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.rename_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.rename_tag_template_field.assert_called_once_with( - request=dict( - name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_1), - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_entry( - entry=TEST_ENTRY, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_entry.assert_called_once_with( - request=dict( - entry=Entry(name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_1)), - update_mask=TEST_UPDATE_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - 
return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_tag( - tag=deepcopy(TEST_TAG), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag_id=TEST_TAG_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag.assert_called_once_with( - request=dict(tag=Tag(name=TEST_TAG_PATH.format(TEST_PROJECT_ID_1)), update_mask=TEST_UPDATE_MASK), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_tag_template( - tag_template=TEST_TAG_TEMPLATE, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag_template.assert_called_once_with( - request=dict( - tag_template=TagTemplate(name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_1)), - update_mask=TEST_UPDATE_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, TEST_PROJECT_ID_1), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - 
self.hook.update_tag_template_field( - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - tag_template=TEST_TAG_TEMPLATE_ID, - location=TEST_LOCATION, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag_template_field.assert_called_once_with( - request=dict( - name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_1), - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogWithoutDefaultProjectIdHook: - def setup_method(self): - with pytest.warns(AirflowProviderDeprecationWarning): - with mock.patch( - "airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.__init__", - new=mock_base_gcp_hook_no_default_project_id, - ): - self.hook = CloudDataCatalogHook(gcp_conn_id="test") - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_entry.assert_called_once_with( - request=dict( - parent=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_2), - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - 
@mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_entry_group( - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_entry_group.assert_called_once_with( - request=dict( - parent=TEST_LOCATION_PATH.format(TEST_PROJECT_ID_2), - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=deepcopy(TEST_TAG), - template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag.assert_called_once_with( - request=CreateTagRequest( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2), - tag=Tag(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_protobuff(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag( 
- location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=Tag(), - template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag.assert_called_once_with( - request=CreateTagRequest( - parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2), - tag=Tag(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)), - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag_template( - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag_template.assert_called_once_with( - request=CreateTagTemplateRequest( - parent=TEST_LOCATION_PATH.format(TEST_PROJECT_ID_2), - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.create_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - 
project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.create_tag_template_field.assert_called_once_with( - request=dict( - parent=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2), - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_entry.assert_called_once_with( - request=dict(name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2)), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_entry_group.assert_called_once_with( - request=dict(name=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_2)), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - 
"airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag.assert_called_once_with( - request=dict(name=TEST_TAG_PATH.format(TEST_PROJECT_ID_2)), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag_template.assert_called_once_with( - request=dict(name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2), force=TEST_FORCE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.delete_tag_template_field( - location=TEST_LOCATION, - 
tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.delete_tag_template_field.assert_called_once_with( - request=dict(name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_2), force=TEST_FORCE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_entry.assert_called_once_with( - request=dict(name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2)), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - read_mask=TEST_READ_MASK, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_entry_group.assert_called_once_with( - request=dict( - name=TEST_ENTRY_GROUP_PATH.format(TEST_PROJECT_ID_2), - read_mask=TEST_READ_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - 
metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.get_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.get_tag_template.assert_called_once_with( - request=dict(name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_list_tags(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.list_tags( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - page_size=TEST_PAGE_SIZE, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.list_tags.assert_called_once_with( - request=dict(parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2), page_size=TEST_PAGE_SIZE), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_for_template_name(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - tag_1 = 
mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format("invalid-project")) - tag_2 = mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)) - - mock_get_conn.return_value.list_tags.return_value = [tag_1, tag_2] - result = self.hook.get_tag_for_template_name( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - template_name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2), - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.list_tags.assert_called_once_with( - request=dict(parent=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2), page_size=100), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - assert result == tag_2 - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_rename_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.rename_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.rename_tag_template_field.assert_called_once_with( - request=dict( - name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_2), - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - 
def test_update_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_entry( - entry=TEST_ENTRY, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_entry.assert_called_once_with( - request=dict( - entry=Entry(name=TEST_ENTRY_PATH.format(TEST_PROJECT_ID_2)), update_mask=TEST_UPDATE_MASK - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_tag( - tag=deepcopy(TEST_TAG), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag_id=TEST_TAG_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag.assert_called_once_with( - request=dict(tag=Tag(name=TEST_TAG_PATH.format(TEST_PROJECT_ID_2)), update_mask=TEST_UPDATE_MASK), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_tag_template( - tag_template=TEST_TAG_TEMPLATE, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, 
- project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag_template.assert_called_once_with( - request=dict( - tag_template=TagTemplate(name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)), - update_mask=TEST_UPDATE_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - self.hook.update_tag_template_field( - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - tag_template=TEST_TAG_TEMPLATE_ID, - location=TEST_LOCATION, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID_2, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_get_conn.return_value.update_tag_template_field.assert_called_once_with( - request=dict( - name=TEST_TAG_TEMPLATE_FIELD_PATH.format(TEST_PROJECT_ID_2), - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - ), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -TEST_MESSAGE = re.escape( - "The project id must be passed either as keyword project_id parameter or as project_id extra in " - "Google Cloud connection definition. Both are not set!" 
-) - - -class TestCloudDataCatalogMissingProjectIdHook: - def setup_method(self): - with pytest.warns(AirflowProviderDeprecationWarning): - with mock.patch( - "airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.__init__", - new=mock_base_gcp_hook_no_default_project_id, - ): - self.hook = CloudDataCatalogHook(gcp_conn_id="test") - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_entry_group( - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_tag( - 
location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=deepcopy(TEST_TAG), - template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_protobuff(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=Tag(), - template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_tag_template( - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_create_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.create_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - 
tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.delete_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.delete_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.delete_tag( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - 
"airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.delete_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - force=TEST_FORCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_delete_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.delete_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - force=TEST_FORCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.get_entry( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - 
@mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_entry_group(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.get_entry_group( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - read_mask=TEST_READ_MASK, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.get_tag_template( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_list_tags(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.list_tags( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - page_size=TEST_PAGE_SIZE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_get_tag_for_template_name(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - tag_1 = 
mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format("invalid-project")) - tag_2 = mock.MagicMock(template=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2)) - - mock_get_conn.return_value.list_tags.return_value = [tag_1, tag_2] - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.get_tag_for_template_name( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - template_name=TEST_TAG_TEMPLATE_PATH.format(TEST_PROJECT_ID_2), - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_rename_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.rename_tag_template_field( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_entry(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.update_entry( - entry=TEST_ENTRY, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - 
"airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.update_tag( - tag=deepcopy(TEST_TAG), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag_id=TEST_TAG_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.update_tag_template( - tag_template=TEST_TAG_TEMPLATE, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - @mock.patch( - "airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id", - return_value=(TEST_CREDENTIALS, None), - ) - @mock.patch("airflow.providers.google.cloud.hooks.datacatalog.CloudDataCatalogHook.get_conn") - def test_update_tag_template_field(self, mock_get_conn, mock_get_creds_and_project_id) -> None: - with pytest.raises(AirflowException, match=TEST_MESSAGE): - self.hook.update_tag_template_field( - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - tag_template=TEST_TAG_TEMPLATE_ID, - location=TEST_LOCATION, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - 
metadata=TEST_METADATA, - ) diff --git a/providers/google/tests/unit/google/cloud/hooks/vertex_ai/test_generative_model.py b/providers/google/tests/unit/google/cloud/hooks/vertex_ai/test_generative_model.py index 98217fedeaca9..3146992dba599 100644 --- a/providers/google/tests/unit/google/cloud/hooks/vertex_ai/test_generative_model.py +++ b/providers/google/tests/unit/google/cloud/hooks/vertex_ai/test_generative_model.py @@ -21,11 +21,8 @@ import pytest -from airflow.exceptions import AirflowProviderDeprecationWarning - # For no Pydantic environment, we need to skip the tests pytest.importorskip("google.cloud.aiplatform_v1") -from datetime import timedelta from vertexai.generative_models import HarmBlockThreshold, HarmCategory, Part, Tool, grounding from vertexai.preview.evaluation import MetricPromptTemplateExamples @@ -146,77 +143,6 @@ def setup_method(self): self.hook = GenerativeModelHook(gcp_conn_id=TEST_GCP_CONN_ID) self.hook.get_credentials = self.dummy_get_credentials - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_embedding_model")) - def test_text_embedding_model_get_embeddings(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.text_embedding_model_get_embeddings( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - pretrained_model=TEST_TEXT_EMBEDDING_MODEL, - ) - mock_model.assert_called_once_with(TEST_TEXT_EMBEDDING_MODEL) - mock_model.return_value.get_embeddings.assert_called_once_with([TEST_PROMPT]) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_generative_model_generate_content(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.generative_model_generate_content( - project_id=GCP_PROJECT, - contents=TEST_CONTENTS, - location=GCP_LOCATION, - tools=TEST_TOOLS, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - 
pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - ) - mock_model.assert_called_once_with( - pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - system_instruction=None, - ) - mock_model.return_value.generate_content.assert_called_once_with( - contents=TEST_CONTENTS, - tools=TEST_TOOLS, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - ) - - @mock.patch("vertexai.preview.tuning.sft.train") - def test_supervised_fine_tuning_train(self, mock_sft_train) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.supervised_fine_tuning_train( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - source_model=SOURCE_MODEL, - train_dataset=TRAIN_DATASET, - ) - - mock_sft_train.assert_called_once_with( - source_model=SOURCE_MODEL, - train_dataset=TRAIN_DATASET, - validation_dataset=None, - epochs=None, - adapter_size=None, - learning_rate_multiplier=None, - tuned_model_display_name=None, - ) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_count_tokens(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.count_tokens( - project_id=GCP_PROJECT, - contents=TEST_CONTENTS, - location=GCP_LOCATION, - pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - ) - mock_model.assert_called_once_with( - pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - ) - mock_model.return_value.count_tokens.assert_called_once_with( - contents=TEST_CONTENTS, - ) - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_eval_task")) def test_run_evaluation(self, mock_eval_task, mock_model) -> None: @@ -248,40 +174,3 @@ def test_run_evaluation(self, mock_eval_task, mock_model) -> None: prompt_template=TEST_PROMPT_TEMPLATE, experiment_run_name=TEST_EXPERIMENT_RUN_NAME, ) - - @mock.patch("vertexai.preview.caching.CachedContent.create") - def 
test_create_cached_content(self, mock_cached_content_create) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.create_cached_content( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - model_name=TEST_CACHED_MODEL, - system_instruction=TEST_CACHED_SYSTEM_INSTRUCTION, - contents=TEST_CACHED_CONTENTS, - ttl_hours=TEST_CACHED_TTL, - display_name=TEST_CACHED_DISPLAY_NAME, - ) - - mock_cached_content_create.assert_called_once_with( - model_name=TEST_CACHED_MODEL, - system_instruction=TEST_CACHED_SYSTEM_INSTRUCTION, - contents=TEST_CACHED_CONTENTS, - ttl=timedelta(hours=TEST_CACHED_TTL), - display_name=TEST_CACHED_DISPLAY_NAME, - ) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_cached_context_model")) - def test_generate_from_cached_content(self, mock_cached_context_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - self.hook.generate_from_cached_content( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - cached_content_name=TEST_CACHED_CONTENT_NAME, - contents=TEST_CACHED_CONTENT_PROMPT, - ) - - mock_cached_context_model.return_value.generate_content.assert_called_once_with( - contents=TEST_CACHED_CONTENT_PROMPT, - generation_config=None, - safety_settings=None, - ) diff --git a/providers/google/tests/unit/google/cloud/operators/test_datacatalog.py b/providers/google/tests/unit/google/cloud/operators/test_datacatalog.py deleted file mode 100644 index 38e4f33d74d9b..0000000000000 --- a/providers/google/tests/unit/google/cloud/operators/test_datacatalog.py +++ /dev/null @@ -1,994 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING -from unittest import mock - -import pytest -from google.api_core.exceptions import AlreadyExists -from google.api_core.retry import Retry -from google.cloud.datacatalog import Entry, EntryGroup, Tag, TagTemplate, TagTemplateField -from google.protobuf.field_mask_pb2 import FieldMask - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.google.cloud.operators.datacatalog import ( - CloudDataCatalogCreateEntryGroupOperator, - CloudDataCatalogCreateEntryOperator, - CloudDataCatalogCreateTagOperator, - CloudDataCatalogCreateTagTemplateFieldOperator, - CloudDataCatalogCreateTagTemplateOperator, - CloudDataCatalogDeleteEntryGroupOperator, - CloudDataCatalogDeleteEntryOperator, - CloudDataCatalogDeleteTagOperator, - CloudDataCatalogDeleteTagTemplateFieldOperator, - CloudDataCatalogDeleteTagTemplateOperator, - CloudDataCatalogGetEntryGroupOperator, - CloudDataCatalogGetEntryOperator, - CloudDataCatalogGetTagTemplateOperator, - CloudDataCatalogListTagsOperator, - CloudDataCatalogLookupEntryOperator, - CloudDataCatalogRenameTagTemplateFieldOperator, - CloudDataCatalogSearchCatalogOperator, - CloudDataCatalogUpdateEntryOperator, - CloudDataCatalogUpdateTagOperator, - CloudDataCatalogUpdateTagTemplateFieldOperator, - CloudDataCatalogUpdateTagTemplateOperator, -) - -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS - -if TYPE_CHECKING: - from google.api_core.gapic_v1.method import _MethodDefault - 
-BASE_PATH = "airflow.providers.google.cloud.operators.datacatalog.{}" -TEST_PROJECT_ID: str = "example_id" -TEST_LOCATION: str = "en-west-3" -TEST_ENTRY_ID: str = "test-entry-id" -TEST_TAG_ID: str = "test-tag-id" -TEST_RETRY: Retry | _MethodDefault = Retry() -TEST_TIMEOUT: float = 0.5 -TEST_METADATA: Sequence[tuple[str, str]] = [] -TEST_GCP_CONN_ID: str = "test-gcp-conn-id" -TEST_IMPERSONATION_CHAIN: Sequence[str] = ["ACCOUNT_1", "ACCOUNT_2", "ACCOUNT_3"] -TEST_ENTRY_GROUP_ID: str = "test-entry-group-id" -TEST_TAG_TEMPLATE_ID: str = "test-tag-template-id" -TEST_TAG_TEMPLATE_FIELD_ID: str = "test-tag-template-field-id" -TEST_TAG_TEMPLATE_NAME: str = "test-tag-template-field-name" -TEST_FORCE: bool = False -TEST_READ_MASK: FieldMask = FieldMask(paths=["name"]) -TEST_RESOURCE: str = "test-resource" -TEST_OPTIONS_: dict = {} -TEST_PAGE_SIZE: int = 50 -TEST_LINKED_RESOURCE: str = "test-linked-resource" -TEST_SQL_RESOURCE: str = "test-sql-resource" -TEST_NEW_TAG_TEMPLATE_FIELD_ID: str = "test-new-tag-template-field-id" -TEST_SCOPE: dict = dict(include_project_ids=["example-scope-project"]) -TEST_QUERY: str = "test-query" -TEST_ORDER_BY: str = "test-order-by" -TEST_UPDATE_MASK: dict = {"fields": ["name"]} -TEST_ENTRY_PATH: str = ( - f"projects/{TEST_PROJECT_ID}/locations/{TEST_LOCATION}" - f"/entryGroups/{TEST_ENTRY_GROUP_ID}/entries/{TEST_ENTRY_ID}" -) -TEST_ENTRY_GROUP_PATH: str = ( - f"projects/{TEST_PROJECT_ID}/locations/{TEST_LOCATION}/entryGroups/{TEST_ENTRY_GROUP_ID}" -) -TEST_TAG_TEMPLATE_PATH: str = ( - f"projects/{TEST_PROJECT_ID}/locations/{TEST_LOCATION}/tagTemplates/{TEST_TAG_TEMPLATE_ID}" -) -TEST_TAG_PATH: str = ( - f"projects/{TEST_PROJECT_ID}/locations/{TEST_LOCATION}/entryGroups/" - f"{TEST_ENTRY_GROUP_ID}/entries/{TEST_ENTRY_ID}/tags/{TEST_TAG_ID}" -) - -TEST_ENTRY: Entry = Entry(name=TEST_ENTRY_PATH) -TEST_ENTRY_DICT: dict = { - "description": "", - "display_name": "", - "linked_resource": "", - "fully_qualified_name": "", - "labels": {}, - "name": 
TEST_ENTRY_PATH, -} -TEST_ENTRY_GROUP: EntryGroup = EntryGroup(name=TEST_ENTRY_GROUP_PATH) -TEST_ENTRY_GROUP_DICT: dict = { - "description": "", - "display_name": "", - "name": TEST_ENTRY_GROUP_PATH, - "transferred_to_dataplex": False, -} -TEST_TAG: Tag = Tag(name=TEST_TAG_PATH) -TEST_TAG_DICT: dict = { - "fields": {}, - "name": TEST_TAG_PATH, - "template": "", - "template_display_name": "", - "dataplex_transfer_status": 0, -} -TEST_TAG_TEMPLATE: TagTemplate = TagTemplate(name=TEST_TAG_TEMPLATE_PATH) -TEST_TAG_TEMPLATE_DICT: dict = { - "dataplex_transfer_status": 0, - "display_name": "", - "fields": {}, - "is_publicly_readable": False, - "name": TEST_TAG_TEMPLATE_PATH, -} -TEST_TAG_TEMPLATE_FIELD: TagTemplateField = TagTemplateField(name=TEST_TAG_TEMPLATE_FIELD_ID) -TEST_TAG_TEMPLATE_FIELD_DICT: dict = { - "description": "", - "display_name": "", - "is_required": False, - "name": TEST_TAG_TEMPLATE_FIELD_ID, - "order": 0, -} -TEST_ENTRY_LINK = "projects/{project_id}/locations/{location}/entryGroups/{entry_group_id}/entries/{entry_id}" -TEST_TAG_TEMPLATE_LINK = "projects/{project_id}/locations/{location}/tagTemplates/{tag_template_id}" -TEST_TAG_TEMPLATE_FIELD_LINK = "projects/{project_id}/locations/{location}/tagTemplates/{tag_template_id}\ - /fields/{tag_template_field_id}" - - -class TestCloudDataCatalogCreateEntryOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.create_entry.return_value": TEST_ENTRY}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateEntryOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = 
mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_entry.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="entry_id", - value=TEST_ENTRY_ID, - ) - - assert result == TEST_ENTRY_DICT - - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call_when_exists(self, mock_hook) -> None: - mock_hook.return_value.create_entry.side_effect = AlreadyExists(message="message") - mock_hook.return_value.get_entry.return_value = TEST_ENTRY - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateEntryOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_entry.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - entry=TEST_ENTRY, - project_id=TEST_PROJECT_ID, - 
retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_hook.return_value.get_entry.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="entry_id", - value=TEST_ENTRY_ID, - ) - assert result == TEST_ENTRY_DICT - - -class TestCloudDataCatalogCreateEntryGroupOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.create_entry_group.return_value": TEST_ENTRY_GROUP}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateEntryGroupOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_entry_group.assert_called_once_with( - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_group=TEST_ENTRY_GROUP, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="entry_group_id", - value=TEST_ENTRY_GROUP_ID, - ) - assert result == TEST_ENTRY_GROUP_DICT - - -class TestCloudDataCatalogCreateTagOperator: - @mock.patch( - 
"airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.create_tag.return_value": TEST_TAG}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateTagOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG, - template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_tag.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG, - template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="tag_id", - value=TEST_TAG_ID, - ) - assert result == TEST_TAG_DICT - - -class TestCloudDataCatalogCreateTagTemplateOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.create_tag_template.return_value": TEST_TAG_TEMPLATE}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateTagTemplateOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - 
metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_tag_template.assert_called_once_with( - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template=TEST_TAG_TEMPLATE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="tag_template_id", - value=TEST_TAG_TEMPLATE_ID, - ) - assert result == TEST_TAG_TEMPLATE_DICT - - -class TestCloudDataCatalogCreateTagTemplateFieldOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.create_tag_template_field.return_value": TEST_TAG_TEMPLATE_FIELD}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogCreateTagTemplateFieldOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_ti = mock.MagicMock() - mock_context = {"ti": mock_ti} - if not AIRFLOW_V_3_0_PLUS: - mock_context["task"] = task # type: ignore[assignment] - result = task.execute(context=mock_context) # type: ignore[arg-type] - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - 
mock_hook.return_value.create_tag_template_field.assert_called_once_with( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - mock_ti.xcom_push.assert_any_call( - key="tag_template_field_id", - value=TEST_TAG_TEMPLATE_FIELD_ID, - ) - assert result == TEST_TAG_TEMPLATE_FIELD_DICT - - -class TestCloudDataCatalogDeleteEntryOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogDeleteEntryOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_entry.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogDeleteEntryGroupOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogDeleteEntryGroupOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, 
- gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_entry_group.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogDeleteTagOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogDeleteTagOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_tag.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag=TEST_TAG_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogDeleteTagTemplateOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogDeleteTagTemplateOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - 
timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_tag_template.assert_called_once_with( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogDeleteTagTemplateFieldOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogDeleteTagTemplateFieldOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_tag_template_field.assert_called_once_with( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - force=TEST_FORCE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogGetEntryOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.get_entry.return_value": TEST_ENTRY}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = 
CloudDataCatalogGetEntryOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.get_entry.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogGetEntryGroupOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.get_entry_group.return_value": TEST_ENTRY_GROUP}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogGetEntryGroupOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - read_mask=TEST_READ_MASK, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.get_entry_group.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - read_mask=TEST_READ_MASK, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogGetTagTemplateOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - 
**{"return_value.get_tag_template.return_value": TEST_TAG_TEMPLATE}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogGetTagTemplateOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.get_tag_template.assert_called_once_with( - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogListTagsOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - return_value=mock.MagicMock(list_tags=mock.MagicMock(return_value=[TEST_TAG])), - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogListTagsOperator( - task_id="task_id", - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - page_size=TEST_PAGE_SIZE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.list_tags.assert_called_once_with( - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - page_size=TEST_PAGE_SIZE, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - 
timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogLookupEntryOperator: - @mock.patch( - "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook", - **{"return_value.lookup_entry.return_value": TEST_ENTRY}, - ) - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogLookupEntryOperator( - task_id="task_id", - linked_resource=TEST_LINKED_RESOURCE, - sql_resource=TEST_SQL_RESOURCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.lookup_entry.assert_called_once_with( - linked_resource=TEST_LINKED_RESOURCE, - sql_resource=TEST_SQL_RESOURCE, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogRenameTagTemplateFieldOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogRenameTagTemplateFieldOperator( - task_id="task_id", - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.rename_tag_template_field.assert_called_once_with( - 
location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - field=TEST_TAG_TEMPLATE_FIELD_ID, - new_tag_template_field_id=TEST_NEW_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogSearchCatalogOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogSearchCatalogOperator( - task_id="task_id", - scope=TEST_SCOPE, - query=TEST_QUERY, - page_size=TEST_PAGE_SIZE, - order_by=TEST_ORDER_BY, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.search_catalog.assert_called_once_with( - scope=TEST_SCOPE, - query=TEST_QUERY, - page_size=TEST_PAGE_SIZE, - order_by=TEST_ORDER_BY, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogUpdateEntryOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - mock_hook.return_value.update_entry.return_value.name = TEST_ENTRY_LINK.format( - project_id=TEST_PROJECT_ID, - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - ) - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogUpdateEntryOperator( - task_id="task_id", - entry=TEST_ENTRY, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - 
metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.update_entry.assert_called_once_with( - entry=TEST_ENTRY, - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogUpdateTagOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - mock_hook.return_value.update_tag.return_value.name = TEST_ENTRY_LINK.format( - project_id=TEST_PROJECT_ID, - location=TEST_LOCATION, - entry_group_id=TEST_ENTRY_GROUP_ID, - entry_id=TEST_ENTRY_ID, - ) - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogUpdateTagOperator( - task_id="task_id", - tag=Tag(name=TEST_TAG_ID), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag_id=TEST_TAG_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.update_tag.assert_called_once_with( - tag=Tag(name=TEST_TAG_ID), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - entry_group=TEST_ENTRY_GROUP_ID, - entry=TEST_ENTRY_ID, - tag_id=TEST_TAG_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class 
TestCloudDataCatalogUpdateTagTemplateOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - mock_hook.return_value.update_tag_template.return_value.name = TEST_TAG_TEMPLATE_LINK.format( - project_id=TEST_PROJECT_ID, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - ) - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogUpdateTagTemplateOperator( - task_id="task_id", - tag_template=TagTemplate(name=TEST_TAG_TEMPLATE_ID), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.update_tag_template.assert_called_once_with( - tag_template=TagTemplate(name=TEST_TAG_TEMPLATE_ID), - update_mask=TEST_UPDATE_MASK, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) - - -class TestCloudDataCatalogUpdateTagTemplateFieldOperator: - @mock.patch("airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook") - def test_assert_valid_hook_call(self, mock_hook) -> None: - mock_hook.return_value.update_tag_template_field.return_value.name = ( - TEST_TAG_TEMPLATE_FIELD_LINK.format( - project_id=TEST_PROJECT_ID, - location=TEST_LOCATION, - tag_template_id=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - ) - ) - with pytest.warns(AirflowProviderDeprecationWarning): - task = CloudDataCatalogUpdateTagTemplateFieldOperator( - task_id="task_id", - 
tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - tag_template_field_name=TEST_TAG_TEMPLATE_NAME, - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - task.execute(context=mock.MagicMock()) - mock_hook.assert_called_once_with( - gcp_conn_id=TEST_GCP_CONN_ID, - impersonation_chain=TEST_IMPERSONATION_CHAIN, - ) - mock_hook.return_value.update_tag_template_field.assert_called_once_with( - tag_template_field=TEST_TAG_TEMPLATE_FIELD, - update_mask=TEST_UPDATE_MASK, - tag_template_field_name=TEST_TAG_TEMPLATE_NAME, - location=TEST_LOCATION, - tag_template=TEST_TAG_TEMPLATE_ID, - tag_template_field_id=TEST_TAG_TEMPLATE_FIELD_ID, - project_id=TEST_PROJECT_ID, - retry=TEST_RETRY, - timeout=TEST_TIMEOUT, - metadata=TEST_METADATA, - ) diff --git a/providers/google/tests/unit/google/cloud/operators/vertex_ai/test_generative_model.py b/providers/google/tests/unit/google/cloud/operators/vertex_ai/test_generative_model.py index 86a6be37bf82c..e9bd014eac794 100644 --- a/providers/google/tests/unit/google/cloud/operators/vertex_ai/test_generative_model.py +++ b/providers/google/tests/unit/google/cloud/operators/vertex_ai/test_generative_model.py @@ -24,19 +24,11 @@ pytest.importorskip("google.cloud.aiplatform_v1") pytest.importorskip("google.cloud.aiplatform_v1beta1") vertexai = pytest.importorskip("vertexai.generative_models") -from vertexai.generative_models import HarmBlockThreshold, HarmCategory, Part, Tool, grounding +from vertexai.generative_models import HarmBlockThreshold, HarmCategory, Tool, grounding from vertexai.preview.evaluation import MetricPromptTemplateExamples -from airflow.exceptions import AirflowProviderDeprecationWarning from 
airflow.providers.google.cloud.operators.vertex_ai.generative_model import ( - CountTokensOperator, - CreateCachedContentOperator, - DeleteExperimentRunOperator, - GenerateFromCachedContentOperator, - GenerativeModelGenerateContentOperator, RunEvaluationOperator, - SupervisedFineTuningTrainOperator, - TextEmbeddingModelGetEmbeddingsOperator, ) VERTEX_AI_PATH = "airflow.providers.google.cloud.operators.vertex_ai.{}" @@ -52,146 +44,6 @@ def assert_warning(msg: str, warnings): assert any(msg in str(w) for w in warnings) -class TestVertexAITextEmbeddingModelGetEmbeddingsOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - prompt = "In 10 words or less, what is Apache Airflow?" - pretrained_model = "textembedding-gecko" - with pytest.warns(AirflowProviderDeprecationWarning): - op = TextEmbeddingModelGetEmbeddingsOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=prompt, - pretrained_model=pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.text_embedding_model_get_embeddings.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=prompt, - pretrained_model=pretrained_model, - ) - - -class TestVertexAIGenerativeModelGenerateContentOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - contents = ["In 10 words or less, what is Apache Airflow?"] - tools = [Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval())] - pretrained_model = "gemini-pro" - safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - 
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - } - generation_config = {"max_output_tokens": 256, "top_p": 0.8, "temperature": 0.0} - system_instruction = "be concise." - with pytest.warns(AirflowProviderDeprecationWarning): - op = GenerativeModelGenerateContentOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - contents=contents, - tools=tools, - generation_config=generation_config, - safety_settings=safety_settings, - pretrained_model=pretrained_model, - system_instruction=system_instruction, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.generative_model_generate_content.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - contents=contents, - tools=tools, - generation_config=generation_config, - safety_settings=safety_settings, - pretrained_model=pretrained_model, - system_instruction=system_instruction, - ) - - -class TestVertexAISupervisedFineTuningTrainOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - @mock.patch("google.cloud.aiplatform_v1.types.TuningJob.to_dict") - def test_execute( - self, - to_dict_mock, - mock_hook, - ): - source_model = "gemini-1.0-pro-002" - train_dataset = "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" - with pytest.warns(AirflowProviderDeprecationWarning): - op = SupervisedFineTuningTrainOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - source_model=source_model, - train_dataset=train_dataset, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - 
gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.supervised_fine_tuning_train.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - source_model=source_model, - train_dataset=train_dataset, - adapter_size=None, - epochs=None, - learning_rate_multiplier=None, - tuned_model_display_name=None, - validation_dataset=None, - ) - - -class TestVertexAICountTokensOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - @mock.patch("google.cloud.aiplatform_v1beta1.types.CountTokensResponse.to_dict") - def test_execute(self, to_dict_mock, mock_hook): - contents = ["In 10 words or less, what is Apache Airflow?"] - pretrained_model = "gemini-pro" - with pytest.warns(AirflowProviderDeprecationWarning): - op = CountTokensOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - contents=contents, - pretrained_model=pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.count_tokens.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - contents=contents, - pretrained_model=pretrained_model, - ) - - class TestVertexAIRunEvaluationOperator: @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) def test_execute( @@ -277,112 +129,3 @@ def test_execute( safety_settings=safety_settings, tools=tools, ) - - -class TestVertexAICreateCachedContentOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - model_name = "gemini-1.5-pro-002" - system_instruction = """ - You are an expert researcher. You always stick to the facts in the sources provided, and never make up new facts. 
- Now look at these research papers, and answer the following questions. - """ - - contents = [ - Part.from_uri( - "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf", - mime_type="application/pdf", - ), - Part.from_uri( - "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", - mime_type="application/pdf", - ), - ] - ttl_hours = 1 - display_name = "test-example-cache" - with pytest.warns(AirflowProviderDeprecationWarning): - op = CreateCachedContentOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - model_name=model_name, - system_instruction=system_instruction, - contents=contents, - ttl_hours=ttl_hours, - display_name=display_name, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.create_cached_content.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - model_name=model_name, - system_instruction=system_instruction, - contents=contents, - ttl_hours=ttl_hours, - display_name=display_name, - ) - - -class TestVertexAIGenerateFromCachedContentOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - cached_content_name = "test" - contents = ["what are in these papers"] - with pytest.warns(AirflowProviderDeprecationWarning): - op = GenerateFromCachedContentOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - cached_content_name=cached_content_name, - contents=contents, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.generate_from_cached_content.assert_called_once_with( - project_id=GCP_PROJECT, 
- location=GCP_LOCATION, - cached_content_name=cached_content_name, - contents=contents, - generation_config=None, - safety_settings=None, - ) - - -class TestVertexAIDeleteExperimentRunOperator: - @mock.patch(VERTEX_AI_PATH.format("generative_model.ExperimentRunHook")) - def test_execute(self, mock_hook): - test_experiment_name = "test_experiment_name" - test_experiment_run_name = "test_experiment_run_name" - - with pytest.warns(AirflowProviderDeprecationWarning): - op = DeleteExperimentRunOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - experiment_name=test_experiment_name, - experiment_run_name=test_experiment_run_name, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.delete_experiment_run.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - experiment_name=test_experiment_name, - experiment_run_name=test_experiment_run_name, - ) diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py index bf6e104db6a28..17fbf527c6123 100755 --- a/scripts/in_container/run_provider_yaml_files_check.py +++ b/scripts/in_container/run_provider_yaml_files_check.py @@ -60,9 +60,6 @@ "airflow.providers.tabular.hooks.tabular", "airflow.providers.yandex.hooks.yandexcloud_dataproc", "airflow.providers.yandex.operators.yandexcloud_dataproc", - "airflow.providers.google.cloud.hooks.datacatalog", - "airflow.providers.google.cloud.operators.datacatalog", - "airflow.providers.google.cloud.links.datacatalog", ] KNOWN_DEPRECATED_CLASSES = [