From 6658483693323f0662dbc421cc59834e427cfcde Mon Sep 17 00:00:00 2001 From: reidhin Date: Sat, 2 Aug 2025 15:55:38 +0200 Subject: [PATCH 1/3] Adding explanation to README on filtering and selection of columns. --- README.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/README.md b/README.md index 0db6abe..d2dd3dd 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,45 @@ your file system. >>> data = cbsodata.get_data('82070ENG', dir="dir_to_save_data") ``` +It is possible restrict the download using filter and select statements. +This may shorten the download time considerably. +The code below downloads the number of inhabitants in the Dutch municipalities - +ignoring all other characteristics and data on neighbourhood level. + +``` python +>>> data = cbsodata.get_data( +... table_id="85984NED", +... filters="startswith(WijkenEnBuurten,'GM')", +... select=["WijkenEnBuurten", "Gemeentenaam_1", "SoortRegio_2", "AantalInwoners_5"] +... ) +>>> len(data) +342 +``` + +The code below downloads the data for fruit farming regions for the years 2000 and 2010 only: + +``` python +>>> cbsodata.get_data( +... table_id="71509ENG", +... filters="Periods eq '2010JJ00' or substringof('2000', Periods)", +... select=["FruitFarmingRegions", "Periods", "TotalAppleVarieties_1"] +... ) +[{'FruitFarmingRegions': 'Total Netherlands', + 'Periods': '2000', + 'TotalAppleVarieties_1': 461}, + {'FruitFarmingRegions': 'Total Netherlands', + 'Periods': '2010', + 'TotalAppleVarieties_1': 334}, + {'FruitFarmingRegions': 'Region North', + 'Periods': '2000', + 'TotalAppleVarieties_1': 87}, + {'FruitFarmingRegions': 'Region North', + 'Periods': '2010', + 'TotalAppleVarieties_1': 49}, + # ... 
+] +``` + ### Catalogs (dataderden) There are multiple ways to retrieve data from catalogs other than From aeff2ab692cc9b79c6c4dfb45bf5c56ee63cae79 Mon Sep 17 00:00:00 2001 From: reidhin Date: Sun, 3 Aug 2025 12:19:23 +0200 Subject: [PATCH 2/3] Detailing explanation on filtering a bit more --- README.md | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d2dd3dd..e29d139 100644 --- a/README.md +++ b/README.md @@ -119,22 +119,40 @@ your file system. >>> data = cbsodata.get_data('82070ENG', dir="dir_to_save_data") ``` +### Filter data + It is possible restrict the download using filter and select statements. This may shorten the download time considerably. -The code below downloads the number of inhabitants in the Dutch municipalities - -ignoring all other characteristics and data on neighbourhood level. +Filtering can only be done on columns available in the metadata. +For example, for the dataset with id `71509ENG` only the columns `FruitFarmingRegions` and `Periods` are available for filtering: ``` python ->>> data = cbsodata.get_data( -... table_id="85984NED", -... filters="startswith(WijkenEnBuurten,'GM')", -... select=["WijkenEnBuurten", "Gemeentenaam_1", "SoortRegio_2", "AantalInwoners_5"] -... ) ->>> len(data) -342 +>>> [x["name"] for x in cbsodata.get_meta("71509ENG", "")] +['TableInfos', 'UntypedDataSet', 'TypedDataSet', 'DataProperties', 'CategoryGroups', 'FruitFarmingRegions', 'Periods'] ``` -The code below downloads the data for fruit farming regions for the years 2000 and 2010 only: +Subsequently one can filter on the `Key` related to these columns. 
The `Key` can +be found as follows: + +``` python +>>> cbsodata.get_meta("71509ENG", "Periods") +[{'Key': '1997JJ00', + 'Title': '1997', + 'Description': None, + 'Status': 'Definitief'}, + {'Key': '1998JJ00', + 'Title': '1998', + 'Description': None, + 'Status': 'Definitief'}, + {'Key': '1999JJ00', + 'Title': '1999', + 'Description': None, + 'Status': 'Definitief'}, + # ... +] +``` + +The code to download the data for fruit farming regions for the years 2000 and 2010 then could look as follows: ``` python >>> cbsodata.get_data( @@ -158,6 +176,19 @@ The code below downloads the data for fruit farming regions for the years 2000 a ] ``` +As another example, the code below downloads the number of inhabitants in the Dutch municipalities - +ignoring all other characteristics and data on neighbourhood level. + +``` python +>>> data = cbsodata.get_data( +... table_id="85984NED", +... filters="startswith(WijkenEnBuurten,'GM')", +... select=["WijkenEnBuurten", "Gemeentenaam_1", "SoortRegio_2", "AantalInwoners_5"] +... ) +>>> len(data) # number of municipalities +342 +``` + ### Catalogs (dataderden) There are multiple ways to retrieve data from catalogs other than From 6366cce5340eee6b1b5034704d1c698c9181e3fa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 8 Aug 2025 08:45:31 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 56 +++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index e29d139..369bad3 100644 --- a/README.md +++ b/README.md @@ -121,9 +121,9 @@ your file system. ### Filter data -It is possible restrict the download using filter and select statements. +It is possible to restrict the download using filter and select statements. This may shorten the download time considerably. 
-Filtering can only be done on columns available in the metadata. +Filtering can only be done on columns available in the metadata. For example, for the dataset with id `71509ENG` only the columns `FruitFarmingRegions` and `Periods` are available for filtering: ``` python @@ -136,18 +136,18 @@ be found as follows: ``` python >>> cbsodata.get_meta("71509ENG", "Periods") -[{'Key': '1997JJ00', - 'Title': '1997', - 'Description': None, - 'Status': 'Definitief'}, - {'Key': '1998JJ00', - 'Title': '1998', - 'Description': None, - 'Status': 'Definitief'}, - {'Key': '1999JJ00', - 'Title': '1999', - 'Description': None, - 'Status': 'Definitief'}, +[{'Key': '1997JJ00', + 'Title': '1997', + 'Description': None, + 'Status': 'Definitief'}, + {'Key': '1998JJ00', + 'Title': '1998', + 'Description': None, + 'Status': 'Definitief'}, + {'Key': '1999JJ00', + 'Title': '1999', + 'Description': None, + 'Status': 'Definitief'}, # ... ] ``` @@ -158,25 +158,25 @@ The code to download the data for fruit farming regions for the years 2000 and 2 >>> cbsodata.get_data( ... table_id="71509ENG", ... filters="Periods eq '2010JJ00' or substringof('2000', Periods)", -... select=["FruitFarmingRegions", "Periods", "TotalAppleVarieties_1"] +... select=["FruitFarmingRegions", "Periods", "TotalAppleVarieties_1"] ... 
) -[{'FruitFarmingRegions': 'Total Netherlands', - 'Periods': '2000', - 'TotalAppleVarieties_1': 461}, - {'FruitFarmingRegions': 'Total Netherlands', - 'Periods': '2010', - 'TotalAppleVarieties_1': 334}, - {'FruitFarmingRegions': 'Region North', - 'Periods': '2000', - 'TotalAppleVarieties_1': 87}, - {'FruitFarmingRegions': 'Region North', - 'Periods': '2010', - 'TotalAppleVarieties_1': 49}, +[{'FruitFarmingRegions': 'Total Netherlands', + 'Periods': '2000', + 'TotalAppleVarieties_1': 461}, + {'FruitFarmingRegions': 'Total Netherlands', + 'Periods': '2010', + 'TotalAppleVarieties_1': 334}, + {'FruitFarmingRegions': 'Region North', + 'Periods': '2000', + 'TotalAppleVarieties_1': 87}, + {'FruitFarmingRegions': 'Region North', + 'Periods': '2010', + 'TotalAppleVarieties_1': 49}, # ... ] ``` -As another example, the code below downloads the number of inhabitants in the Dutch municipalities - +As another example, the code below downloads the number of inhabitants in the Dutch municipalities - ignoring all other characteristics and data on neighbourhood level. ``` python