diff --git a/.github/workflows/check_jupyterbook.yml b/.github/workflows/check_jupyterbook.yml index 58e2afe..51311b0 100644 --- a/.github/workflows/check_jupyterbook.yml +++ b/.github/workflows/check_jupyterbook.yml @@ -21,7 +21,7 @@ jobs: - name: Build # Build Jupyter Book shell: bash -l {0} run: | - pip install jupyter-book + pip install "jupyter-book<2.0.0" pip install -e . python -m ipykernel install --user --name=taxbrain-dev cd docs diff --git a/.github/workflows/deploy_jupyterbook.yml b/.github/workflows/deploy_jupyterbook.yml index 4e23609..fcc53b4 100644 --- a/.github/workflows/deploy_jupyterbook.yml +++ b/.github/workflows/deploy_jupyterbook.yml @@ -24,7 +24,7 @@ jobs: - name: Build # Build Jupyter Book shell: bash -l {0} run: | - pip install jupyter-book + pip install "jupyter-book<2.0.0" pip install -e . python -m ipykernel install --user --name=taxbrain-dev cd docs diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py index 65d3c77..a9a3018 100644 --- a/cs-config/cs_config/functions.py +++ b/cs-config/cs_config/functions.py @@ -154,7 +154,7 @@ def run_model(meta_params_dict, adjustment): sampling_seed = 180 full_sample = pd.read_csv(input_path) data_start_year = taxcalc.Records.CPSCSV_YEAR - weights = taxcalc.Records.CPS_WEIGHTS_FILENAME + weights = os.path.join(taxcalc.Records.CODE_PATH, "cps_weights.csv.gz") else: raise ValueError( f"Data source '{meta_params.data_source}' is not supported." diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py index 2be8769..d22a18b 100644 --- a/cs-config/cs_config/helpers.py +++ b/cs-config/cs_config/helpers.py @@ -306,7 +306,7 @@ def arbitrary_defaultdict(): ) # format table for col in tbl.columns: - tbl.update(tbl[col].apply("${:,.2f}".format)) + tbl[col] = tbl[col].apply("${:,.2f}".format) title = RESULTS_TABLE_TITLES[id] tags = RESULTS_TABLE_TAGS[id] diff --git a/environment.yml b/environment.yml index 51acf0d..5476c7c 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - python>=3.6.5 -- taxcalc>=3.0.0 +- taxcalc>=6.0.0 - behresp>=0.11.0 - pandas>=0.23 - numpy>=1.14 diff --git a/taxbrain/report.py b/taxbrain/report.py index 70ca961..e5113e6 100644 --- a/taxbrain/report.py +++ b/taxbrain/report.py @@ -92,9 +92,9 @@ def format_table(df, int_cols, float_cols, float_perc=2): table of output """ for col in int_cols: - df.update(df[col].astype(int).apply("{:,}".format)) + df[col] = df[col].astype(int).apply("{:,}".format) for col in float_cols: - df.update( + df[col] = ( df[col] .astype(float) .apply("{:,.{}}".format, args=(float_perc,)) @@ -124,14 +124,14 @@ def export_plot(plot, graph): full_filename = Path(output_path, filename) plot.savefig(full_filename, dpi=1200, bbox_inches="tight") - return str(full_filename) + return filename if not tb.has_run: tb.run() if not name: name = f"Policy Report-{date()}" if not outdir: - outdir = name.replace(" ", "_") + outdir = name.replace(" ", "-").replace(",", "") if author: author = f"Report Prepared by {author.title()}" # create directory to hold report contents @@ -189,9 +189,10 @@ def export_plot(plot, graph): # create differences table if verbose: print("Creating differences table") - diff_table = tb.differences_table( - tb.start_year, "standard_income_bins", "combined" - ).fillna(0) + with pd.option_context("future.no_silent_downcasting", True): + diff_table = tb.differences_table( + tb.start_year, "standard_income_bins", "combined" + ).fillna(0) diff_table.index = DIFF_TABLE_ROW_NAMES decile_diff_table = tb.differences_table( @@ -306,7 +307,7 @@ def export_plot(plot, graph): report_md = write_text(template_path, **text_args) # write PDF, markdown files - filename = name.replace(" ", "-") + filename = name.replace(" ", "-").replace(",", "") pdf_path = Path(output_path, f"{filename}.pdf") md_path = Path(output_path, f"{filename}.md") md_path.write_text(report_md) diff --git a/taxbrain/taxbrain.py b/taxbrain/taxbrain.py index 52d37c6..d256e81 100644 --- a/taxbrain/taxbrain.py +++ b/taxbrain/taxbrain.py @@ -15,6 +15,7 @@ from taxbrain.corporate_incidence import distribute as dist_corp from typing import Union from paramtools import ValidationError +from pathlib import Path class TaxBrain: @@ -30,6 +31,11 @@ class TaxBrain: "Behavioral-Responses": behresp.__version__, } + # add expected TMD filenames as constants + TMD_DATA_FILE = "tmd.csv.gz" + TMD_WEIGHTS_FILE = "tmd_weights.csv.gz" + TMD_GROWFACTORS_FILE = "tmd_growfactors.csv" + def __init__( self, start_year: int, @@ -684,14 +690,20 @@ def _make_calculators(self): if self.microdata == "CPS": records = tc.Records.cps_constructor(data=None, gfactors=gf_base) elif self.microdata == "PUF": - records = tc.Records( + records = tc.Records.puf_constructor( + data="puf.csv", gfactors=gf_base, - weights=tc.Records.PUF_WEIGHTS_FILENAME, + # weights=tc.Records.PUF_WEIGHTS_FILENAME, ) elif self.microdata == "TMD": + gf_base = tc.GrowFactors(self.TMD_GROWFACTORS_FILE) + if self.params["growdiff_baseline"]: + gd_base.apply_to(gf_base) + records = tc.Records.tmd_constructor( - "tmd.csv", - gfactors=gf_base, + data_path=Path(self.TMD_DATA_FILE), + weights_path=Path(self.TMD_WEIGHTS_FILE), + growfactors=gf_base, ) elif isinstance(self.microdata, dict): if self.microdata["growfactors"] is None: @@ -735,14 +747,20 @@ def _make_calculators(self): if self.microdata == "CPS": records = tc.Records.cps_constructor(data=None, gfactors=gf_reform) elif self.microdata == "PUF": - records = tc.Records( + records = tc.Records.puf_constructor( + data="puf.csv", gfactors=gf_reform, - weights=tc.Records.PUF_WEIGHTS_FILENAME, + # weights=tc.Records.PUF_WEIGHTS_FILENAME, ) elif self.microdata == "TMD": + gf_reform = tc.GrowFactors(self.TMD_GROWFACTORS_FILE) + if self.params["growdiff_response"]: + gd_reform.apply_to(gf_reform) + records = tc.Records.tmd_constructor( - "tmd.csv", - gfactors=gf_reform, + data_path=Path(self.TMD_DATA_FILE), + weights_path=Path(self.TMD_WEIGHTS_FILE), + growfactors=gf_reform, ) elif isinstance(self.microdata, dict): if self.microdata["growfactors"] is None: @@ -803,15 +821,20 @@ def _make_stacked_objects(self): if self.microdata == "CPS": records = tc.Records.cps_constructor(data=None, gfactors=gf_base) elif self.microdata == "PUF": - records = tc.Records( - "puf.csv", + records = tc.Records.puf_constructor( + data="puf.csv", gfactors=gf_base, - weights=tc.Records.PUF_WEIGHTS_FILENAME, + # weights=tc.Records.PUF_WEIGHTS_FILENAME, ) elif self.microdata == "TMD": + gf_base = tc.GrowFactors(self.TMD_GROWFACTORS_FILE) + if self.params["growdiff_baseline"]: + gd_base.apply_to(gf_base) + records = tc.Records.tmd_constructor( - "tmd.csv", - gfactors=gf_base, + data_path=Path(self.TMD_DATA_FILE), + weights_path=Path(self.TMD_WEIGHTS_FILE), + growfactors=gf_base, ) elif isinstance(self.microdata, dict): if self.microdata["growfactors"] is None: @@ -857,15 +880,20 @@ def _make_stacked_objects(self): data=None, gfactors=gf_reform ) elif self.microdata == "PUF": - reform_records = tc.Records( - "puf.csv", + reform_records = tc.Records.puf_constructor( + data="puf.csv", gfactors=gf_reform, - weights=tc.Records.PUF_WEIGHTS_FILENAME, + # weights=tc.Records.PUF_WEIGHTS_FILENAME, ) elif self.microdata == "TMD": - records = tc.Records.tmd_constructor( - "tmd.csv", - gfactors=gf_reform, + gf_reform = tc.GrowFactors(self.TMD_GROWFACTORS_FILE) + if self.params["growdiff_response"]: + gd_reform.apply_to(gf_reform) + + reform_records = tc.Records.tmd_constructor( + data_path=Path(self.TMD_DATA_FILE), + weights_path=Path(self.TMD_WEIGHTS_FILE), + growfactors=gf_reform, ) elif isinstance(self.microdata, dict): if self.microdata["growfactors"] is None: diff --git a/taxbrain/tests/conftest.py b/taxbrain/tests/conftest.py index 50f0e0c..56bf9e7 100644 --- a/taxbrain/tests/conftest.py +++ b/taxbrain/tests/conftest.py @@ -12,8 +12,8 @@ def reform_json_str(): reform = """ { "policy": { - "SS_thd50": {"2019": [50000, 100000, 50000, 50000, 50000]}, - "SS_thd85": {"2019": [50000, 100000, 50000, 50000, 50000]}, + "SS_thd1": {"2019": [50000, 100000, 50000, 50000, 50000]}, + "SS_thd2": {"2019": [50000, 100000, 50000, 50000, 50000]}, "SS_Earnings_thd": {"2019": 400000}, "FICA_ss_trt_employee": {"2020": 0.0625, "2021": 0.063, diff --git a/taxbrain/tests/test_brain.py b/taxbrain/tests/test_brain.py index 46c8fd0..dd5235f 100644 --- a/taxbrain/tests/test_brain.py +++ b/taxbrain/tests/test_brain.py @@ -139,6 +139,7 @@ def test_stacked_run_corporate(): def test_weighted_totals(tb_static): + tb_static.run() table = tb_static.weighted_totals("combined") assert isinstance(table, pd.DataFrame) # table.to_csv("expected_weighted_table.csv") @@ -165,6 +166,7 @@ def test_weighted_totals(tb_static): def test_multi_var_table(tb_dynamic): + tb_dynamic.run() with pytest.raises(ValueError): tb_dynamic.multi_var_table(["iitax"], "calc") with pytest.raises(TypeError): @@ -176,6 +178,7 @@ def test_multi_var_table(tb_dynamic): def test_differences_table(tb_dynamic): + tb_dynamic.run() table = tb_dynamic.differences_table(2018, "weighted_deciles", "combined") assert isinstance(table, pd.DataFrame) diff --git a/taxbrain/tests/test_corporate_incidence.py b/taxbrain/tests/test_corporate_incidence.py index 3c13119..9723bee 100644 --- a/taxbrain/tests/test_corporate_incidence.py +++ b/taxbrain/tests/test_corporate_incidence.py @@ -307,22 +307,22 @@ def test_validation(): # expected shares if full burden on shareholders: expected = pd.DataFrame( data=[ - 0.000031, - 0.0, - 0.001008, - 0.003991, - 0.003974, - 0.008655, - 0.013243, - 0.020796, - 0.027376, - 0.052368, - 0.077858, - 0.790699, - 1, - 0.074559, - 0.122593, - 0.593546, + 3.12755650e-05, + 0.00000000e00, + 9.41971262e-04, + 3.66821040e-03, + 3.90232598e-03, + 8.00940431e-03, + 1.28149782e-02, + 2.03518571e-02, + 2.64449904e-02, + 5.06202883e-02, + 7.93136449e-02, + 7.93901054e-01, + 1.00000000e00, + 7.38510850e-02, + 1.25578069e-01, + 5.94471900e-01, ], index=[ "0-10n", diff --git a/taxbrain/tests/test_utils.py b/taxbrain/tests/test_utils.py index 73e73e4..5c52082 100644 --- a/taxbrain/tests/test_utils.py +++ b/taxbrain/tests/test_utils.py @@ -3,16 +3,19 @@ def test_distribution_plot(tb_static): + tb_static.run() fig = taxbrain.distribution_plot(tb_static, 2019) def test_differences_plot(tb_static): + tb_static.run() fig = taxbrain.differences_plot(tb_static, "combined") with pytest.raises(AssertionError): taxbrain.differences_plot(tb_static, "wages") def test_volcano_plot(tb_static): + tb_static.run() fig = taxbrain.volcano_plot(tb_static, 2019) with pytest.raises(ValueError): taxbrain.volcano_plot(tb_static, 2019, min_y=-10000) @@ -27,10 +30,12 @@ def test_volcano_plot(tb_static): def test_lorenz_curve(tb_static): + tb_static.run() fig = taxbrain.lorenz_curve(tb_static, 2019) def test_revenue_plot(tb_static): + tb_static.run() fig = taxbrain.revenue_plot(tb_static) with pytest.raises(ValueError): taxbrain.revenue_plot(tb_static, tax_vars=["income", "combined"])