From 192e857e64fc3e3c23b7378837ec1243642f6f8c Mon Sep 17 00:00:00 2001 From: FeLoe Date: Wed, 21 Feb 2024 11:59:40 +0100 Subject: [PATCH] Update chapter06.qmd Pass regex=True to Series.str.replace(). Since pandas 2.0 the default is regex=False, so the pattern is treated as a literal string (plain str.replace() semantics) instead of being applied with re.sub(), and the cleaning code does not work (observed at least in Jupyter notebooks). --- content/chapter06.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/chapter06.qmd b/content/chapter06.qmd index 1556ee5..4d1b1f1 100644 --- a/content/chapter06.qmd +++ b/content/chapter06.qmd @@ -194,7 +194,7 @@ d2 = pd.read_csv(url) # Option 1: clean with direct assignment # Note that when creating a new column, # you have to use df["col"] rather than df.col -d2["rep2"] = d2.rep.str.replace("[^0-9\\.]", "") +d2["rep2"] = d2.rep.str.replace("[^0-9\\.]", "", regex=True) d2["rep2"] = pd.to_numeric(d2.rep2, errors='coerce') d2["Support2"] = d2.Support.fillna(d2.Support.mean()) @@ -202,7 +202,7 @@ d2["Support2"] = d2.Support.fillna(d2.Support.mean()) # Note the need to use an anonymous function # (lambda) to chain calculations cleaned = d2.assign( - rep2=d2.rep.str.replace("[^0-9\\.]", ""), + rep2=d2.rep.str.replace("[^0-9\\.]", "", regex=True), rep3=lambda d2: pd.to_numeric(d2.rep2, errors='coerce'), Support2=d2.Support.fillna(d2.Support.mean()), )