diff --git a/.gitignore b/.gitignore index 70fe53c..1ffa98f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,10 @@ ___pycache__ logs/ output/ sfs-output/ -data/kommande.yaml \ No newline at end of file +data/kommande.yaml +# Log files from law name matching +logs/missing_law_names.txt +logs/found_law_matches.json +logs/varningar_sammanfattning.txt +logs/html_conversion_latest.log +logs/add_law_names_output.txt diff --git a/data/law-names.json b/data/law-names.json index 686a0e2..17b501b 100644 --- a/data/law-names.json +++ b/data/law-names.json @@ -93,6 +93,12 @@ ], "category": "Speciell avtalsrätt" }, + { + "id": "1941:967", + "name": "värnpliktslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1942:740", "name": "rättegångsbalken", @@ -128,6 +134,12 @@ ], "category": "Familjerätt" }, + { + "id": "1955:183", + "name": "bankrörelselagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1957:259", "name": "", @@ -177,6 +189,12 @@ ], "category": "Immaterialrätt" }, + { + "id": "1962:661", + "name": "cialförsäkringsbalken", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1962:700", "name": "brottsbalken", @@ -201,12 +219,30 @@ "alternativeNames": [], "category": "Sakrätt" }, + { + "id": "1964:822", + "name": "naturvårdslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1965:745", + "name": "studiemedelsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1966:209", "name": "virkesmätningslagen", "type": "law", "alternativeNames": [] }, + { + "id": "1967:270", + "name": "skolförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1967:837", "name": "patentlagen", @@ -416,6 +452,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1973:531", + "name": "bostadssaneringslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1974:152", "name": "regeringsformen", @@ -512,6 +554,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1975:927", + "name":
"fartygsregisterförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1976:19", "name": "lagföringslagen", @@ -608,6 +656,12 @@ ], "category": "Skatterätt" }, + { + "id": "1979:1196", + "name": "jordbruksbokföringslagen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1979:189", "name": "bötesverkställighetslagen", @@ -626,6 +680,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1979:412", + "name": "indelningslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1979:429", "name": "skogsvårdslagen", @@ -633,6 +693,12 @@ "alternativeNames": [], "category": "Speciell förvaltningsrätt" }, + { + "id": "1979:748", + "name": "fondkommissionslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1980:100", "name": "", @@ -647,6 +713,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1980:565", + "name": "ningslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1980:657", "name": "", @@ -662,6 +734,12 @@ "alternativeNames": [], "category": "Fordringsrätt" }, + { + "id": "1981:1361", + "name": "hemförsäljningslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1981:774", "name": "utsökningsbalken", @@ -708,12 +786,48 @@ ], "category": "Arbetsrätt" }, + { + "id": "1983:447", + "name": "kommunalskattelagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1983:850", + "name": "uppbördslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1983:929", "name": "mönstringslagen", "type": "law", "alternativeNames": [] }, + { + "id": "1983:945", + "name": "vattenlagen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "1984:110", + "name": "omplaceringsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "1984:1118", + "name": "vuxenutbildningslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1984:272", + "name": "civilförsvarslagen", + "type": "law", + "alternativeNames": [] + }, { "id": 
"1984:292", "name": "", @@ -761,6 +875,12 @@ "VitesL" ] }, + { + "id": "1985:288", + "name": "vuxenutbildningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1985:716", "name": "konsumenttjänstlagen", @@ -770,6 +890,42 @@ ], "category": "Speciell avtalsrätt" }, + { + "id": "1985:894", + "name": "kommunalförbundslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1986:300", + "name": "sjötrafikförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "1986:357", + "name": "tullförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "1986:617", + "name": "utsökningslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1986:678", + "name": "utsökningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "1987:1068", + "name": "tulltaxeförordningen", + "type": "law", + "alternativeNames": [] + }, { "id": "1987:11", "name": "", @@ -793,6 +949,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1987:618", + "name": "bankaktiebolagslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1987:619", "name": "sparbankslagen", @@ -800,6 +962,12 @@ "alternativeNames": [], "category": "Ekonomisk rätt" }, + { + "id": "1987:620", + "name": "föreningsbankslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1987:672", "name": "konkurslagen", @@ -809,6 +977,12 @@ ], "category": "Processrätt" }, + { + "id": "1987:743", + "name": "gymnasieförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1987:822", "name": "", @@ -817,6 +991,12 @@ "CISG" ] }, + { + "id": "1988:180", + "name": "församlingslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1988:220", "name": "strålskyddslagen", @@ -845,6 +1025,18 @@ "LVM" ] }, + { + "id": "1988:950", + "name": "kulturmiljölagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1989:844", + "name": "aktiekontolagen", + "type": "regulation", + 
"alternativeNames": [] + }, { "id": "1989:978", "name": "prisregleringslagen", @@ -857,6 +1049,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1990:137", + "name": "tullregisterlagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1990:324", "name": "taxeringslagen", @@ -873,6 +1071,12 @@ "LVU" ] }, + { + "id": "1990:524", + "name": "presstödsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1990:746", "name": "", @@ -932,6 +1136,18 @@ ], "category": "Statsrätt" }, + { + "id": "1991:1559", + "name": "grundlagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1991:1870", + "name": "sjömansskattelagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1991:45", "name": "minerallagen", @@ -968,6 +1184,12 @@ "BRL" ] }, + { + "id": "1991:742", + "name": "byggnadslagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1991:900", "name": "kommunallagen", @@ -1021,12 +1243,24 @@ "Avgift" ] }, + { + "id": "1992:300", + "name": "kyrkolagen", + "type": "law", + "alternativeNames": [] + }, { "id": "1992:497", "name": "lönegarantilagen", "type": "law", "alternativeNames": [] }, + { + "id": "1993:1199", + "name": "fastighetstaxeringsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1993:1617", "name": "ordningslagen", @@ -1076,6 +1310,12 @@ "SjöL" ] }, + { + "id": "1994:1194", + "name": "dskoleförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1994:1220", "name": "stiftelselagen", @@ -1083,6 +1323,14 @@ "alternativeNames": [], "category": "Associationsrätt" }, + { + "id": "1994:1512", + "name": "", + "type": "law", + "alternativeNames": [ + "AVLK" + ] + }, { "id": "1994:193", "name": "", @@ -1124,14 +1372,6 @@ "BVL" ] }, - { - "id": "1994:1512", - "name": "", - "type": "law", - "alternativeNames": [ - "AVLK" - ] - }, { "id": "1995:1000", "name": "pantbankslagen", @@ -1147,6 +1387,12 @@ "VerksF" ] }, + { + "id": "1995:1325", + "name": 
"beslutsordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1995:1554", "name": "årsredovisningslagen", @@ -1156,6 +1402,18 @@ ], "category": "Associationsrätt" }, + { + "id": "1995:1559", + "name": "värdepappersbolagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "1995:206", + "name": "särskoleförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1995:400", "name": "fastighetsmäklarlagen", @@ -1163,6 +1421,12 @@ "alternativeNames": [], "category": "Sakrätt" }, + { + "id": "1995:401", + "name": "specialskoleförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "1995:584", "name": "föräldraledighetslagen", @@ -1170,13 +1434,6 @@ "alternativeNames": [], "category": "Familjerätt" }, - { - "id": "1996:242", - "name": "ärendelagen", - "type": "law", - "alternativeNames": [], - "category": "Processrätt" - }, { "id": "1996:1596", "name": "bibliotekslagen", @@ -1189,6 +1446,13 @@ "type": "law", "alternativeNames": [] }, + { + "id": "1996:242", + "name": "ärendelagen", + "type": "law", + "alternativeNames": [], + "category": "Processrätt" + }, { "id": "1996:627", "name": "säkerhetsskyddslagen", @@ -1375,6 +1639,18 @@ "alternativeNames": [], "category": "Speciell förvaltningsrätt" }, + { + "id": "2000:1086", + "name": "insiderlagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2000:1101", + "name": "marknadsmissbruksförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2000:1281", "name": "tullagen", @@ -1390,6 +1666,12 @@ ], "category": "Skatterätt" }, + { + "id": "2001:1113", + "name": "socialförsäkrings-balken", + "type": "law", + "alternativeNames": [] + }, { "id": "2001:1227", "name": "", @@ -1427,6 +1709,12 @@ "alternativeNames": [], "category": "Speciell förvaltningsrätt" }, + { + "id": "2002:599", + "name": "rättegångbalken", + "type": "law", + "alternativeNames": [] + }, { "id": "2003:234", "name": "", @@ -1456,6 +1744,12 @@ "LEK" ] }, + { + 
"id": "2003:438", + "name": "fartygssäkerhetsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2003:778", "name": "", @@ -1480,6 +1774,12 @@ "alternativeNames": [], "category": "Marknadsrätt" }, + { + "id": "2004:368", + "name": "bolagsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2004:451", "name": "produktsäkerhetslagen", @@ -1494,6 +1794,12 @@ "alternativeNames": [], "category": "Speciell förvaltningsrätt" }, + { + "id": "2004:660", + "name": "vattenförvaltningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2005:104", "name": "försäkringsavtalslagen", @@ -1518,6 +1824,12 @@ ], "category": "Associationsrätt" }, + { + "id": "2005:559", + "name": "aktieboken", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2005:59", "name": "distansavtalslagen", @@ -1570,6 +1882,12 @@ "type": "law", "alternativeNames": [] }, + { + "id": "2007:224", + "name": "högskoleförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2007:612", "name": "bidragsbrottslagen", @@ -1577,6 +1895,12 @@ "alternativeNames": [], "category": "Straffrätt" }, + { + "id": "2007:90", + "name": "vägmärkesförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2008:263", "name": "fjärrvärmelagen", @@ -1618,6 +1942,18 @@ "type": "law", "alternativeNames": [] }, + { + "id": "2009:1165", + "name": "barlastvattenlagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2009:212", + "name": "militärtrafikförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2009:287", "name": "studiestödsdatalagen", @@ -1673,21 +2009,16 @@ "category": "Speciell förvaltningsrätt" }, { - "id": "2010:610", - "name": "fängelselagen", - "type": "law", - "alternativeNames": [], - "category": "Straffrätt" + "id": "2010:1369", + "name": "patientsäkerhetsförordningen", + "type": "regulation", + "alternativeNames": [] }, { - "id": "2010:900", - "name": "plan- 
och bygglagen", - "type": "law", - "alternativeNames": [ - "PBL", - "bygglagen" - ], - "category": "Allmän förvaltningsrätt" + "id": "2010:1579", + "name": "arbetsordningen", + "type": "regulation", + "alternativeNames": [] }, { "id": "2010:1622", @@ -1704,6 +2035,53 @@ ], "category": "Allmän avtalsrätt" }, + { + "id": "2010:2010", + "name": "fängelseförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2010:2043", + "name": "ringsrörelselagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2010:610", + "name": "fängelselagen", + "type": "law", + "alternativeNames": [], + "category": "Straffrätt" + }, + { + "id": "2010:611", + "name": "häkteslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2010:659", + "name": "patientlagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2010:770", + "name": "luftfartsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2010:900", + "name": "plan- och bygglagen", + "type": "law", + "alternativeNames": [ + "PBL", + "bygglagen" + ], + "category": "Allmän förvaltningsrätt" + }, { "id": "2011:1244", "name": "skatteförfarandelagen", @@ -1713,6 +2091,102 @@ ], "category": "Skatterätt" }, + { + "id": "2011:1261", + "name": "skatteförfarandeförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:13", + "name": "tillsynsboken", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:131", + "name": "sameskolförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:203", + "name": "budgetlagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2011:210", + "name": "kapitalförsörjningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:257", + "name": "försäkringsrörelseförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:594", + "name": "varumärkesförordningen", + "type": "regulation", 
+ "alternativeNames": [] + }, + { + "id": "2011:668", + "name": "fastighetsboken", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2011:927", + "name": "avfallsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2012:145", + "name": "kustbevakningsdatalagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2012:179", + "name": "strafflagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2012:211", + "name": "taxitrafiklagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2012:237", + "name": "trafikförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2012:238", + "name": "taxitrafikförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2013:250", + "name": "industriutsläppsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2013:251", + "name": "miljöprövningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2013:561", "name": "", @@ -1728,6 +2202,18 @@ "alternativeNames": [], "category": "Speciell förvaltningsrätt" }, + { + "id": "2015:433", + "name": "åklagardatalagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2015:503", + "name": "anställningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, { "id": "2016:1145", "name": "", @@ -1736,6 +2222,18 @@ "LOU" ] }, + { + "id": "2016:30", + "name": "utlänningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2016:964", + "name": "konkurrensskadelagen", + "type": "law", + "alternativeNames": [] + }, { "id": "2017:30", "name": "", @@ -1744,6 +2242,24 @@ "HSL" ] }, + { + "id": "2017:447", + "name": "tullbrottsdatalagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2017:452", + "name": "skattebrottsdatalagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2017:808", + "name": "vapenförordningen", + "type": "regulation", + 
"alternativeNames": [] + }, { "id": "2017:900", "name": "förvaltningslagen", @@ -1753,6 +2269,43 @@ ], "category": "Allmän förvaltningsrätt" }, + { + "id": "2017:966", + "name": "miljöbedömningsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2018:1174", + "name": "cybersäkerhetslagen", + "type": "law", + "alternativeNames": [], + "category": "IT-rätt" + }, + { + "id": "2018:1202", + "name": "brottsdataförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2018:1212", + "name": "läkemedelsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2018:1251", + "name": "strafftidslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2018:1696", + "name": "brottsdatalagen", + "type": "law", + "alternativeNames": [] + }, { "id": "2018:218", "name": "dataskyddslagen", @@ -1763,10 +2316,159 @@ "category": "IT-rätt" }, { - "id": "2018:1174", - "name": "cybersäkerhetslagen", + "id": "2018:506", + "name": "strålskyddsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2018:597", + "name": "munallagen", "type": "law", - "alternativeNames": [], - "category": "IT-rätt" + "alternativeNames": [] + }, + { + "id": "2019:313", + "name": "avgiftsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2019:369", + "name": "vägtrafikdatalagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2019:382", + "name": "vägtrafikdataförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2019:383", + "name": "fordonsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2019:66", + "name": "djurskyddsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2019:95", + "name": "strafftidsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2020:486", + "name": "miljö-balken", + "type": "regulation", + "alternativeNames": [] 
+ }, + { + "id": "2020:833", + "name": "skoleförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2021:955", + "name": "säkerhetsskyddsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2022:1718", + "name": "polisförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2022:365", + "name": "järnvägsmarknadslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2022:366", + "name": "järnvägstekniklagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2022:367", + "name": "järnvägssäkerhetslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2022:418", + "name": "järnvägsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2023:38", + "name": "biobankslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2023:455", + "name": "betalningslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2024:1020", + "name": "patentförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2024:1082", + "name": "rörelselagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2024:1089", + "name": "klampningslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2024:710", + "name": "tullbefogenhetslagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2024:759", + "name": "tullbefogenhetsförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2024:782", + "name": "förfarandelagen", + "type": "law", + "alternativeNames": [] + }, + { + "id": "2025:468", + "name": "socialtjänstförordningen", + "type": "regulation", + "alternativeNames": [] + }, + { + "id": "2025:671", + "name": "studiestödsförordningen", + "type": "regulation", + "alternativeNames": [] } ] \ No newline at end of file diff --git a/scripts/add_law_names.py b/scripts/add_law_names.py new file mode 100644 index 0000000..5d82a52 --- /dev/null +++ 
# ===========================================================================
# Patch content, file 1 of 2 (new file): scripts/add_law_names.py
# NOTE: the two scripts below are separate files in the patch. They are shown
# back to back only because the patch text was flattened; each one has its
# own imports, its own main() and its own entry-point guard.
# ===========================================================================
#!/usr/bin/env python3
"""
Add matched law names to law-names.json, using improved filtering.
"""

import json
from pathlib import Path
from collections import defaultdict


def is_valid_law_type(rubrik, forfattningstyp):
    """
    Return True if the statute looks like a main act or main ordinance.

    Rejects titles that indicate proclamations, circulars, letters,
    announcements, introduction/application acts and amending statutes,
    and rejects every statute type other than 'Lag' and 'Förordning'.

    Args:
        rubrik: The statute title.
        forfattningstyp: The statute type name, e.g. 'Lag' or 'Förordning'.
    """
    rubrik_lower = rubrik.lower()

    # Substrings that mark a title as something other than a main statute.
    bad_patterns = (
        'kungörelse',
        'cirkulär',
        'brev',
        'tillkännagivande',
        'införande av',
        'om införande',
        'tillämpning av',
        'om tillämpning',
        'ändr',  # amending statutes
        'ändring i',
        'om ändring',
    )
    if any(pattern in rubrik_lower for pattern in bad_patterns):
        return False

    # Accept only acts and ordinances.
    return forfattningstyp in ('Lag', 'Förordning')


def score_match(law_name, match):
    """
    Score how well a candidate statute matches a law name (higher is better).

    Args:
        law_name: The law name being looked up.
        match: Candidate dict with the keys 'rubrik', 'search_term',
            'forfattningstyp' and 'id'.

    Returns:
        An integer score; it can be negative for very short search terms.
    """
    score = 0
    rubrik = match['rubrik']
    rubrik_lower = rubrik.lower()
    law_name_lower = law_name.lower()
    search_term = match['search_term']
    forfattningstyp = match['forfattningstyp']

    # Big bonus if the exact law name occurs in the title, plus an extra bonus
    # when it occurs near the start (after statute type and number), e.g.
    # "Förordning (2019:66) Djurskyddsförordning".
    # str.find() returns -1 when the name is absent, so the position bonus
    # must only be considered after a successful find; comparing the raw
    # result with 50 would reward every title that lacks the name.
    name_pos = rubrik_lower.find(law_name_lower)
    if name_pos != -1:
        score += 30
        if name_pos < 50:
            score += 10

    # Bonus if the statute type agrees with the kind of name.
    if 'lag' in law_name_lower and forfattningstyp == 'Lag':
        score += 15
    elif 'förordning' in law_name_lower and forfattningstyp == 'Förordning':
        score += 15

    # Bonus if the title is short and clean (probably the main statute), e.g.
    # "Djurskyddslag (2018:1192)" beats "Förordning om tillämpning av...".
    if len(rubrik) < 100 and search_term in rubrik_lower[:50]:
        score += 10

    # Bonus for newer statutes. An id without a usable leading year simply
    # gets no bonus; only the errors that parsing can produce are absorbed.
    try:
        year = int(match['id'].split(':')[0])
    except (KeyError, ValueError, AttributeError, TypeError):
        year = None
    if year is None:
        pass
    elif year >= 2010:
        score += 5
    elif year >= 2000:
        score += 3
    elif year >= 1990:
        score += 2

    # Penalty if the search term is very short (risk of false positives).
    if len(search_term) < 6:
        score -= 5

    return score


def select_best_matches(matches_file):
    """
    Pick the best candidate statute for each law name.

    Args:
        matches_file: Path to a JSON file mapping each law name to a list of
            candidate dicts (keys 'id', 'rubrik', 'search_term',
            'forfattningstyp').

    Returns:
        A tuple (best_matches, skipped_names). best_matches maps a law name
        to {'match', 'score', 'alternatives'}; skipped_names maps a law name
        to a list of (reason, match) tuples for the candidates filtered out.
    """
    with open(matches_file, 'r', encoding='utf-8') as f:
        all_matches = json.load(f)

    best_matches = {}
    skipped_names = defaultdict(list)

    for law_name, match_list in all_matches.items():
        if not match_list:
            continue

        law_name_lower = law_name.lower()
        scored_matches = []
        for match in match_list:
            # Skip if the search term is too short and the law name does not
            # occur verbatim in the title.
            if len(match['search_term']) < 5 and law_name_lower not in match['rubrik'].lower():
                skipped_names[law_name].append(('short_search_term', match))
                continue

            # Skip anything that is not a main act/ordinance.
            if not is_valid_law_type(match['rubrik'], match['forfattningstyp']):
                skipped_names[law_name].append(('invalid_type', match))
                continue

            # Keep only candidates scoring above the acceptance threshold (10).
            score = score_match(law_name, match)
            if score > 10:
                scored_matches.append((score, match))

        if not scored_matches:
            continue

        # Highest score wins; on ties the earliest candidate is kept.
        best_score, best_match = max(scored_matches, key=lambda item: item[0])
        best_matches[law_name] = {
            'match': best_match,
            'score': best_score,
            'alternatives': len(scored_matches)
        }

    return best_matches, skipped_names


def _sfs_sort_key(law_id):
    """Return a key that orders "year:number" ids numerically, others last."""
    year, _, number = str(law_id).partition(':')
    try:
        return (0, int(year), int(number), '')
    except ValueError:
        # Unparseable ids keep a stable, text-ordered position at the end.
        return (1, 0, 0, str(law_id))


def add_to_law_names(best_matches, law_names_file, output_file):
    """
    Merge the selected matches into the law-name list and write the result.

    Entries whose id or name already exists are skipped. The combined list is
    sorted numerically by id (year, then running number), so that e.g.
    "2018:218" precedes "2018:1174"; a plain string sort would put
    "2018:1174" first.

    Args:
        best_matches: Mapping as returned by select_best_matches().
        law_names_file: Path to the existing law-names JSON file.
        output_file: Path the updated JSON is written to.

    Returns:
        A tuple (added_count, skipped_count, total_entries).
    """
    # Load the existing law names.
    with open(law_names_file, 'r', encoding='utf-8') as f:
        existing_laws = json.load(f)

    # Lookup sets for existing ids and (non-empty) names.
    existing_ids = {law['id'] for law in existing_laws}
    existing_names = {law['name'].lower() for law in existing_laws if law.get('name')}

    added_count = 0
    skipped_count = 0

    for law_name, match_info in best_matches.items():
        match = match_info['match']
        law_id = match['id']
        name_key = law_name.lower()

        # Skip if either the id or the name is already present.
        if law_id in existing_ids or name_key in existing_names:
            skipped_count += 1
            continue

        # New entry (without title, to match the existing format).
        existing_laws.append({
            'id': law_id,
            'name': law_name,
            'type': 'law' if match['forfattningstyp'] == 'Lag' else 'regulation',
            'alternativeNames': []
        })
        existing_ids.add(law_id)
        existing_names.add(name_key)
        added_count += 1

    # Sort by id, numerically.
    existing_laws.sort(key=lambda law: _sfs_sort_key(law['id']))

    # Save the updated file, terminated by a newline so that later appends
    # cannot glue text onto the last line.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(existing_laws, f, indent=2, ensure_ascii=False)
        f.write('\n')

    return added_count, skipped_count, len(existing_laws)


def main():
    """Select the best matches, print a summary and write the updated list."""
    matches_file = Path("logs/found_law_matches.json")
    law_names_file = Path("data/law-names.json")
    output_file = Path("data/law-names-updated.json")

    print("Analyserar matchningar med förbättrad filtrering...")
    best_matches, skipped = select_best_matches(matches_file)

    print(f"\nValde {len(best_matches)} bästa matchningar")
    print(f"Filtrerade bort matchningar för {len(skipped)} lagnamn")

    # Show the 30 best matches.
    print("\nTopp 30 matchningar (sorterade efter poäng):")
    print("=" * 80)

    sorted_matches = sorted(best_matches.items(), key=lambda x: x[1]['score'], reverse=True)
    for law_name, info in sorted_matches[:30]:
        match = info['match']
        print(f"\n{law_name} (poäng: {info['score']}, alternativ: {info['alternatives']})")
        print(f" -> {match['id']}: {match['rubrik'][:70]}...")

    # Add to law-names.json.
    print("\n" + "=" * 80)
    print("Lägger till i law-names.json...")

    added, skipped_count, total = add_to_law_names(best_matches, law_names_file, output_file)

    print(f"\nResultat:")
    print(f" Tillagda: {added}")
    print(f" Hoppade över (finns redan eller duplicerat namn): {skipped_count}")
    print(f" Totalt i fil: {total}")
    print(f"\nUppdaterad fil sparad som: {output_file}")


if __name__ == "__main__":
    main()


# ===========================================================================
# Patch content, file 2 of 2 (new file): scripts/find_missing_law_names.py
# ===========================================================================
#!/usr/bin/env python3
"""
Find candidate statutes for law names missing from law-names.json by
searching all JSON files from the SFS database.
"""

import json
import re
from pathlib import Path
from collections import defaultdict


def normalize_law_name(name):
    """Normalize a law name for matching: lowercase, trimmed, hyphens removed."""
    if not name:
        return ""
    # Lowercase and strip surrounding whitespace, then drop hyphens, which
    # may be misspellings.
    return name.lower().strip().replace('-', '')


def extract_search_term(law_name):
    """
    Derive a search term from a law name by dropping one common suffix.

    E.g. "brottsdatalagen" -> "brottsdata".
    """
    # Order matters: 'förordningen' must be tried before 'ordningen'.
    suffixes = ['lagen', 'förordningen', 'balken', 'ordningen', 'boken']
    search_term = law_name.lower()

    for suffix in suffixes:
        if search_term.endswith(suffix):
            search_term = search_term[:-len(suffix)]
            break

    return search_term.strip()


def find_matching_laws(missing_names, json_dir):
    """
    Scan every *.json file in json_dir for titles matching the missing names.

    Args:
        missing_names: Iterable of law names to look for.
        json_dir: Directory holding one JSON document per statute; the keys
            read are 'rubrik', 'beteckning' and 'forfattningstypNamn'.

    Returns:
        A defaultdict mapping each matched law name to a list of dicts with
        the keys 'id', 'rubrik', 'search_term', 'forfattningstyp', 'file'.
    """
    # Sorted so that the order of candidates (and thereby later
    # tie-breaking) does not depend on the file system.
    json_files = sorted(Path(json_dir).glob("*.json"))
    matches = defaultdict(list)

    # The search terms do not depend on the file being scanned, so derive
    # them once. Terms shorter than four characters are dropped (too many
    # false positives).
    search_terms = []
    for missing_name in missing_names:
        search_term = extract_search_term(missing_name)
        if len(search_term) >= 4:
            search_terms.append((missing_name, search_term))

    print(f"Söker genom {len(json_files)} JSON-filer...")

    for processed, json_file in enumerate(json_files, start=1):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Fel vid läsning av {json_file}: {e}")
            data = None

        # Documents that are not objects, or that lack a textual title or a
        # designation, cannot be matched and are passed over.
        raw_rubrik = data.get('rubrik') if isinstance(data, dict) else None
        beteckning = data.get('beteckning', '') if isinstance(data, dict) else ''
        if isinstance(raw_rubrik, str) and raw_rubrik and beteckning:
            rubrik = raw_rubrik.lower()
            for missing_name, search_term in search_terms:
                # Check whether the search term occurs in the title.
                if search_term in rubrik:
                    matches[missing_name].append({
                        'id': beteckning,
                        'rubrik': raw_rubrik.strip(),
                        'search_term': search_term,
                        'forfattningstyp': data.get('forfattningstypNamn', ''),
                        'file': json_file.name
                    })

        if processed % 1000 == 0:
            print(f"Bearbetat {processed}/{len(json_files)} filer...")

    return matches


def main():
    """Read the missing names, search the SFS dump and save the candidates."""
    # Read the missing law names.
    missing_file = Path("logs/missing_law_names.txt")
    with open(missing_file, 'r', encoding='utf-8') as f:
        missing_names = [line.strip() for line in f if line.strip()]

    print(f"Hittade {len(missing_names)} saknade lagnamn")

    # Search the JSON directory.
    json_dir = Path("../sfs-jsondata")
    if not json_dir.exists():
        print(f"Fel: Katalogen {json_dir} finns inte!")
        return

    matches = find_matching_laws(missing_names, json_dir)

    print(f"\nHittade matchningar för {len(matches)} lagnamn:")
    print("=" * 80)

    # Save the results.
    output_file = Path("logs/found_law_matches.json")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(matches, f, indent=2, ensure_ascii=False)

    print(f"\nResultat sparat till: {output_file}")

    # Show statistics.
    total_matches = sum(len(v) for v in matches.values())
    print(f"\nTotalt: {total_matches} matchningar för {len(matches)} unika lagnamn")

    # Show a few examples.
    print("\nExempel på matchningar:")
    print("=" * 80)
    for name, law_list in list(matches.items())[:15]:
        if law_list:
            print(f"\n'{name}' (sökterm: '{extract_search_term(name)}'):")
            for law in law_list[:2]:  # Show at most 2 matches per name
                print(f" - {law['id']}: {law['rubrik'][:80]}...")


if __name__ == "__main__":
    main()