thisisparker · afontenot · May 31, 2025
diff --git a/xword_dl/downloader/amuniversaldownloader.py b/xword_dl/downloader/amuniversaldownloader.py
@@ -4,7 +4,6 @@
 import time
 import xml
 
-import puz
 import requests
 import xmltodict
 
@@ -60,26 +59,25 @@ def parse_xword(self, xw_data):
         for field in ['Title', 'Author', 'Editor', 'Copryight']:
             fetched[field] = unquote(xw_data.get(field, '')).strip()
 
-        puzzle = puz.Puzzle()
-        puzzle.title = fetched.get('Title', '')
-        puzzle.author = ''.join([fetched.get('Author', ''),
+        self.puzzle.title = fetched.get('Title', '')
+        self.puzzle.author = ''.join([fetched.get('Author', ''),
                                  ' / Ed. ',
                                  fetched.get('Editor', '')])
-        puzzle.copyright = fetched.get('Copyright', '')
-        puzzle.width = int(xw_data.get('Width'))
-        puzzle.height = int(xw_data.get('Height'))
+        self.puzzle.copyright = fetched.get('Copyright', '')
+        self.puzzle.width = int(xw_data.get('Width'))
+        self.puzzle.height = int(xw_data.get('Height'))
 
         solution = xw_data.get('AllAnswer').replace('-', '.')
 
-        puzzle.solution = solution
+        self.puzzle.solution = solution
 
         fill = ''
         for letter in solution:
             if letter == '.':
                 fill += '.'
             else:
                 fill += '-'
-        puzzle.fill = fill
+        self.puzzle.fill = fill
 
         across_clues = xw_data['AcrossClue'].splitlines()
         down_clues = self.process_clues(xw_data['DownClue'].splitlines())
@@ -93,9 +91,9 @@ def parse_xword(self, xw_data):
 
         clues = [clue['clue'] for clue in clues_sorted]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
-        return puzzle
+        return self.puzzle
 
 # As of Sept 2023, the JSON data for USA Today is not consistently populated.
 # I'd rather use the JSON data if possible, but until that's sorted, we can
@@ -167,24 +165,22 @@ def parse_xword(self, xw_data):
         except (xml.parsers.expat.ExpatError, KeyError):
             raise XWordDLException('Puzzle data malformed, cannot parse.')
 
-        puzzle = puz.Puzzle()
+        self.puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
+        self.puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
+        self.puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
 
-        puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
-        puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
-        puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
+        self.puzzle.width = int(xw.get('Width')['@v'])
+        self.puzzle.height = int(xw.get('Height')['@v'])
 
-        puzzle.width = int(xw.get('Width')['@v'])
-        puzzle.height = int(xw.get('Height')['@v'])
-
-        puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
-        puzzle.fill = ''.join([c if c == '.' else '-' for c in puzzle.solution])
+        self.puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
+        self.puzzle.fill = ''.join([c if c == '.' else '-' for c in self.puzzle.solution])
 
         xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()),
                           key=lambda c: int(c['@cn']))
 
-        puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
+        self.puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
 
-        return puzzle
+        return self.puzzle
 
 
 class UniversalDownloader(AMUniversalDownloader):

diff --git a/xword_dl/downloader/amuselabsdownloader.py b/xword_dl/downloader/amuselabsdownloader.py
@@ -3,7 +3,6 @@
 import json
 import urllib.parse
 
-import puz
 import requests
 
 import re
@@ -174,12 +173,11 @@ def fetch_data(self, solver_url):
         return xword_data
 
     def parse_xword(self, xw_data):
-        puzzle = puz.Puzzle()
-        puzzle.title = xw_data.get('title', '').strip()
-        puzzle.author = xw_data.get('author', '').strip()
-        puzzle.copyright = xw_data.get('copyright', '').strip()
-        puzzle.width = xw_data.get('w')
-        puzzle.height = xw_data.get('h')
+        self.puzzle.title = xw_data.get('title', '').strip()
+        self.puzzle.author = xw_data.get('author', '').strip()
+        self.puzzle.copyright = xw_data.get('copyright', '').strip()
+        self.puzzle.width = xw_data.get('w')
+        self.puzzle.height = xw_data.get('h')
 
         markup_data = xw_data.get('cellInfos', '')
 
@@ -215,8 +213,8 @@ def parse_xword(self, xw_data):
                     rebus_table += '{:2d}:{};'.format(rebus_index, unidecode(cell))
                     rebus_index += 1
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         placed_words = xw_data['placedWords']
 
@@ -225,23 +223,23 @@ def parse_xword(self, xw_data):
 
         clues = [word['clue']['clue'] for word in weirdass_puz_clue_sorting]
 
-        puzzle.clues.extend(clues)
+        self.puzzle.clues.extend(clues)
 
         has_markup = b'\x80' in markup
         has_rebus = any(rebus_board)
 
         if has_markup:
-            puzzle.extensions[b'GEXT'] = markup
-            puzzle._extensions_order.append(b'GEXT')
-            puzzle.markup()
+            self.puzzle.extensions[b'GEXT'] = markup
+            self.puzzle._extensions_order.append(b'GEXT')
+            self.puzzle.markup()
 
         if has_rebus:
-            puzzle.extensions[b'GRBS'] = bytes(rebus_board)
-            puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
-            puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
-            puzzle.rebus()
+            self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
+            self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
+            self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
+            self.puzzle.rebus()
 
-        return puzzle
+        return self.puzzle
 
     def pick_filename(self, puzzle, **kwargs):
         if not self.date and self.id:

diff --git a/xword_dl/downloader/basedownloader.py b/xword_dl/downloader/basedownloader.py
@@ -1,8 +1,8 @@
 import urllib.parse
 from datetime import datetime
 
+import puz
 import requests
-from puz import Puzzle
 
 from ..util import (
     read_config_values,
@@ -42,7 +42,16 @@ def __init__(self, **kwargs):
         self.session.headers.update(self.settings.get('headers', {}))
         self.session.cookies.update(self.settings.get('cookies', {}))
 
-    def pick_filename(self, puzzle: Puzzle, **kwargs) -> str:
+        self.puzzle = puz.Puzzle()
+
+        if 'puzzle_v1' not in kwargs:
+            # HACK: puzpy currently provides no method for setting these
+            # values, so they are patched directly on the Puzzle instance
+            self.puzzle.version = b'2.0'
+            self.puzzle.fileversion = b'2.0\0'
+            self.puzzle.encoding = 'UTF-8'
+
+    def pick_filename(self, puzzle: puz.Puzzle, **kwargs) -> str:
         tokens = {'outlet':  self.outlet or '',
                   'prefix':  self.outlet_prefix or '',
                   'title':   puzzle.title or '',
@@ -77,7 +86,7 @@ def pick_filename(self, puzzle: Puzzle, **kwargs) -> str:
 
         return template
 
-    def download(self, url: str) -> Puzzle:
+    def download(self, url: str) -> puz.Puzzle:
         """Download, parse, and return a puzzle at a given URL."""
 
         solver_url = self.find_solver(url)
@@ -86,7 +95,8 @@ def download(self, url: str) -> Puzzle:
 
         puzzle = sanitize_for_puzfile(
             puzzle,
-            preserve_html=self.settings.get("preserve_html", False)
+            preserve_html=self.settings.get("preserve_html", False),
+            demojize=(self.puzzle.encoding != "UTF-8")
         )
 
         return puzzle
@@ -108,7 +118,7 @@ def fetch_data(self, solver_url: str):
         """
         raise NotImplementedError
 
-    def parse_xword(self, xw_data) -> Puzzle:
+    def parse_xword(self, xw_data) -> puz.Puzzle:
         """Given a blob of crossword data, parse and stuff into puz format.
 
         This method is implemented in subclasses based on the differences in

diff --git a/xword_dl/downloader/compilerdownloader.py b/xword_dl/downloader/compilerdownloader.py
@@ -1,4 +1,3 @@
-import puz
 import requests
 import urllib.parse
 import xmltodict
@@ -50,30 +49,32 @@ def parse_xword(self, xw_data, enumeration=True):
         xw_metadata = xw_puzzle['metadata']
         xw_grid = xw_puzzle['crossword']['grid']
 
-        puzzle = puz.Puzzle()
+        self.puzzle.title = xw_metadata.get('title') or ''
+        self.puzzle.author = xw_metadata.get('creator') or ''
+        self.puzzle.copyright = xw_metadata.get('copyright') or ''
 
-        puzzle.title = xw_metadata.get('title') or ''
-        puzzle.author = xw_metadata.get('creator') or ''
-        puzzle.copyright = xw_metadata.get('copyright') or ''
+        self.puzzle.title = xw_metadata.get('title') or ''
+        self.puzzle.author = xw_metadata.get('creator') or ''
+        self.puzzle.copyright = xw_metadata.get('copyright') or ''
 
-        puzzle.width = int(xw_grid['@width'])
-        puzzle.height = int(xw_grid['@height'])
+        self.puzzle.width = int(xw_grid['@width'])
+        self.puzzle.height = int(xw_grid['@height'])
 
         solution = ''
         fill = ''
         markup = b''
 
         cells = {(int(cell['@x']), int(cell['@y'])): cell for cell in xw_grid['cell']}
 
-        for y in range(1, puzzle.height + 1):
-            for x in range(1, puzzle.width + 1):
+        for y in range(1, self.puzzle.height + 1):
+            for x in range(1, self.puzzle.width + 1):
                 cell = cells[(x, y)]
                 solution += cell.get('@solution', '.')
                 fill += '.' if cell.get('@type') == 'block' else '-'
                 markup += (b'\x80' if (cell.get('@background-shape') == 'circle') else b'\x00')
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         xw_clues = xw_puzzle['crossword']['clues']
 
@@ -83,13 +84,13 @@ def parse_xword(self, xw_data, enumeration=True):
                     if c.get("@format") and enumeration else '') for c in
                     sorted(all_clues, key=lambda x: int(x['@number']))]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
         has_markup = b'\x80' in markup
 
         if has_markup:
-            puzzle.extensions[b'GEXT'] = markup
-            puzzle._extensions_order.append(b'GEXT')
-            puzzle.markup()
+            self.puzzle.extensions[b'GEXT'] = markup
+            self.puzzle._extensions_order.append(b'GEXT')
+            self.puzzle.markup()
 
-        return puzzle
+        return self.puzzle
diff --git a/xword_dl/downloader/guardiandownloader.py b/xword_dl/downloader/guardiandownloader.py
@@ -2,7 +2,6 @@
 import json
 import re
 
-import puz
 import requests
 
 from bs4 import BeautifulSoup, Tag
@@ -54,16 +53,14 @@ def fetch_data(self, solver_url):
         return xw_data
 
     def parse_xword(self, xw_data):
-        puzzle = puz.Puzzle()
+        self.puzzle.author = xw_data.get('creator', {}).get('name') or ''
+        self.puzzle.height = xw_data.get('dimensions').get('rows')
+        self.puzzle.width  = xw_data.get('dimensions').get('cols')
 
-        puzzle.author = xw_data.get('creator', {}).get('name', '')
-        puzzle.height = xw_data.get('dimensions').get('rows')
-        puzzle.width  = xw_data.get('dimensions').get('cols')
-
-        puzzle.title = xw_data.get('name') or ''
+        self.puzzle.title = xw_data.get('name') or ''
 
         if not all(e.get('solution') for e in xw_data['entries']):
-            puzzle.title += ' - no solution provided'
+            self.puzzle.title += ' - no solution provided'
 
         self.date = datetime.datetime.fromtimestamp(
                                         xw_data['date'] // 1000)
@@ -80,21 +77,21 @@ def parse_xword(self, xw_data):
         solution = ''
         fill = ''
 
-        for y in range(puzzle.height):
-            for x in range(puzzle.width):
+        for y in range(self.puzzle.height):
+            for x in range(self.puzzle.width):
                 sol_at_space = grid_dict.get((x,y), '.')
                 solution += sol_at_space
                 fill += '.' if sol_at_space == '.' else '-'
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         clues = [e.get('clue') for e in sorted(xw_data.get('entries'),
                     key=lambda x: (x.get('number'), x.get('direction')))]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
-        return puzzle
+        return self.puzzle
 
 
 class GuardianCrypticDownloader(GuardianDownloader):