Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions xword_dl/downloader/amuniversaldownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time
import xml

import puz
import requests
import xmltodict

Expand Down Expand Up @@ -60,26 +59,25 @@ def parse_xword(self, xw_data):
for field in ['Title', 'Author', 'Editor', 'Copryight']:
fetched[field] = unquote(xw_data.get(field, '')).strip()

puzzle = puz.Puzzle()
puzzle.title = fetched.get('Title', '')
puzzle.author = ''.join([fetched.get('Author', ''),
self.puzzle.title = fetched.get('Title', '')
self.puzzle.author = ''.join([fetched.get('Author', ''),
' / Ed. ',
fetched.get('Editor', '')])
puzzle.copyright = fetched.get('Copyright', '')
puzzle.width = int(xw_data.get('Width'))
puzzle.height = int(xw_data.get('Height'))
self.puzzle.copyright = fetched.get('Copyright', '')
self.puzzle.width = int(xw_data.get('Width'))
self.puzzle.height = int(xw_data.get('Height'))

solution = xw_data.get('AllAnswer').replace('-', '.')

puzzle.solution = solution
self.puzzle.solution = solution

fill = ''
for letter in solution:
if letter == '.':
fill += '.'
else:
fill += '-'
puzzle.fill = fill
self.puzzle.fill = fill

across_clues = xw_data['AcrossClue'].splitlines()
down_clues = self.process_clues(xw_data['DownClue'].splitlines())
Expand All @@ -93,9 +91,9 @@ def parse_xword(self, xw_data):

clues = [clue['clue'] for clue in clues_sorted]

puzzle.clues = clues
self.puzzle.clues = clues

return puzzle
return self.puzzle

# As of Sept 2023, the JSON data for USA Today is not consistently populated.
# I'd rather use the JSON data if possible, but until that's sorted, we can
Expand Down Expand Up @@ -167,24 +165,22 @@ def parse_xword(self, xw_data):
except (xml.parsers.expat.ExpatError, KeyError):
raise XWordDLException('Puzzle data malformed, cannot parse.')

puzzle = puz.Puzzle()
self.puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
self.puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
self.puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')

puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
self.puzzle.width = int(xw.get('Width')['@v'])
self.puzzle.height = int(xw.get('Height')['@v'])

puzzle.width = int(xw.get('Width')['@v'])
puzzle.height = int(xw.get('Height')['@v'])

puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
puzzle.fill = ''.join([c if c == '.' else '-' for c in puzzle.solution])
self.puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
self.puzzle.fill = ''.join([c if c == '.' else '-' for c in self.puzzle.solution])

xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()),
key=lambda c: int(c['@cn']))

puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
self.puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]

return puzzle
return self.puzzle


class UniversalDownloader(AMUniversalDownloader):
Expand Down
34 changes: 16 additions & 18 deletions xword_dl/downloader/amuselabsdownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import urllib.parse

import puz
import requests

import re
Expand Down Expand Up @@ -174,12 +173,11 @@ def fetch_data(self, solver_url):
return xword_data

def parse_xword(self, xw_data):
puzzle = puz.Puzzle()
puzzle.title = xw_data.get('title', '').strip()
puzzle.author = xw_data.get('author', '').strip()
puzzle.copyright = xw_data.get('copyright', '').strip()
puzzle.width = xw_data.get('w')
puzzle.height = xw_data.get('h')
self.puzzle.title = xw_data.get('title', '').strip()
self.puzzle.author = xw_data.get('author', '').strip()
self.puzzle.copyright = xw_data.get('copyright', '').strip()
self.puzzle.width = xw_data.get('w')
self.puzzle.height = xw_data.get('h')

markup_data = xw_data.get('cellInfos', '')

Expand Down Expand Up @@ -215,8 +213,8 @@ def parse_xword(self, xw_data):
rebus_table += '{:2d}:{};'.format(rebus_index, unidecode(cell))
rebus_index += 1

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

placed_words = xw_data['placedWords']

Expand All @@ -225,23 +223,23 @@ def parse_xword(self, xw_data):

clues = [word['clue']['clue'] for word in weirdass_puz_clue_sorting]

puzzle.clues.extend(clues)
self.puzzle.clues.extend(clues)

has_markup = b'\x80' in markup
has_rebus = any(rebus_board)

if has_markup:
puzzle.extensions[b'GEXT'] = markup
puzzle._extensions_order.append(b'GEXT')
puzzle.markup()
self.puzzle.extensions[b'GEXT'] = markup
self.puzzle._extensions_order.append(b'GEXT')
self.puzzle.markup()

if has_rebus:
puzzle.extensions[b'GRBS'] = bytes(rebus_board)
puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
puzzle.rebus()
self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
self.puzzle.rebus()

return puzzle
return self.puzzle

def pick_filename(self, puzzle, **kwargs):
if not self.date and self.id:
Expand Down
20 changes: 15 additions & 5 deletions xword_dl/downloader/basedownloader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import urllib.parse
from datetime import datetime

import puz
import requests
from puz import Puzzle

from ..util import (
read_config_values,
Expand Down Expand Up @@ -42,7 +42,16 @@ def __init__(self, **kwargs):
self.session.headers.update(self.settings.get('headers', {}))
self.session.cookies.update(self.settings.get('cookies', {}))

def pick_filename(self, puzzle: Puzzle, **kwargs) -> str:
self.puzzle = puz.Puzzle()

if 'puzzle_v1' not in kwargs:
# this is hack-ily patching constants that puzpy does not
# currently provide a method for setting
self.puzzle.version = b'2.0'
self.puzzle.fileversion = b'2.0\0'
self.puzzle.encoding = 'UTF-8'

def pick_filename(self, puzzle: puz.Puzzle, **kwargs) -> str:
tokens = {'outlet': self.outlet or '',
'prefix': self.outlet_prefix or '',
'title': puzzle.title or '',
Expand Down Expand Up @@ -77,7 +86,7 @@ def pick_filename(self, puzzle: Puzzle, **kwargs) -> str:

return template

def download(self, url: str) -> Puzzle:
def download(self, url: str) -> puz.Puzzle:
"""Download, parse, and return a puzzle at a given URL."""

solver_url = self.find_solver(url)
Expand All @@ -86,7 +95,8 @@ def download(self, url: str) -> Puzzle:

puzzle = sanitize_for_puzfile(
puzzle,
preserve_html=self.settings.get("preserve_html", False)
preserve_html=self.settings.get("preserve_html", False),
demojize=(self.puzzle.encoding != "UTF-8")
)

return puzzle
Expand All @@ -108,7 +118,7 @@ def fetch_data(self, solver_url: str):
"""
raise NotImplementedError

def parse_xword(self, xw_data) -> Puzzle:
def parse_xword(self, xw_data) -> puz.Puzzle:
"""Given a blob of crossword data, parse and stuff into puz format.

This method is implemented in subclasses based on the differences in
Expand Down
33 changes: 17 additions & 16 deletions xword_dl/downloader/compilerdownloader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import puz
import requests
import urllib.parse
import xmltodict
Expand Down Expand Up @@ -50,30 +49,32 @@ def parse_xword(self, xw_data, enumeration=True):
xw_metadata = xw_puzzle['metadata']
xw_grid = xw_puzzle['crossword']['grid']

puzzle = puz.Puzzle()
self.puzzle.title = xw_metadata.get('title') or ''
self.puzzle.author = xw_metadata.get('creator') or ''
self.puzzle.copyright = xw_metadata.get('copyright') or ''

puzzle.title = xw_metadata.get('title') or ''
puzzle.author = xw_metadata.get('creator') or ''
puzzle.copyright = xw_metadata.get('copyright') or ''
self.puzzle.title = xw_metadata.get('title') or ''
self.puzzle.author = xw_metadata.get('creator') or ''
self.puzzle.copyright = xw_metadata.get('copyright') or ''

puzzle.width = int(xw_grid['@width'])
puzzle.height = int(xw_grid['@height'])
self.puzzle.width = int(xw_grid['@width'])
self.puzzle.height = int(xw_grid['@height'])

solution = ''
fill = ''
markup = b''

cells = {(int(cell['@x']), int(cell['@y'])): cell for cell in xw_grid['cell']}

for y in range(1, puzzle.height + 1):
for x in range(1, puzzle.width + 1):
for y in range(1, self.puzzle.height + 1):
for x in range(1, self.puzzle.width + 1):
cell = cells[(x, y)]
solution += cell.get('@solution', '.')
fill += '.' if cell.get('@type') == 'block' else '-'
markup += (b'\x80' if (cell.get('@background-shape') == 'circle') else b'\x00')

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

xw_clues = xw_puzzle['crossword']['clues']

Expand All @@ -83,13 +84,13 @@ def parse_xword(self, xw_data, enumeration=True):
if c.get("@format") and enumeration else '') for c in
sorted(all_clues, key=lambda x: int(x['@number']))]

puzzle.clues = clues
self.puzzle.clues = clues

has_markup = b'\x80' in markup

if has_markup:
puzzle.extensions[b'GEXT'] = markup
puzzle._extensions_order.append(b'GEXT')
puzzle.markup()
self.puzzle.extensions[b'GEXT'] = markup
self.puzzle._extensions_order.append(b'GEXT')
self.puzzle.markup()

return puzzle
return self.puzzle
25 changes: 11 additions & 14 deletions xword_dl/downloader/guardiandownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import re

import puz
import requests

from bs4 import BeautifulSoup, Tag
Expand Down Expand Up @@ -54,16 +53,14 @@ def fetch_data(self, solver_url):
return xw_data

def parse_xword(self, xw_data):
puzzle = puz.Puzzle()
self.puzzle.author = xw_data.get('creator', {}).get('name') or ''
self.puzzle.height = xw_data.get('dimensions').get('rows')
self.puzzle.width = xw_data.get('dimensions').get('cols')

puzzle.author = xw_data.get('creator', {}).get('name', '')
puzzle.height = xw_data.get('dimensions').get('rows')
puzzle.width = xw_data.get('dimensions').get('cols')

puzzle.title = xw_data.get('name') or ''
self.puzzle.title = xw_data.get('name') or ''

if not all(e.get('solution') for e in xw_data['entries']):
puzzle.title += ' - no solution provided'
self.puzzle.title += ' - no solution provided'

self.date = datetime.datetime.fromtimestamp(
xw_data['date'] // 1000)
Expand All @@ -80,21 +77,21 @@ def parse_xword(self, xw_data):
solution = ''
fill = ''

for y in range(puzzle.height):
for x in range(puzzle.width):
for y in range(self.puzzle.height):
for x in range(self.puzzle.width):
sol_at_space = grid_dict.get((x,y), '.')
solution += sol_at_space
fill += '.' if sol_at_space == '.' else '-'

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

clues = [e.get('clue') for e in sorted(xw_data.get('entries'),
key=lambda x: (x.get('number'), x.get('direction')))]

puzzle.clues = clues
self.puzzle.clues = clues

return puzzle
return self.puzzle


class GuardianCrypticDownloader(GuardianDownloader):
Expand Down
Loading
Loading