diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 00000000..bf8fceb6 --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,88 @@ +name: Publish Python distribution to PyPI and TestPyPI + +on: + workflow_dispatch: + inputs: + pushTestPyPi: + description: 'Push package to TestPyPI' + required: true + type: boolean + pushPyPi: + description: 'Push package to PyPI' + required: true + type: boolean + +jobs: + build-pypi: + runs-on: ubuntu-latest + container: + image: quay.io/pypa/manylinux_2_28_x86_64 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # We need full history for setuptools_scm to figure out version + - name: Set safe directory (work around checkout not doing that properly for containers) + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - name: Install build dependencies for checktestdata + run: yum -y install boost-devel gmp-devel + - name: Build sdist (and broken wheel) + run: /opt/python/cp311-cp311/bin/python -m build + - name: Repair wheel + run: auditwheel repair dist/problemtools-*.whl + - name: Replace broken wheel with repaired wheel + run: | + rm -f dist/*.whl + cp wheelhouse/*.whl dist + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-testpypi: + name: Publish Python distribution to TestPyPI + needs: + - build-pypi + runs-on: ubuntu-latest + if: ${{ inputs.pushTestPyPi }} + + environment: + name: testpypi + url: https://test.pypi.org/p/problemtools + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + publish-to-pypi: + name: Publish Python distribution to PyPI + needs: + - build-pypi + runs-on: ubuntu-latest + if: ${{ inputs.pushPyPi }} + + environment: + name: pypi + url: https://pypi.org/p/problemtools + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 00000000..b3226dfe --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,62 @@ +# This workflow will install Python dependencies, run tests and lint +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python tests + +on: + push: + branches: [ "master", "develop" ] + pull_request: + branches: [ "master", "develop" ] + +permissions: + contents: read + +jobs: + pythontests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12", "3.13"] # 3.11 is the lowest we support, since we want StrEnum + container: + image: problemtools/githubci:latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m venv venv + venv/bin/python --version + venv/bin/pip install mypy ruff pytest + if [ -f 
requirements.txt ]; then venv/bin/pip install -r requirements.txt; fi + - name: Lint with ruff + run: venv/bin/ruff check --output-format=github + - name: Check ruff formatting + run: venv/bin/ruff format --check --diff + - name: Test with pytest + run: venv/bin/pytest + - name: Run mypy + run: | + venv/bin/mypy --non-interactive --config-file mypy.ini -p problemtools + + packages: # Use a separate job to test debian packaging to speed things up (no need to test this for every python version above) + runs-on: ubuntu-latest + container: + image: problemtools/githubci:latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Build debian packages + run: | + make builddeb + - name: Install debian package + run: dpkg -i ../kattis-problemtools_*.deb + - name: Verify examples + run: | + shopt -s extglob + verifyproblem examples/!(README.md) + shell: bash diff --git a/.gitignore b/.gitignore index ddbe0378..18a3eccb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,13 @@ *.pyc *~ +*.swp /.cache/ /problemtools.egg-info/ /support/default_validator/default_validator /support/interactive/interactive +build/ +/problemtools/_version.py + +venv/ +.pytest_cache/ +.mypy_cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..889c8316 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.8 + hooks: + - id: ruff + - id: ruff-format diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f81f3fcd..00000000 --- a/.travis.yml +++ /dev/null @@ -1,5 +0,0 @@ -language: python -python: - - 3.7 - -script: py.test diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index e9787418..00000000 --- a/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM ubuntu:20.04 - -MAINTAINER austrin@kattis.com - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update && \ - apt-get install -y \ - automake \ - g++ \ - git \ - libboost-all-dev \ - libgmp-dev \ - libgmp10 \ - libgmpxx4ldbl \ - openjdk-8-jdk \ - python3-minimal \ - python3-pip \ - python3-plastex \ - python3-yaml \ - sudo \ - texlive-fonts-recommended \ - texlive-lang-cyrillic \ - texlive-latex-extra \ - texlive-plain-generic \ - tidy \ - vim - -RUN pip3 install git+https://github.com/kattis/problemtools diff --git a/LICENSE b/LICENSE index 84c39ef9..01cc176a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2010-2019 Kattis and all respective contributors +Copyright (c) Kattis and all respective contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in index 4d57d2ed..06421ee4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,9 @@ recursive-include problemtools/config * recursive-include problemtools/templates * recursive-include problemtools/tests * +recursive-include examples * recursive-include support * +recursive-include tests * +global-exclude */__pycache__/* +global-exclude *.pyc +recursive-exclude .github * diff --git a/Makefile b/Makefile index 296634e5..bd3337bf 100644 --- a/Makefile +++ b/Makefile @@ -8,3 +8,7 @@ checktestdata: support/checktestdata/bootstrap support/checktestdata/bootstrap: git submodule update --init + +clean: + make -C support clean + rm -rf problemtools.egg-info build diff --git a/README.md b/README.md index 29140990..210a35bb 100644 --- a/README.md +++ b/README.md @@ -1,12 
+1,12 @@ # Kattis Problem Tools -master: -[![Master Build Status](https://travis-ci.org/Kattis/problemtools.svg?branch=master)](https://travis-ci.org/Kattis/problemtools). -develop: -[![Develop Build Status](https://travis-ci.org/Kattis/problemtools.svg?branch=develop)](https://travis-ci.org/Kattis/problemtools) +![Build Status](https://github.com/kattis/problemtools/actions/workflows/python-app.yml/badge.svg?branch=master) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -These are tools to manage problem packages using the Kattis problem package -format. +These are tools to manage problem packages using the Kattis [problem package +format](https://www.kattis.com/problem-package-format/). The problem package +specification is developed in the [problem package format +repository](https://github.com/Kattis/problem-package-format). ## Programs Provided @@ -17,10 +17,26 @@ The problem tools provide the following three programs: - `problem2pdf`: convert a problem statement to pdf - `problem2html`: convert a problem statement to html -Running any of them with command-line option `-h` gives +Running any of them with the command-line option `-h` gives documentation on what arguments they accept. +## Format versions + +There are currently two versions of the problem package format, +[legacy](https://www.kattis.com/problem-package-format/spec/legacy.html) and +[2023-07-draft](https://www.kattis.com/problem-package-format/spec/2023-07-draft.html). +We have begun work on supporting 2023-07-draft, but there are *many* changes +between legacy and 2023-07-draft which are not yet implemented. Verifyproblem +will do its best to parse and verify problems in 2023-07-draft, but key things +like scoring still behave like legacy. Also, note that 2023-07 is still a draft +standard. + +We advise against packaging production problems in 2023-07-draft, especially +if you plan to have problems installed on [Kattis](https://open.kattis.com), where +we currently *only* support installing legacy problems. + + ## Example Problems A few examples of problem packages can be found in [examples](examples). @@ -31,20 +47,21 @@ A few examples of problem packages can be found in [examples](examples). There are four supported ways of installing and running problemtools. (For non-Linux users, "Method 2" below, to use Docker, is probably the least painful.) -### Method 1: Install the Python package +Note that in all methods except for "Method 2", you must manually install +dependencies such as LaTeX and tools for any languages you want to use. See +[Requirements and compatibility](#requirements-and-compatibility) for details. + +### Method 1: Install the Python package using pipx Run ``` -pip3 install git+https://github.com/kattis/problemtools +pipx install problemtools ``` -Or if you don't want a system-wide installation, -``` -pip3 install --user git+https://github.com/kattis/problemtools -``` -With this second option, in order to get the command line scripts, you need -to make sure that the local user bin path used (e.g., on Linux, -`$HOME/.local/bin`) is in your `$PATH`. +In order to get the command line scripts, you need to make sure that the local +user bin path used (e.g., on Linux, `$HOME/.local/bin`) is in your `$PATH`. See +[pipx's installation instructions](https://pipx.pypa.io/stable/installation/) +for information on how to install `pipx` and set up your `$PATH`. 
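+For example, with a bash-compatible shell you can add something like the
+following to your shell startup file (or simply run `pipx ensurepath`, which
+sets this up for you):
+```sh
+export PATH="$HOME/.local/bin:$PATH"
+```
+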
In order for problemtools to build and run properly, you also need to have LaTeX and various LaTeX packages installed. See [Requirements and @@ -60,34 +77,45 @@ We maintain three official problemtools Docker images on Docker Hub: - [`problemtools/full`](https://hub.docker.com/r/problemtools/full/): this image contains problemtools along with compilers/interpreters for all supported programming languages. -- [`problemtools/icpc`](https://hub.docker.com/r/problemtools/icpc/): this image contains problemtools along with compilers/interpreters for the programming languages allowed in the International Collegiate Programming Contest (ICPC): C, C++, Java, Python 2+3, and Kotlin. +- [`problemtools/icpc`](https://hub.docker.com/r/problemtools/icpc/): this image contains problemtools along with compilers/interpreters for the programming languages allowed in the International Collegiate Programming Contest (ICPC): C, C++, Java, Kotlin, and Python 3. Note that the compiler/interpreter versions used might not be exactly the same as those used in the current ICPC season. -- [`problemtools/minimal`](https://hub.docker.com/r/problemtools/minimal/): this image only contains problemtools, no additional programming languages. As such as it is not particularly useful on its own, but if you are organizing a contest and want to set up a problemtools environment containing exactly the right set of compilers/interpreters for your contest, this is the recommended starting point. +- [`problemtools/minimal`](https://hub.docker.com/r/problemtools/minimal/): this image only contains problemtools, no additional programming languages. As such, it is not particularly useful on its own, but if you are organizing a contest and want to set up a problemtools environment containing exactly the right set of compilers/interpreters for your contest, this is the recommended starting point. -For example, suppose you want to use the `problemtools/icpc` image. To get started, install the [Docker CLI](https://docs.docker.com/install), and then pull the image: +For example, suppose you want to use the `problemtools/icpc` image. To get started (or update to the latest release), install the [Docker CLI](https://docs.docker.com/install), and then pull the image: docker pull problemtools/icpc -Once the image has finished downloading, you can check that it exists on your system using `docker images`. To launch an interactive container and play around with *verifyproblem*, *problem2pdf*, and *problem2html* run: +The most convenient way to use the container is to create a shell script similar to the one below and add it to your `$PATH`. If you call the script `verifyproblem.sh`, you can then run `verifyproblem.sh examples/hello` to use the icpc Docker image to verify examples/hello: +```sh +#!/bin/bash + +if [ -n "$1" ] && [ -d "$1" ]; then + docker run --rm -t -v "$(dirname "$(readlink -f "$1")")":/work problemtools/icpc verifyproblem "/work/$(basename "$1")" +else + echo "No such directory: $1" +fi +``` + +To instead launch an interactive container and play around with *verifyproblem*, *problem2pdf*, and *problem2html* run: docker run --rm -it problemtools/icpc By default, docker containers do _NOT_ persist storage between runs, so any files you create or modify will be lost when the container stops running. Two common ways of dealing with this are: -1) Use a [bind mount](https://docs.docker.com/storage/bind-mounts/) to mount a directory on your machine into the docker container. 
This can be done as follows (see Docker documentation for further details): +1) Use a [bind mount](https://docs.docker.com/storage/bind-mounts/) to mount a directory on your machine into the docker container. Mounting the current directory to /kattis_work_dir can be done as follows (see Docker documentation for further details): ``` - docker run --rm -it -v ${FULL_PATH_TO_MOUNT}:/kattis_work_dir problemtools/icpc + docker run --rm -it -v $(pwd):/kattis_work_dir problemtools/icpc ``` -2) Persist any changes you want to keep to a remote file system/source control (e.g. a remote Git repository, note however that you would first need to install Git in the image). +2) Persist any changes you want to keep to a remote file system/source control (e.g., a remote Git repository; note, however, that you would first need to install Git in the image). #### Building your own images -If you want a more complete environment in the Docker images (e.g. if +If you want a more complete environment in the Docker images (e.g., if you want to install git or your favorite editor), feel free to extend them in whichever way you like. The `problemtools/{minimal,icpc,full}` images point to the latest -release versions of problemtools. If for some reason you want an +release versions of problemtools. If, for some reason, you want an image containing the latest development version, you have to build it yourself from scratch (while there are `problemtools/{minimal,icpc,full}:develop` Docker images on Docker @@ -95,15 +123,20 @@ Hub, these are only updated sporadically for testing purposes and not kept up to date). -### Method 3: Run directly from the repository. +### Method 3: Run directly from the repository -If you intend to help develop problemtools, or if you just want a -bare-bones way of running them, this is your option. +If you intend to help develop problemtools, or if you just want a bare-bones +way of running them, this is your option. For this method, you need to clone the repository (just downloading a -zip archive of it does not work, because the project has submodules +zip archive of it does not work because the project has submodules that are not included in that zip archive). +Start by setting up your venv, e.g., + + python3 -m venv venv + venv/bin/pip install -r requirements.txt + In order for the tools to work, you first have to compile the various support programs, which can be done by running `make` in the root directory of problemtools. @@ -119,11 +152,11 @@ order for problemtools to work correctly. ### Method 4: Build and install the Debian package -This applies if you are running on Debian or a Debian derivative such +This applies if you are running on Debian or a Debian derivative, such as Ubuntu. As with method 3, you need to clone the repository (just downloading a -zip archive of it does not work, because the project has submodules +zip archive of it does not work because the project has submodules that are not included in that zip archive). Run `make builddeb` in the root of the problemtools repository to @@ -134,7 +167,7 @@ root of the repository). Apart from the build dependencies listed [below](#ubuntu), building the Debian package requires that the following tools are installed: - debhelper dh-python dpkg-dev + debhelper dh-virtualenv dpkg-dev The package can then be installed using (replace `` as appropriate): @@ -178,7 +211,7 @@ problemtools' configuration: are not sure whether you should use it, then you probably shouldn't. 
This file can be used to specify the system defaults for those problem limits which are not given a fixed default value in the - [problem format specification](http://www.problemarchive.org/wiki/index.php/Problem_Format#limits). + [problem format specification](https://www.kattis.com/problem-package-format/spec/2023-07-draft.html#limits). The system defaults assumed by problemtools can be found in (problemtools/config/problem.yaml). For instance, if you are primarily working against a system with a default memory limit of 2 GiB, @@ -189,35 +222,35 @@ problemtools' configuration: memory: 2048 # (unit is MiB) ``` - (In principle it is possible to override the defaults of other values than the - system-dependent defaults in the problem.yaml metadata files this way, but such - usage is very strongly discouraged.) - ## Requirements and compatibility To build and run the tools, you need Python 3 with the YAML and PlasTeX libraries, -and a LaTeX installation. +and a LaTeX installation. You must also install language tools (e.g., compilers) +for any languages used in problem packages. ### Ubuntu The dependencies needed to *build/install* problemtools can be installed with: - sudo apt install automake g++ make libboost-regex-dev libgmp-dev libgmp10 libgmpxx4ldbl python3 python3-pytest python3-setuptools python3-yaml python3-plastex + sudo apt install python3-venv automake g++ make libboost-regex-dev libgmp-dev python3 git And the dependencies needed to *run* problemtools can be installed with: - sudo apt install ghostscript libgmpxx4ldbl python3-minimal python-pkg-resources python3-plastex python3-yaml texlive-fonts-recommended texlive-lang-cyrillic texlive-latex-extra texlive-plain-generic tidy + sudo apt install ghostscript pandoc python3 texlive-fonts-recommended texlive-lang-cyrillic texlive-latex-extra texlive-plain-generic tidy dvisvgm ### Fedora On Fedora, these dependencies can be installed with: - sudo dnf install boost-regex gcc gmp-devel gmp-c++ python3 python3-pyyaml texlive-latex texlive-collection-fontsrecommended texlive-fancyhdr texlive-subfigure texlive-wrapfig texlive-import texlive-ulem texlive-xifthen texlive-overpic texlive-pbox tidy ghostscript + sudo dnf install boost-regex gcc gmp-devel gmp-c++ pandoc python3 python3-pyyaml texlive-latex texlive-collection-fontsrecommended texlive-fancyhdr texlive-subfigure texlive-wrapfig texlive-import texlive-ulem texlive-xifthen texlive-overpic texlive-pbox tidy ghostscript Followed by: - pip3 install --user plastex + pip3 install --user plastex nh3 + +### Arch +Package is available on the AUR [kattis-problemtools-git](https://aur.archlinux.org/packages/kattis-problemtools-git). Use your favorite AUR helper or follow the installation instructions found [here](https://wiki.archlinux.org/title/Arch_User_Repository#Installing_and_upgrading_packages). ### Other platforms diff --git a/admin/.gitignore b/admin/.gitignore new file mode 100644 index 00000000..a4d089c1 --- /dev/null +++ b/admin/.gitignore @@ -0,0 +1 @@ +pypi_dist/ diff --git a/admin/build_pypi_packages.sh b/admin/build_pypi_packages.sh new file mode 100755 index 00000000..d9567a24 --- /dev/null +++ b/admin/build_pypi_packages.sh @@ -0,0 +1,73 @@ +#!/bin/bash +set -e + +ALLOW_DIRTY=false +TAG=develop + +echo N.B., this script is solely to allow for local testing of whl/src. 
+echo To actually build and push things to pypi, trigger the pypi flow +echo in github actions, https://github.com/Kattis/problemtools/actions + +while getopts "d" opt; do + case $opt in + d) ALLOW_DIRTY=true ;; + \?) echo "Invalid option: -$opt" ;; + esac +done + +shift $((OPTIND-1)) + +if [ "$1" != "" ]; then + TAG=$1 +fi + +cd $(dirname $(readlink -f $0)) + +if ! ../venv/bin/twine -h > /dev/null 2> /dev/null; then + echo "Did not find twine. Please run ../venv/bin/pip install twine" + exit 1 +fi + +if [[ -n $(git status -s) ]]; then + echo "Repository is dirty." + git status -s + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi + +if [[ $(git rev-parse --abbrev-ref HEAD) != ${TAG} && $(git describe --exact-match --tags 2>/dev/null) != ${TAG} ]]; then + echo "Repository is currently not on branch/tag ${TAG}." + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi + +echo "Building sdist and manylinux wheel" +sudo rm -rf ./pypi_dist +docker run --rm -v $(pwd)/..:/problemtools -v $(pwd)/pypi_dist:/dist quay.io/pypa/manylinux_2_28_x86_64 /bin/bash -c " + yum -y install boost-devel gmp-devel ; + mkdir /build ; + cd /build ; + git config --global --add safe.directory /problemtools/.git ; + git clone /problemtools ; + cd problemtools ; + git checkout ${TAG} ; + /opt/python/cp311-cp311/bin/python -m build ; + auditwheel repair dist/problemtools-*.whl ; + cp dist/*.tar.gz /dist ; + cp wheelhouse/*.whl /dist" +sudo chown -R $USER:$USER pypi_dist + +../venv/bin/twine check pypi_dist/* + +echo "Running verifyproblem from wheel on all examples" +TEMPDIR=$(mktemp -d) +python3 -m venv "${TEMPDIR}" +"${TEMPDIR}/bin/pip" install pypi_dist/problemtools*manylinux*whl +shopt -s extglob +if ! "${TEMPDIR}/bin/verifyproblem" ../examples/!(README.md); then + echo "Running verifyproblem on all examples failed. Please review output above to debug." + rm -rf "${TEMPDIR}" + exit 1 +fi +rm -rf "${TEMPDIR}" + +echo "Sucessfully built packages. 
If you're happy with them, upload:" +echo " ../venv/bin/twine upload --verbose pypi_dist/*" diff --git a/admin/docker/Dockerfile.build b/admin/docker/Dockerfile.build index e2f7a3bf..b8080fe1 100644 --- a/admin/docker/Dockerfile.build +++ b/admin/docker/Dockerfile.build @@ -1,43 +1,24 @@ -# Package for building the problemtools .deb package -# Ends up in the /usr/local/problemtools_build/deb/ directory +# Docker image with all packages needed to build a problemtools .deb # -# Setting build argument PROBLEMTOOLS_VERSION causes a specific -# version of problemtools to be built (default is latest version of -# develop branch on GitHub) +# Not uploaded anywhere, only used locally during building -FROM ubuntu:22.04 - -LABEL maintainer="austrin@kattis.com" +ARG PROBLEMTOOLS_VERSION=develop +FROM problemtools/runreqs:${PROBLEMTOOLS_VERSION} +LABEL maintainer="contact@kattis.com" ENV DEBIAN_FRONTEND=noninteractive -# Install packages needed for build -RUN apt update && \ - apt install -y \ +# Packages required to build and run problemtools +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + apt-get update && apt-get install -y \ automake \ + build-essential \ debhelper \ - dh-python \ + dh-virtualenv \ dpkg-dev \ g++ \ git \ make \ - libboost-regex-dev \ - libgmp-dev \ - libgmp10 \ - libgmpxx4ldbl \ - python3 \ - python3-pytest \ - python3-setuptools \ - python3-yaml \ - python3-setuptools - -RUN mkdir -p /usr/local/problemtools_build - -WORKDIR /usr/local/problemtools_build -RUN git clone --recursive https://github.com/kattis/problemtools - -ARG PROBLEMTOOLS_VERSION=develop -RUN cd problemtools && git checkout ${PROBLEMTOOLS_VERSION} && make builddeb - -RUN mkdir -p deb -RUN mv kattis-problemtools*.deb deb/ + libboost-regex-dev diff --git a/admin/docker/Dockerfile.full b/admin/docker/Dockerfile.full index 40580dd6..75ec8745 100644 --- a/admin/docker/Dockerfile.full +++ b/admin/docker/Dockerfile.full @@ -1,32 +1,23 @@ # Full problemtools docker image, containing problemtools and all # supported programming languages. # +# +# Build requirements: +# - The problemtools .deb package must be available from the host file +# system under a file name matching +# artifacts/deb/kattis-problemtools*.deb +# (Version of that .deb file should match the build argument +# PROBLEMTOOLS_VERSION but this is not checked.) ARG PROBLEMTOOLS_VERSION=develop -FROM problemtools/icpc:${PROBLEMTOOLS_VERSION} - -LABEL maintainer="austrin@kattis.com" +FROM problemtools/fulllangs:${PROBLEMTOOLS_VERSION} +LABEL maintainer="contact@kattis.com" ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && \ - apt-get install -y \ - fp-compiler \ - gfortran \ - gnucobol \ - gccgo \ - ghc haskell-platform \ - gnustep-devel gnustep gnustep-make gnustep-common gobjc \ - libgmp3-dev \ - libmozjs-78-dev \ - lua5.4 \ - mono-complete \ - nodejs \ - ocaml-nox \ - php-cli \ - pypy \ - rustc \ - sbcl \ - scala \ - swi-prolog \ - ; +RUN mkdir -p /usr/local/artifacts +WORKDIR /usr/local/artifacts +COPY artifacts/deb . 
+RUN dpkg -i kattis-problemtools*.deb + +WORKDIR / diff --git a/admin/docker/Dockerfile.fulllangs b/admin/docker/Dockerfile.fulllangs new file mode 100644 index 00000000..cc0dd3df --- /dev/null +++ b/admin/docker/Dockerfile.fulllangs @@ -0,0 +1,40 @@ +# Docker image with all packages needed to run a problemtools .deb, plus +# language support for all supported languages +# +# Not uploaded anywhere, only used locally during building + +ARG PROBLEMTOOLS_VERSION=develop +FROM problemtools/icpclangs:${PROBLEMTOOLS_VERSION} + +LABEL maintainer="contact@kattis.com" +ENV DEBIAN_FRONTEND=noninteractive + +# All languages, plus curl which we need to fetch pypy2 +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + apt-get update && apt-get install -y \ + curl \ + fp-compiler \ + gfortran \ + gnucobol \ + gccgo \ + ghc \ + gnustep-devel gnustep gnustep-make gnustep-common gobjc \ + lua5.4 \ + mono-complete \ + nodejs \ + ocaml-nox \ + php-cli \ + rustc \ + sbcl \ + scala \ + swi-prolog + +# pypy2 is no longer packaged for Ubuntu, so download tarball (and check a sha256) +RUN curl -LO https://downloads.python.org/pypy/pypy2.7-v7.3.16-linux64.tar.bz2 \ + && echo '04b2fceb712d6f811274825b8a471ee392d3d1b53afc83eb3f42439ce00d8e07 pypy2.7-v7.3.16-linux64.tar.bz2' | sha256sum --check \ + && tar -xf pypy2.7-v7.3.16-linux64.tar.bz2 \ + && mv pypy2.7-v7.3.16-linux64 /opt/pypy \ + && ln -s /opt/pypy/bin/pypy /usr/bin/pypy \ + && rm pypy2.7-v7.3.16-linux64.tar.bz2 diff --git a/admin/docker/Dockerfile.githubci b/admin/docker/Dockerfile.githubci new file mode 100644 index 00000000..7c4b5217 --- /dev/null +++ b/admin/docker/Dockerfile.githubci @@ -0,0 +1,23 @@ +# Docker image with all deb packages needed for our github actions +# - Building a problemtools deb +# - Running verifyproblem on all examples + +ARG PROBLEMTOOLS_VERSION=develop +FROM problemtools/fulllangs:${PROBLEMTOOLS_VERSION} + +LABEL maintainer="contact@kattis.com" +ENV DEBIAN_FRONTEND=noninteractive + +# Packages required to build and run problemtools +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + apt-get update && apt-get install -y \ + automake \ + build-essential \ + debhelper \ + dh-virtualenv \ + dpkg-dev \ + git \ + make \ + libboost-regex-dev diff --git a/admin/docker/Dockerfile.icpc b/admin/docker/Dockerfile.icpc index 904529f8..6366fe01 100644 --- a/admin/docker/Dockerfile.icpc +++ b/admin/docker/Dockerfile.icpc @@ -1,37 +1,22 @@ # Basic problemtools docker image, containing problemtools and the # "ICPC languages" (C, C++, Java, Kotlin, and Python 3) # +# Build requirements: +# - The problemtools .deb package must be available from the host file +# system under a file name matching +# artifacts/deb/kattis-problemtools*.deb +# (Version of that .deb file should match the build argument +# PROBLEMTOOLS_VERSION but this is not checked.) 
ARG PROBLEMTOOLS_VERSION=develop -FROM problemtools/minimal:${PROBLEMTOOLS_VERSION} - -LABEL maintainer="austrin@kattis.com" +FROM problemtools/icpclangs:${PROBLEMTOOLS_VERSION} +LABEL maintainer="contact@kattis.com" ENV DEBIAN_FRONTEND=noninteractive -# Install C++, Java, Kotlin, and PyPy 3 via their ppa repository -RUN apt update && \ - apt install -y software-properties-common && \ - add-apt-repository ppa:pypy/ppa && \ - apt update && \ - apt install -y \ - gcc g++ \ - openjdk-11-jdk openjdk-11-jre \ - kotlin \ - pypy3 - -# Reconfigure problemtools: -# - Use PyPy for Python 2 (not available in this image but in the full one) -# - Use PyPy for Python 3 -RUN mkdir -p /etc/kattis/problemtools -RUN echo " \n\ -python2: \n\ - name: 'Python 2 w/PyPy'\n\ - run: '/usr/bin/pypy \"{mainfile}\"'\n\ - \n\ -python3: \n\ - name: 'Python 3 w/PyPy'\n\ - run: '/usr/bin/pypy3 \"{mainfile}\"'\n\ - \n" > /etc/kattis/problemtools/languages.yaml +RUN mkdir -p /usr/local/artifacts +WORKDIR /usr/local/artifacts +COPY artifacts/deb . +RUN dpkg -i kattis-problemtools*.deb WORKDIR / diff --git a/admin/docker/Dockerfile.icpclangs b/admin/docker/Dockerfile.icpclangs new file mode 100644 index 00000000..c2ce8782 --- /dev/null +++ b/admin/docker/Dockerfile.icpclangs @@ -0,0 +1,21 @@ +# Docker image with all packages needed to run a problemtools .deb, plus +# language support for the "ICPC languages" (C, C++, Java, Kotlin, and Python 3) +# +# Not uploaded anywhere, only used locally during building + +ARG PROBLEMTOOLS_VERSION=develop +FROM problemtools/runreqs:${PROBLEMTOOLS_VERSION} + +LABEL maintainer="contact@kattis.com" +ENV DEBIAN_FRONTEND=noninteractive + +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + apt-get update && apt-get install -y \ + gcc \ + g++ \ + kotlin \ + openjdk-21-jdk \ + openjdk-21-jre \ + pypy3 diff --git a/admin/docker/Dockerfile.minimal b/admin/docker/Dockerfile.minimal index 534e661f..11b7b414 100644 --- a/admin/docker/Dockerfile.minimal +++ b/admin/docker/Dockerfile.minimal @@ -10,26 +10,11 @@ # PROBLEMTOOLS_VERSION but this is not checked.) ARG PROBLEMTOOLS_VERSION=develop -FROM ubuntu:22.04 - -LABEL maintainer="austrin@kattis.com" +FROM problemtools/runreqs:${PROBLEMTOOLS_VERSION} +LABEL maintainer="contact@kattis.com" ENV DEBIAN_FRONTEND=noninteractive -RUN apt update && \ - apt install -y \ - ghostscript \ - libgmpxx4ldbl \ - python-pkg-resources \ - python3-minimal \ - python3-yaml \ - python3-plastex \ - texlive-fonts-recommended \ - texlive-lang-cyrillic \ - texlive-latex-extra \ - texlive-plain-generic \ - tidy - RUN mkdir -p /usr/local/artifacts WORKDIR /usr/local/artifacts COPY artifacts/deb . 
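The `COPY artifacts/deb` plus `dpkg -i` pattern used by the minimal, icpc and full images above assumes that a freshly built `.deb` has been staged under `artifacts/deb/` on the host before `docker build` runs; `admin/update_docker.sh` (further down in this diff) automates exactly that. A rough manual sketch of the same flow, with the image tag and the path to the `.deb` chosen purely for illustration:

```sh
# Stage a locally built .deb where the Dockerfiles expect it, then build one image.
# Assumes the problemtools/runreqs base image (Dockerfile.runreqs) was built first.
mkdir -p artifacts/deb
cp /path/to/kattis-problemtools_*.deb artifacts/deb/   # e.g. the output of `make builddeb`
docker build -f Dockerfile.minimal \
    -t problemtools/minimal:develop \
    --build-arg PROBLEMTOOLS_VERSION=develop .
```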
diff --git a/admin/docker/Dockerfile.runreqs b/admin/docker/Dockerfile.runreqs new file mode 100644 index 00000000..c27486dd --- /dev/null +++ b/admin/docker/Dockerfile.runreqs @@ -0,0 +1,28 @@ +# Docker image with all packages needed to run a problemtools .deb +# +# Not uploaded anywhere, only used locally during building + +ARG PROBLEMTOOLS_VERSION=develop +FROM ubuntu:24.04 + +LABEL maintainer="contact@kattis.com" +ENV DEBIAN_FRONTEND=noninteractive + +# Packages required to build and run problemtools +# For libgmp, we technically just need libgmpxx4ldbl here, but for readability +# (and we need libgmp-dev in other images), we take libgmp-dev here +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + apt-get update && apt-get install -y \ + dvisvgm \ + ghostscript \ + libgmp-dev \ + pandoc \ + python3 \ + python3-venv \ + texlive-fonts-recommended \ + texlive-lang-cyrillic \ + texlive-latex-extra \ + texlive-plain-generic \ + tidy diff --git a/admin/docker/README.md b/admin/docker/README.md new file mode 100644 index 00000000..5e2c6c40 --- /dev/null +++ b/admin/docker/README.md @@ -0,0 +1,25 @@ +Our docker images. Note that images depend on each other, please use the script +admin/update_docker.sh to build images in the correct order. + +We have 4 images which are only used locally in the build process, and are not +uploaded to a repository. + - `runreqs`: Base image containing just the things needed to run problemtools + - `build`: Base image containing just the things needed to build a deb and run problemtools + - `icpclangs`: Base image containing what is needed to run problemtools, plus the "ICPC languages" + - `fulllangs`: Base image containing what is needed to run problemtools, plus all supported languages + +We have 3 images which are meant for end users: + - `minimal`: Image with problemtools installed, but no languages. + - `icpc`: Image with problemtools plus the "ICPC languages" installed. + - `full`: Image with problemtools and all languages + +We have 1 image which is used in our CI (to speed up things - it takes a few +minutes to apt-get install all languages and runtime requirements): + - `githubci`: Image with all languages and everything needed to build a deb and run problemtools + +Build dependencies: +``` + runreqs -> icpclangs -> fullangs -> githubci + / \ | | + build minimal icpc full +``` diff --git a/admin/make_release.sh b/admin/make_release.sh index 0343ac27..17ad9416 100755 --- a/admin/make_release.sh +++ b/admin/make_release.sh @@ -1,10 +1,17 @@ #!/usr/bin/env bash # -# Uses git flow and gbp tools (available through Ubuntu packages -# git-flow, git-buildpackage) +# Uses gbp (available through Ubuntu package git-buildpackage) set -e +ALLOW_DIRTY=false +while getopts "d" opt; do + case $opt in + d) ALLOW_DIRTY=true ;; + \?) echo "Invalid option: -$opt" ;; + esac +done + ROOT=$(readlink -f $(dirname $0)/..) 
VERSION=1.$(date +%Y%m%d) @@ -16,20 +23,50 @@ if [ "$(git tag -l v$VERSION)" != "" ]; then VERSION=$VERSION-rev$REV fi -set -x -git flow release start --showcommands $VERSION +# Steps: +# Pick a version (done by the above loop) +# Update debian/changelog using gbp +# Create and merge pull request with the updated debian/changelog +# Create a github release (using web UI) or gh +# Push to pypi using github action +# Push to docker using admin/update_docker.sh v$VERSION + +CHANGELOG_VERSION=$(dpkg-parsechangelog -l $ROOT/debian/changelog --show-field Version) +if [[ $CHANGELOG_VERSION == $VERSION ]]; then + echo "Debian changelog seems updated" +else + echo "Updating debian changelog (this is surprisingly slow)" + EMAIL=$(git config user.email) gbp dch $ROOT --release --new-version=$VERSION --ignore-branch --git-author --debian-tag='v%(version)s' --debian-branch=release/$VERSION --spawn-editor=never + echo "Please commit the updated changelog, do a pull request, and get it merged, then run this script again on an up-to-date master branch" + exit 0 +fi -# Update _version.py -$ROOT/admin/update_version.py.sh $VERSION +cd $(dirname $(readlink -f $0)) -# Update debian/changelog -gbp dch $ROOT --release --new-version=$VERSION --git-author --debian-tag='v%(version)s' --debian-branch=release/$VERSION --spawn-editor=never +if [[ -n $(git status -s) ]]; then + echo "Repository is dirty." + git status -s + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi -git add $ROOT/problemtools/_version.py $ROOT/debian/changelog -git commit -m "Release of version $VERSION: bump version in problemtools/_version.py and debian/changelog" +GITTAG=master +if [[ $(git rev-parse --abbrev-ref HEAD) != ${GITTAG} && $(git describe --exact-match --tags 2>/dev/null) != ${GITTAG} ]]; then + echo "Repository is currently not on branch/tag ${GITTAG}." + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi + +THIS_REPO_VERSION=$(git -C $(dirname -- "$0") rev-parse HEAD) +UPSTREAM_VERSION=$(git -C $(dirname -- "$0") ls-remote upstream master | cut -f1) +if [[ $THIS_REPO_VERSION != $UPSTREAM_VERSION ]]; then + echo "Warning: git head of repo does not match upstream. You likely want to update this repo" + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi -git flow release finish --showcommands --message "Release $VERSION" $VERSION -echo "After pushing changes to GitHub, please run" -echo " $ROOT/admin/update_docker.sh v$VERSION" +echo "Below is untested, echoing commands instead of running them" +echo "Creating a draft release on github" +echo gh -R Kattis/problemtools release create -d v$VERSION +echo "After finalizing the release on GitHub, please:" +echo " - trigger the pypi release workflow" +echo " - run $ROOT/admin/update_docker.sh v$VERSION" diff --git a/admin/update_docker.sh b/admin/update_docker.sh index ce7e64e2..fe5d2aba 100755 --- a/admin/update_docker.sh +++ b/admin/update_docker.sh @@ -1,51 +1,98 @@ #!/bin/bash set -e -TAG=develop +ALLOW_DIRTY=false +GITTAG=master +DOCKERTAG=develop UPDATE_LATEST=false + +while getopts "d" opt; do + case $opt in + d) ALLOW_DIRTY=true ;; + \?) echo "Invalid option: -$opt" ;; + esac +done + +shift $((OPTIND-1)) + if [ "$1" != "" ]; then - TAG=$1 + GITTAG=$1 + DOCKERTAG=$1 UPDATE_LATEST=true fi - cd $(dirname $(readlink -f $0))/docker -set -x - -# Make the build image and extract build artifacts -# =============================================== -sudo docker build \ - -f Dockerfile.build \ - -t problemtools/build:${TAG} \ - --no-cache \ - --build-arg PROBLEMTOOLS_VERSION="${TAG}" \ - . 
+ +if [[ -n $(git status -s) ]]; then + echo "Repository is dirty." + git status -s + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi + +if [[ $(git rev-parse --abbrev-ref HEAD) != ${GITTAG} && $(git describe --exact-match --tags 2>/dev/null) != ${GITTAG} ]]; then + echo "Repository is currently not on branch/tag ${GITTAG}." + [[ "${ALLOW_DIRTY}" != "true" ]] && exit 1 +fi + +echo "Updating Ubuntu base image" +docker pull ubuntu:24.04 + +# Make our internal images, and our githubci image. Order is important, images depend on each other +echo "Building intermediate images, plus githubci image" +for IMAGE in runreqs build icpclangs fulllangs githubci; do + docker build \ + -f Dockerfile.${IMAGE} \ + -t problemtools/${IMAGE}:${DOCKERTAG} \ + --build-arg PROBLEMTOOLS_VERSION="${DOCKERTAG}" \ + . +done + + +echo "Building deb" mkdir -p artifacts -rm -rf artifacts/deb/* -sudo docker run --rm -v "$(pwd)/artifacts/:/artifacts" problemtools/build:${TAG} cp -r /usr/local/problemtools_build/deb /artifacts +sudo rm -rf artifacts/deb +# Use our build image to build a deb +docker run --rm -v "$(pwd)/../..:/problemtools" -v "$(pwd)/artifacts/deb:/artifacts" problemtools/build:${DOCKERTAG} \ + /bin/bash -c " + set -e ; + mkdir /build ; + cd /build ; + git config --global --add safe.directory /problemtools/.git ; + git clone --branch ${GITTAG} /problemtools ; + cd problemtools ; + make builddeb ; + cp ../*.deb /artifacts" sudo chown -R $USER:$USER artifacts/ -# Build the actual problemtools images -# =============================================== -for IMAGE in minimal icpc full; do - sudo docker build\ - -f Dockerfile.${IMAGE}\ - -t problemtools/${IMAGE}:${TAG}\ - --build-arg PROBLEMTOOLS_VERSION=${TAG}\ - . - if [ "$UPDATE_LATEST" = "true" ]; then - sudo docker tag problemtools/${IMAGE}:${TAG} problemtools/${IMAGE}:latest - fi -done +echo "Testing deb" +if ! docker run --rm -t -v "$(pwd)/../..:/problemtools" -v "$(pwd)/artifacts/deb:/artifacts" problemtools/fulllangs:${DOCKERTAG} \ + /bin/bash -c ' + set -e ; + shopt -s extglob ; + dpkg -i /artifacts/kattis-problemtools* ; + verifyproblem /problemtools/examples/!(README.md)'; then + echo Running verifyproblem on all examples failed. Please review output above to debug.; + exit 1 +fi +echo Tests pass -# Push to Docker Hub -# =============================================== -sudo docker login +echo "Building complete images with problemtools baked in" for IMAGE in minimal icpc full; do - sudo docker push problemtools/${IMAGE}:${TAG} - if [ "$UPDATE_LATEST" = "true" ]; then - sudo docker push problemtools/${IMAGE}:latest - fi + docker build \ + -f Dockerfile.${IMAGE} \ + -t problemtools/${IMAGE}:${DOCKERTAG} \ + --build-arg PROBLEMTOOLS_VERSION="${DOCKERTAG}" \ + . done + + +if [ "${UPDATE_LATEST}" = "true" ]; then + echo "Build complete. If you are happy with the images, run the following:" + for IMAGE in minimal icpc full githubci; do + echo " docker tag problemtools/${IMAGE}:${DOCKERTAG} problemtools/${IMAGE}:latest" + echo " docker push problemtools/${IMAGE}:${DOCKERTAG}" + echo " docker push problemtools/${IMAGE}:latest" + done +fi diff --git a/bin/.run_in_venv.sh b/bin/.run_in_venv.sh new file mode 100755 index 00000000..dce93afb --- /dev/null +++ b/bin/.run_in_venv.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# +# Helper script for the other wrapper scripts to check that a venv exists, or +# give a helpful message if it doesn't. + +VENVPATH="$(dirname "$(dirname "$(readlink -f "$0")")")/venv" + +if [ ! 
-x "$VENVPATH/bin/python" ]; then + echo "I could not find a python venv at $VENVPATH." + echo "To use these wrapper scripts, please set up a venv by:" + echo " cd $(dirname "$VENVPATH")" + echo " python3 -m venv venv" + echo " venv/bin/pip install -r requirements.txt" + exit 1 +fi + +export PYTHONPATH +PYTHONPATH="$(dirname "$(dirname "$(readlink -f "$0")")")${PYTHONPATH:+:}$PYTHONPATH" +exec "$VENVPATH/bin/python" -m "$@" diff --git a/bin/problem2html.sh b/bin/problem2html.sh index eaa9d2d9..4068fbb3 100755 --- a/bin/problem2html.sh +++ b/bin/problem2html.sh @@ -5,6 +5,4 @@ # installing problemtools on the system properly, this script should # not be used. -export PYTHONPATH -PYTHONPATH="$(dirname "$(dirname "$(readlink -f "$0")")")${PYTHONPATH:+:}$PYTHONPATH" -exec python3 -m problemtools.problem2html "$@" +exec "$(dirname "$(readlink -f "$0")")/.run_in_venv.sh" problemtools.problem2html "$@" diff --git a/bin/problem2pdf.sh b/bin/problem2pdf.sh index 949c11e8..8995152d 100755 --- a/bin/problem2pdf.sh +++ b/bin/problem2pdf.sh @@ -5,6 +5,4 @@ # installing problemtools on the system properly, this script should # not be used. -export PYTHONPATH -PYTHONPATH="$(dirname "$(dirname "$(readlink -f "$0")")")${PYTHONPATH:+:}$PYTHONPATH" -exec python3 -m problemtools.problem2pdf "$@" +exec "$(dirname "$(readlink -f "$0")")/.run_in_venv.sh" problemtools.problem2pdf "$@" diff --git a/bin/verifyproblem.sh b/bin/verifyproblem.sh index 364d70c8..48ce5407 100755 --- a/bin/verifyproblem.sh +++ b/bin/verifyproblem.sh @@ -5,6 +5,4 @@ # installing problemtools on the system properly, this script should # not be used. -export PYTHONPATH -PYTHONPATH="$(dirname "$(dirname "$(readlink -f "$0")")")${PYTHONPATH:+:}$PYTHONPATH" -exec python3 -m problemtools.verifyproblem "$@" +exec "$(dirname "$(readlink -f "$0")")/.run_in_venv.sh" problemtools.verifyproblem "$@" diff --git a/debian/changelog b/debian/changelog index 9891d94a..8f0eeec6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,374 @@ +kattis-problemtools (1.20250605) noble; urgency=medium + + [ Tagl ] + * Set working directory for submission + + [ Per Austrin ] + * small fixes to README.md + + [ Tagl ] + * Support overwriting directories within submissions with included directories + + [ JoelNiemela ] + * Simplify problem2html argparser and add type annotations + * Simplify problem2pdf argparser and add type annotations + * Add line break for readability + * Add type annotations to verifyproblem + * Use long name for langparam + * Fix potential error + * Use tuple destructuring syntax + * Don't remove argparser_basic_arguments + * Fix CodeFactor errors + + [ Konráð Elí Sigurgeirsson ] + * Update README.md + + [ Joel Niemelä ] + * Update README.md + + [ JoelNiemela ] + * Update kotlin to version 1.8.10 + + [ Harry Zhang ] + * Fix not showing WA test case verifyproblem.py + + [ Gunnar Kreitz ] + * Fix crash in verifyproblem when in or ans files are not utf-8 + + [ Pehr Söderman ] + * Restructure problem2html + * Restructure restrucure problem2pdf + * Improve debugging output + * Parse args properly. + * Reintroduce the get_subgroup, as it is needed for addproblem. + * Reintroduce get_attachment_paths, as it is needed for addproblem. 
+ + [ Tobias Meggendorfer ] + * Improved logging, fix unicode error, include_dir buildrun + * Guard better agains non-unicode feedback + * Revert delayed import + * Fix missed init, add test case + + [ Maarten Sijm ] + * Explicitly set language versions for Java and Kotlin in languages.yaml + + [ Tobias Meggendorfer ] + * Restore old format + + [ Gunnar Kreitz ] + * Add github action to run pytest and flake8 + * Remove travis config, point badges to github badge + * Check that all symlinks point to something existing within the problem package + * Forbid all absolute symlinks + + [ Simon Lindholm ] + * Type annotation fixes + * Add -j flag for multi-threaded validation + * Compile submissions early, improve cleanup + + [ Pehr Söderman ] + * Adding sanity checks for file sizes. + * Add UUID as an optional field in problem.yaml + * Include examples in the manifest + * Fixing running of tests + * Build debian packages in ci + * Cache and add packages + * Warn if sample is empty + + [ Fredrik Niemelä ] + * Change team to user for default validator + + [ Pehr Söderman ] + * Add a dependency on dvisvgm, which was missing + + [ matistjati ] + * Add markdown support + * Added display math + * Add dependencies for markdown + * Style markdown tables + * Remove temp files + * Statement fix + * Some refactoring + * Added image support in markdown + * Added footnote support + * Code cleanup + * md -> html works + + [ Gunnar Kreitz ] + * Remove non-standard judgeerror.txt from example problems + + [ matistjati ] + * Make md styling more constistent with latex + + [ Pehr Söderman ] + * Bump the language versions for c and c++ + * Bump the language versions for Java + + [ matistjati ] + * md->pdf and Reorganize code + + [ Pehr Söderman ] + * GCC should use gnu17 + + [ matistjati ] + * Better md->pdf tables + * Interactive samples for pdf + * Remove bplusa + * PDF problem name + * Add dependencies + * Add problem names + * Added problem name to test hello package + * Improve security by running pandoc without shell capabilities + * Refactoring + * Even more refactoring + * Remove python3-markdown dependency + * Add problem id to pdf and small fixes + + [ Pehr Söderman ] + * Update languages.yaml + + [ Tagl ] + * Run interactive validation with submission's working directory + + [ Matistjati ] + * Change Rust compilation flags + + [ Hugo Söderbergh ] + * Remove deprecated functionality + * add build to .gitignore + + [ JoelNiemela ] + * Add special case error message when user output file is empty + * Modify error message according to github comment + + [ Hugo Söderbergh ] + * add command-line argument, begin generalizing Problem class + * small fix + * abstract problems further + * catch general exception for detecting problem-format + * Add some documentation + * New abstraction, ProblemPart which makes it easier to implement parts of problems + * Problem is no longer an abstract class + * ProblemStatement now exists for old and new format + * Add TODO for ProblemStatement + * Fix issues with ProblemStatement + * Add some documentation and some final fixes + * Small change + * Allow to give class-type for part in Problem.get + * Whoops small bug crashed code + * Fix bug that crashed multithreading for testcase-validation + * Mark ProblemPart.depends_on() as staticmethod + + [ Matistjati ] + * Disable html + * Change to wikimedia example image + * Sanitize image sources + * Remove SVG dependency + * Better markdown styling + * Better sample styling + * Add \nextsample and \remainingsamples + * Better pdf 
error handling + * Use {{nextsample}} instead of \nextsample + * Relax image checking (implied by global regex on filenames) + * Add svg dependency + + [ Gunnar Kreitz ] + * Explicitly install build-essential, as deb building blows up on it not being installed + + [ Hugo Söderbergh ] + * fix issues with PR + + [ Gunnar Kreitz ] + * Remove test of verifyproblem.generators (which has been removed) + + [ Hugo Söderbergh ] + * Remove bad break-statement and increase consistency in dictionary access + * more concise regex + * Make Problem constructor default to legacy format + * make tests pass + + [ Matistjati ] + * Add back warning/error logging + + [ Gunnar Kreitz ] + * Add mypy to github workflow + * Change type from list to tuple, helping mypy and being clearer + * Fix name of exception (old one also worked, as parser does import * from Scanner, but felt weird) + * Add type annotations and abstract class markers + * Add getProblemPart for when we need to access problem.classes + * Add python tooling files (and vim swp files) to gitignore + * Fix signatures of run in VIVA and checktestdata to match superclass + * Fix/ignore type errors to let mypy catch errors everywhere but generatedata.py + + [ ElliotRipa ] + * Make cls templates able work with either problem format + * Allow problem statement to use either problem format + * Make template.py detect format version instead + * Provisional updates + * Add formatversion.py + * Minor fixes in imports + * Move version specific functionality to separate file + * Change to flag '-v' for format-version + * Add missing parentheses + * Use dictionary instead of data objects for format data + * Make problem2html.py use -v to specify format version + * Add constants for version names + * Rollback problemset_0.1.cls + * Move initialisation of FORMAT_DATA to setup + * Make formatversion.py use dataobjects instead of dicts + * Fix documentation + * Remove unnecessary initialisation + + [ Matistjati ] + * Start sanitization + apply feedback + * Better sanitization + lots of tests + * problem_statement -> statement + * Better md -> pdf sample rendering + * Another escape + * More careful with images + * Make samplexss more focused + * Experimentally reuse normal LaTeX rendering + * Use problemtools problem2pdf to handle md -> pdf + * Cleanup + * librsvg out of focus for this PR + * Ensure nh3 + * Remove ghostscript sanitization. If it wasn't used before, it probably isn't needed + * Add nh3 to deb build + * Linting + * Add back ghostscript sanitization + * Remove unnecessary test + + [ Gunnar Kreitz ] + * Add make clean to clean up support and the mess left by setuptools + * Change debian packaging to dh_virtualenv + * Update readme, adjusting installation instructions so we can use pip dependencies + * Convert from setup.py to pyproject.toml (and use setuptools-scm for versioning) + * Hook sdist to make python -m build work on a clean checkout + * Update wrapper scripts and README + * Update CI workflow to match readme for build requirements (plus build-essentials) + * Force dh_virtualenv to use builtin venv (debugging CI crash) + * Restructure CI/CD to separate deb building from python unit tests + * Stop exposing __version__, users should use importlib.metadata.version instead + * Hardcode path to python, as dh_virtualenv fails to discover it in CI + * Clean up version parsing. 
Accept 2023-07-draft and 2023-07 version strings + * Add pydantic models for parsing problem.yaml + * Limit problem.yaml config to only system defaults + * Use new metadata parsing mechanism, and start parsing config for 2023-07. + * Bump python version to 3.11 + * Move tests to outside of the package + * Update manifest to include tests support files in sdist, and remove some clutter + * Remove old hack for plasTeX argument (we require >= 3.0 now) + * Clean up incompletely removed plastex_escape hack. Remove unused variable + * Clean up unused variables, old io import, and multiple commands on lines + * Clean up unused import and comparison with None + * Ruff format + * Clean up imports + * Fix minor things flagged by ruff + * Remove unused variables in tests + * Add ruff configuration + * Apply ruff formatting + * Add ruff pre-commit hook + * Replace flake8 with ruff (both linting and formatting) + * Fix incorrect formatting of pydantic errors + * Move is_interactive and is_scoring to be read from problem metadata directly + * Let Problem read and store problem format information. Warn about incomplete 2023-07 support + * Fix validator discovery for 2023-07. Run through all validation for 2023-07 (even if broken) + + [ Matistjati ] + * Add nh3 as dependency + * Fix test import path + * Apply ruff formatting + * More robust footnote finding + * Don't double-escape HTML in samples + * Ghostscript fixes and tests + + [ Gunnar Kreitz ] + * Fix loading a problem with empty problem.yaml and with no statements + * Add utility method to load problem metadata, including names from statements when needed + * Use load_metadata in verifyproblem. Add temporary fallback conf to fix crashes when failing to load metadata + * Use load_metadata in statement_util + * Add apt-get update in workflow to unbreak CI + + [ Matistjati ] + * Convert some example problems to 2023-07-draft + * Add uuid to guess and oddecho + * Better formatting and error for output_validators + + [ Pehr Söderman ] + * Add missing build requirements to debian build + * Update pyproject + + [ Matistjati ] + * Remove now-duplicated import + + [ Gunnar Kreitz ] + * Remove (AFAICT, broken) support for ancient tex statements (0.1) + * Fix bug where we complained about missing show_test_data_groups for non-legacy + * Default language to en. Remove unused --format-version + * Pass Template a filename to render, and pass that through to the latex template + * Rename problem.md to problem.en.md in tests to follow 2023-07-draft + * Refactor of rendering: unify statement finding code, and use Path more + * Use statement_util to find statements. Add more checks. Try rendering even when there are multiple statements in a language + * Make mypy more picky, also checking PlasTeX usage + * Simplify temporary file usage in markdown -> pdf flow + * Fix bug where problem2html cd:s to bad directory, crashing validation of multiple problems + + [ Pehr Söderman ] + * Update link to kattis controlled domain. + + [ Gunnar Kreitz ] + * Replace formatversion.FormatData with a StrEnum + * Add some documentation in the readme regarding current state of format versions + * Add colorlog to get colors for warnings and errors #312 + * Add Swedish problem names + * Fix the logging plasTeX destroys + * Remove accidental commit + * Fix misleading error when missing problem statemetns + * Improve image handling in markdown statements + * Change URL to one that passes filename suffix filter + * Restructure error counters. 
Fix errors happening prior to check being ignored in count. + * Fix --bail_on_error and --werror being ignored before check + * Refactor problem loading so we can do fatal errors in setup + * Check file and directory names per standard + * Make missing/compilation failure in grader/output validator fatal + * Restore old API for accessing parts of a problem. Simplify part setup + * Change type of attachments.attachments from list[str] to list[Path] + * Convert Problem.metadata to a property to align better with other naming + * Expose computed timelim + * Add back problemtools.run.get_tool_path to API + * Large restructure of how our docker images are built. + * Remove old Dockerfile in root, unused afaict + * Add marker to let mypy use our type annotations + * Replace authors with Kattis AB (pypi only shows one). Set required python version + * Add script to build packages for pypi + * Fix incorrect error when verifying different. Add helpful hint when directory for wrong version exists + * Use problemtools/githubci image in workflow. Run verifyproblem on all examples. + * Check for incompatible types. Warn for unimplemented types + * Check format of interaction samples. #277 Don't warn about empty sample when it contains interactions. + * Add type methods for all types. Add convenience methods on Problem for easier access + * Improve warning for non-standard output validator languages #258 + * Remove generatedata (never made it into the standard) + * Fix pytest dropping a guess.pdf in working directory. Check PDF magic bytes + * Add -d flag to update_docker to allow easier testing locally + * Fix broken git clone command (`${TAG}` expanded to empty string) + * Fix silly error in docker file, causing apt-get update not to run + * Add -d option to allow building in a dirty rep (to facilitate development of build scripts) + * Workflow that builds and pushes a package to testpypi + * Fix version computation when we build pypi packages + * Fix syntax error in github workflow file + * Fix bug where we crashed if we attempted to load/check twice + * Error if problem name exits in a language without a statement + * Add utility function uses_default_validator for output validation. Warn/error on multiple validators + * Fix new mypy error in mypy 1.16 + * Fix missing support for imgbasedir in md2html + * Fix typo in Dockerfile.full causing it to lack a lot of languages + * Remove year from license - IANAL, but AFAICT it's not needed + * Add warning to pypi package script pointing to the github action now that that's set up + * Change update_docker to default to building from master (but keep :develop tag on docker) + * Add convenient way to run docker. Document need to install languages. 
+ + -- Gunnar Kreitz Thu, 05 Jun 2025 10:59:57 +0200 + kattis-problemtools (1.20231016) jammy; urgency=medium [ Don-Khue Le ] diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec635144..00000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control index 0a635887..a6b27d74 100644 --- a/debian/control +++ b/debian/control @@ -2,13 +2,13 @@ Source: kattis-problemtools Section: devel Priority: optional Maintainer: Per Austrin -Build-Depends: debhelper (>= 8.0.0), g++ (>= 4.8), dh-python, python3, python3-setuptools, python3-pytest, python3-yaml, python3-setuptools, python3-pytest, libboost-regex-dev, libgmp-dev, automake, autoconf +Build-Depends: debhelper-compat (= 13), g++ (>= 4.8), dh-virtualenv, python3, libboost-regex-dev, libgmp-dev, automake, autoconf, git, python3-venv, dpkg-dev Standards-Version: 3.9.4 Homepage: https://github.com/Kattis/problemtools Package: kattis-problemtools Architecture: any -Depends: ${shlibs:Depends}, ${python3:Depends}, ${misc:Depends}, python3-plastex, python3-pkg-resources, texlive-plain-generic, texlive-fonts-recommended, texlive-latex-extra, texlive-lang-cyrillic, tidy, ghostscript +Depends: ${shlibs:Depends}, ${misc:Depends}, pandoc, python3, texlive-plain-generic, texlive-fonts-recommended, texlive-latex-extra, texlive-lang-cyrillic, tidy, ghostscript, dvisvgm Recommends: gcc, g++ Description: Kattis Problem Tools These are tools to manage and verify problem packages in the diff --git a/debian/kattis-problemtools.links b/debian/kattis-problemtools.links new file mode 100644 index 00000000..39354ac1 --- /dev/null +++ b/debian/kattis-problemtools.links @@ -0,0 +1,3 @@ +opt/venvs/kattis-problemtools/bin/verifyproblem usr/bin/verifyproblem +opt/venvs/kattis-problemtools/bin/problem2pdf usr/bin/problem2pdf +opt/venvs/kattis-problemtools/bin/problem2html usr/bin/problem2html diff --git a/debian/rules b/debian/rules index 76a72e36..2c10725f 100755 --- a/debian/rules +++ b/debian/rules @@ -4,12 +4,17 @@ # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 -# Uncomment this to turn off cleanup. -export PYBUILD_DISABLE=clean +%: + dh $@ --with python-virtualenv -export PYBUILD_AFTER_CLEAN=make -C support distclean -export PYBUILD_TEST_PYTEST=1 -export no_proxy=github.com +override_dh_virtualenv: + dh_virtualenv --builtin-venv --python /usr/bin/python3 -%: - dh $@ --with python3 --buildsystem=pybuild +override_dh_strip: + dh_strip --exclude=/PIL/ --exclude=/pillow.libs/ + +override_dh_shlibdeps: + dh_shlibdeps -X/x86/ -X/PIL/.libs/ -X/pillow.libs/ + +override_dh_dwz: + dh_dwz --exclude=/PIL/ --exclude=/pillow.libs/ diff --git a/examples/README.md b/examples/README.md index 2f6107a3..9d7f9ee5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,4 +24,5 @@ more than one language. ## oddecho This is an example of a *scoring* problem where submissions can get -different scores depending on which test groups they solve. It also demonstrates how an input validator might check different constraints for different test groups. +different scores depending on which test groups they solve. It also demonstrates how an input validator might check different constraints for different test groups. The swedish statement showcases how to use images, footnotes +and tables in Markdown. 
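To illustrate the per-group constraint checking mentioned in the README change above: an input validator can receive group-specific arguments (configured per test group, e.g. via input_validator_flags / input_validator_args in that group's testdata.yaml, depending on format version) and tighten its bounds accordingly. The sketch below is a hypothetical Python validator, not the C++ validator shipped with the oddecho example, and the --max_n flag name is an assumption made for illustration.

#!/usr/bin/env python3
# Hypothetical input validator sketch (the oddecho example ships a C++ validator).
# Group-specific bounds are assumed to arrive as command-line arguments, e.g. via
# the group's testdata.yaml configuration.
import argparse
import re
import sys


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_n', type=int, default=10)  # a stricter group could pass --max_n 5
    args, _ = parser.parse_known_args()

    lines = sys.stdin.read().split('\n')
    n = int(lines[0])
    assert 1 <= n <= args.max_n, f'N={n} outside [1, {args.max_n}]'
    for word in lines[1 : n + 1]:
        assert re.fullmatch(r'[a-z]{1,100}', word), f'bad word: {word!r}'
    assert lines[n + 1 :] == [''], 'trailing data after the last word'
    sys.exit(42)  # exit code 42 tells the tooling the input is valid


if __name__ == '__main__':
    main()

Group 1 in the example ("N is always 5") would then correspond to passing a tighter bound such as --max_n 5, while the other group relies on the global limit.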
diff --git a/examples/different/output_validators/different_validator/validate.h b/examples/different/output_validators/different_validator/validate.h index 00c896a7..4f653ff1 100644 --- a/examples/different/output_validators/different_validator/validate.h +++ b/examples/different/output_validators/different_validator/validate.h @@ -56,7 +56,6 @@ const int EXITCODE_AC = 42; const int EXITCODE_WA = 43; const std::string FILENAME_AUTHOR_MESSAGE = "teammessage.txt"; const std::string FILENAME_JUDGE_MESSAGE = "judgemessage.txt"; -const std::string FILENAME_JUDGE_ERROR = "judgeerror.txt"; const std::string FILENAME_SCORE = "score.txt"; #define USAGE "%s: judge_in judge_ans feedback_dir < author_out\n" @@ -107,7 +106,7 @@ void wrong_answer(const std::string &msg, ...) { void judge_error(const std::string &msg, ...) { va_list pvar; va_start(pvar, msg); - vreport_feedback(FILENAME_JUDGE_ERROR, msg, pvar); + vreport_feedback(FILENAME_JUDGE_MESSAGE, msg, pvar); assert(0); } diff --git a/examples/different/problem.yaml b/examples/different/problem.yaml index 279a8acb..c67e0aa3 100644 --- a/examples/different/problem.yaml +++ b/examples/different/problem.yaml @@ -5,6 +5,8 @@ ## Author of the problem (default: null) # author: +name: A Different Problem + ## Where the problem was first used (default: null) source: Kattis # source_url: diff --git a/examples/guess/output_validators/guess_validator/validate.cc b/examples/guess/output_validator/guess_validator/validate.cc similarity index 100% rename from examples/guess/output_validators/guess_validator/validate.cc rename to examples/guess/output_validator/guess_validator/validate.cc diff --git a/examples/guess/output_validators/guess_validator/validate.h b/examples/guess/output_validator/guess_validator/validate.h similarity index 97% rename from examples/guess/output_validators/guess_validator/validate.h rename to examples/guess/output_validator/guess_validator/validate.h index 00c896a7..4f653ff1 100644 --- a/examples/guess/output_validators/guess_validator/validate.h +++ b/examples/guess/output_validator/guess_validator/validate.h @@ -56,7 +56,6 @@ const int EXITCODE_AC = 42; const int EXITCODE_WA = 43; const std::string FILENAME_AUTHOR_MESSAGE = "teammessage.txt"; const std::string FILENAME_JUDGE_MESSAGE = "judgemessage.txt"; -const std::string FILENAME_JUDGE_ERROR = "judgeerror.txt"; const std::string FILENAME_SCORE = "score.txt"; #define USAGE "%s: judge_in judge_ans feedback_dir < author_out\n" @@ -107,7 +106,7 @@ void wrong_answer(const std::string &msg, ...) { void judge_error(const std::string &msg, ...) { va_list pvar; va_start(pvar, msg); - vreport_feedback(FILENAME_JUDGE_ERROR, msg, pvar); + vreport_feedback(FILENAME_JUDGE_MESSAGE, msg, pvar); assert(0); } diff --git a/examples/guess/problem.yaml b/examples/guess/problem.yaml index fcb51934..8744f5e7 100644 --- a/examples/guess/problem.yaml +++ b/examples/guess/problem.yaml @@ -1,10 +1,16 @@ +problem_format_version: 2023-07-draft +uuid: 5ca6ba5b-36d5-4eff-8aa7-d967cbc4375e source: Kattis license: cc by-sa -validation: custom interactive +type: interactive +name: + en: Guess the Number + sv: Gissa talet # Override standard limits: say that the TLE solutions provided should # be at least 4 times above the time limit in order for us to be # happy. 
limits: - time_safety_margin: 4 + time_multipliers: + time_limit_to_tle: 4 diff --git a/examples/guess/problem_statement/problem.en.tex b/examples/guess/statement/problem.en.tex similarity index 100% rename from examples/guess/problem_statement/problem.en.tex rename to examples/guess/statement/problem.en.tex diff --git a/examples/guess/statement/problem.sv.md b/examples/guess/statement/problem.sv.md new file mode 100644 index 00000000..9c49030c --- /dev/null +++ b/examples/guess/statement/problem.sv.md @@ -0,0 +1,20 @@ +Jag tänker på ett hemligt tal mellan $1$ och $1000$, kan du gissa vilket? +Givet en gissning kommer jag att berätta om din gissning +var för stor, för liten eller rätt. Du får bara $10$ gissningar, använd +dem klokt! + + +# Interaktion +Ditt program ska skriva ut gissningar om talet. +En gissning är en rad som enbart innehåller ett heltal mellan $1$ och $1000$. +Efter varje gissning måste du flusha standard out. + +Efter varje gissning kan du läsa svaret på standard in. +Detta svar är ett av tre ord: + +- `lower` om talet jag tänker på är lägre än din gissning, +- `higher` om talet jag tänker på är högre än din gissning, eller +- `correct` om din gissning är korrekt. + +Efter att ha gissat rätt ska du avsluta ditt program. +Om du gissar fel $10$ gånger får du inga fler chanser och ditt program kommer avbrytas. diff --git a/examples/hello/problem.yaml b/examples/hello/problem.yaml index 194b060f..bc12a981 100644 --- a/examples/hello/problem.yaml +++ b/examples/hello/problem.yaml @@ -1,5 +1,6 @@ source: Kattis license: public domain +name: Hello World! # Fix memory limit at 512 MB. (Note that for most problems, this # should not be done. It is only done in this case because we include diff --git a/examples/hello/submissions/accepted/hello.rs b/examples/hello/submissions/accepted/hello.rs new file mode 100644 index 00000000..47ad8c63 --- /dev/null +++ b/examples/hello/submissions/accepted/hello.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello World!"); +} diff --git a/examples/oddecho/input_format_validators/validator/validator.cpp b/examples/oddecho/input_validators/validator/validator.cpp similarity index 100% rename from examples/oddecho/input_format_validators/validator/validator.cpp rename to examples/oddecho/input_validators/validator/validator.cpp diff --git a/examples/oddecho/input_format_validators/validator/validator.h b/examples/oddecho/input_validators/validator/validator.h similarity index 100% rename from examples/oddecho/input_format_validators/validator/validator.h rename to examples/oddecho/input_validators/validator/validator.h diff --git a/examples/oddecho/problem.yaml b/examples/oddecho/problem.yaml index 1fcd5e21..06b2e7df 100644 --- a/examples/oddecho/problem.yaml +++ b/examples/oddecho/problem.yaml @@ -1,7 +1,9 @@ +problem_format_version: 2023-07-draft +uuid: 025dfeea-eb85-4532-94d1-3108ec03c80f license: cc by-sa -author: Johan Sannemo +credits: Johan Sannemo source: Principles of Algorithmic Problem Solving type: scoring -name: Echo -grading: - show_test_data_groups: true +name: + en: Odd Echo + sv: Udda eko diff --git a/examples/oddecho/statement/cave.jpg b/examples/oddecho/statement/cave.jpg new file mode 100644 index 00000000..670bbeda Binary files /dev/null and b/examples/oddecho/statement/cave.jpg differ diff --git a/examples/oddecho/problem_statement/problem.en.tex b/examples/oddecho/statement/problem.en.tex similarity index 100% rename from examples/oddecho/problem_statement/problem.en.tex rename to examples/oddecho/statement/problem.en.tex diff 
--git a/examples/oddecho/statement/problem.sv.md b/examples/oddecho/statement/problem.sv.md new file mode 100644 index 00000000..55f9806f --- /dev/null +++ b/examples/oddecho/statement/problem.sv.md @@ -0,0 +1,33 @@ +**EKO! Eko! Ek...** + +![CC-BY-SA 2.0 By William Craig on wikimedia.org](cave.jpg) + +Du älskar att skrika i grottor för att höra dina ord ekade tillbaka till dig. Tyvärr, som en hårt arbetande mjukvaruingenjör, har du +inte tid för att komma ut och skrika i grottor så ofta. Istället skulle du vilja implementera ett program som fungerar som en ersättning för en grotta. + +Ibland vill du mata in några ord i programmet och få dem ekade tillbaka till dig. Men, som det är välkänt, om du skriker för snabbt i en grotta kan ekot störa de nya ord du säger. [^1] Mer specifikt, vartannat ord du säger kommer att störa ekot av ditt tidigare ord. Därför kommer endast det första, tredje, femte och så vidare ordet faktiskt att producera ett eko. + +Din uppgift är att skriva ett program som simulerar detta beteende. + +## Indata + +Den första raden av indata innehåller ett heltal $N$ ($1 \le N \le 10$). + +De följande $N$ raderna innehåller vardera ett ord. Varje ord är högst $100$ bokstäver långt och innehåller endast bokstäverna `a-z`. + +## Utdata + +Skriv ut de ord som har udda index (dvs. första, tredje, femte och så vidare) i inmatningen. + + +## Poängsättning + +Din lösning kommer att testas på en mängd testfallsgrupper. +För att få poäng för en grupp så måste du klara alla testfall i gruppen. + +| Grupp | Poäng | Begränsningar | +|-------|-------|--------------------------| +| 1 | 1 | $N$ är alltid $5$ | +| 2 | 1 | Inga ytterligare begränsningar | + +[^1]: [https://sv.wikipedia.org/wiki/Interferens](https://sv.wikipedia.org/wiki/Interferens) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..1a4c5a69 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,6 @@ +[mypy] +ignore_missing_imports = False +follow_untyped_imports = True +install_types = True +check_untyped_defs = True +ignore_errors = False diff --git a/problemtools/ProblemPlasTeX/ProblemsetMacros.py b/problemtools/ProblemPlasTeX/ProblemsetMacros.py index 31794c98..b5bd4f41 100644 --- a/problemtools/ProblemPlasTeX/ProblemsetMacros.py +++ b/problemtools/ProblemPlasTeX/ProblemsetMacros.py @@ -1,16 +1,15 @@ import sys import os import os.path -import io from plasTeX.DOM import Node from plasTeX.Base import Command from plasTeX.Base import DimenCommand from plasTeX.Logging import getLogger -import plasTeX.Packages.graphics as graphics log = getLogger() status = getLogger('status') + # Ugly hack: assume textwidth is 600pt. True for Kattis but not in # general. 
class textwidth(DimenCommand): @@ -25,7 +24,7 @@ def clean_width(width): nodes = width.childNodes if len(nodes) != 2 or nodes[1].nodeName != 'textwidth': return width - return '%.2f%%' % (100*float(nodes[0])) + return '%.2f%%' % (100 * float(nodes[0])) # \problemheader @@ -33,9 +32,8 @@ class problemheader(Command): args = 'title id:str' def invoke(self, tex): - res = Command.invoke(self, tex) - timelimfile = os.path.join(os.path.dirname(tex.filename), - '..', '.timelimit') + super().invoke(tex) + timelimfile = os.path.join(os.path.dirname(tex.filename), '..', '.timelimit') if os.path.isfile(timelimfile): self.attributes['timelim'] = open(timelimfile, 'r').read() @@ -45,10 +43,10 @@ class sampletable(Command): args = 'header1 file1:str header2 file2:str' def read_sample_file(self, filename): - return io.open(filename, 'r', encoding='utf-8').read() + return open(filename, 'r', encoding='utf-8').read() def invoke(self, tex): - res = Command.invoke(self, tex) + super().invoke(tex) dir = os.path.dirname(tex.filename) file1 = os.path.join(dir, self.attributes['file1']) file2 = os.path.join(dir, self.attributes['file2']) @@ -67,28 +65,32 @@ class sampletableinteractive(Command): args = 'header read write file:str' def read_sample_interaction(self, filename): - data = io.open(filename, 'r', encoding='utf-8').read() + data = open(filename, 'r', encoding='utf-8').read() messages = [] - cur_msg = [] + cur_msg: list[str] = [] cur_mode = None for line in data.split('\n'): - if not line: continue - if line[0] == '<': mode = 'read' - elif line[0] == '>': mode = 'write' - else: continue + if not line: + continue + if line[0] == '<': + mode = 'read' + elif line[0] == '>': + mode = 'write' + else: + continue line = line[1:] if mode != cur_mode: - if cur_mode: messages.append({'mode': cur_mode, - 'data': '\n'.join(cur_msg)}) + if cur_mode: + messages.append({'mode': cur_mode, 'data': '\n'.join(cur_msg)}) cur_msg = [] cur_msg.append(line) cur_mode = mode - if cur_mode: messages.append({'mode': cur_mode, - 'data': '\n'.join(cur_msg)}) + if cur_mode: + messages.append({'mode': cur_mode, 'data': '\n'.join(cur_msg)}) return messages def invoke(self, tex): - res = Command.invoke(self, tex) + super().invoke(tex) dir = os.path.dirname(tex.filename) file = os.path.join(dir, self.attributes['file']) try: @@ -104,21 +106,19 @@ def invoke(self, tex): # \includegraphics implementation) class _graphics_command(Command): def invoke(self, tex): - res = Command.invoke(self, tex) + res = super().invoke(tex) # Overcome plasTeX bug by looking for love in the right place + assert self.ownerDocument is not None # Keep mypy happy basetex = self.ownerDocument.userdata['base_tex_instance'] f = self.attributes['file'] - ext = self.ownerDocument.userdata.getPath( - 'packages/graphicx/extensions', - ['.png', '.jpg', '.jpeg', '.gif', '.pdf']) - paths = self.ownerDocument.userdata.getPath( - 'packages/graphicx/paths', [os.path.dirname(basetex.filename)]) - img = None + ext = self.ownerDocument.userdata.getPath('packages/graphicx/extensions', ['.png', '.jpg', '.jpeg', '.gif', '.pdf']) + paths = self.ownerDocument.userdata.getPath('packages/graphicx/paths', [os.path.dirname(basetex.filename)]) + img: str | None = None # Check for file using graphicspath for p in paths: - for e in ['']+ext: - fname = os.path.join(p, f+e) + for e in [''] + ext: + fname = os.path.join(p, f + e) if os.path.isfile(fname): img = os.path.abspath(fname) break @@ -127,14 +127,14 @@ def invoke(self, tex): # Check for file using kpsewhich if img is None: - for e in 
['']+ext: + for e in [''] + ext: try: - img = os.path.abspath(basetex.kpsewhich(f+e)) + img = os.path.abspath(basetex.kpsewhich(f + e)) break except (OSError, IOError): pass - if not os.path.isfile(img): + if img is None or not os.path.isfile(img): log.warning('Could not identify image "%s"' % f) self.imageoverride = img @@ -147,17 +147,20 @@ class illustration(_graphics_command): def invoke(self, tex): res = _graphics_command.invoke(self, tex) - self.style['width'] = '%.2f%%' % (100*self.attributes['width']) + self.style['width'] = '%.2f%%' % (100 * self.attributes['width']) return res + # Dummy for \fontencoding to suppress warnings class fontencoding(Command): args = 'charset:str' + # Dummy for \selectfont to suppress warnings. class selectfont(Command): pass + # Dummy for \ExecuteOptions to suppress warnings. class ExecuteOptions(Command): pass diff --git a/problemtools/ProblemPlasTeX/__init__.py b/problemtools/ProblemPlasTeX/__init__.py index f0a608e7..b136ab7a 100644 --- a/problemtools/ProblemPlasTeX/__init__.py +++ b/problemtools/ProblemPlasTeX/__init__.py @@ -2,7 +2,6 @@ import os import shutil import subprocess -import plasTeX.Renderers from plasTeX.Renderers.PageTemplate import Renderer from plasTeX.Filenames import Filenames from plasTeX.Imagers import Image @@ -10,15 +9,15 @@ log = getLogger() + # Adapted from plasTeX.Imagers.Imager class class ImageConverter(object): fileExtension = '.png' imageAttrs = '' imageUnits = '' - imageTypes = ['.png', '.jpg', '.jpeg', '.gif'] #, '.svg'] - imageConversion = {'.pdf': ['.png', - ['gs', '-dUseCropBox', '-sDEVICE=pngalpha', '-r300', '-o']]} + imageTypes = ['.png', '.jpg', '.jpeg', '.gif'] # , '.svg'] + imageConversion = {'.pdf': ('.png', ['gs', '-dUseCropBox', '-sDEVICE=pngalpha', '-r300', '-o'])} def __init__(self, document): self.config = document.config @@ -28,10 +27,13 @@ def __init__(self, document): self.staticimages = {} # Filename generator - self.newFilename = Filenames(self.config['images'].get('filenames'), - None, - variables={'jobname': document.userdata.get('jobname', '')}, - extension=self.fileExtension, invalid={}) + self.newFilename = Filenames( + self.config['images'].get('filenames'), + None, + variables={'jobname': document.userdata.get('jobname', '')}, + extension=self.fileExtension, + invalid={}, + ) def close(self): return @@ -55,14 +57,14 @@ def getImage(self, node): if oldext in self.imageConversion: # Need to convert image newext = self.imageConversion[oldext][0] - path = os.path.splitext(path)[0]+newext + path = os.path.splitext(path)[0] + newext cmd = self.imageConversion[oldext][1] + [path, name] status = subprocess.call(cmd) if status: log.warning('Failed to convert %s image "%s to %s', oldext, name, newext) else: # Just copy it - path = os.path.splitext(path)[0]+oldext + path = os.path.splitext(path)[0] + oldext shutil.copyfile(name, path) img = Image(path, self.ownerDocument.config['images']) self.staticimages[name] = img @@ -74,25 +76,25 @@ def getImage(self, node): return None - - class ProblemRenderer(Renderer): - """ Renderer for ProblemHTML documents """ + """Renderer for ProblemHTML documents""" fileExtension = '.html' imageTypes = ['.png', '.jpg', '.jpeg', '.gif'] vectorImageTypes = ['.svg'] - def render(self, document): - templatepaths = [os.path.join(os.path.dirname(__file__), '../templates/html'), - os.path.join(os.path.dirname(__file__), '../../templates/html'), - '/usr/lib/problemtools/templates/html'] + def render(self, document, postProcess=None): + templatepaths = [ + 
os.path.join(os.path.dirname(__file__), '../templates/html'), + os.path.join(os.path.dirname(__file__), '../../templates/html'), + '/usr/lib/problemtools/templates/html', + ] templatepath = None for p in templatepaths: if os.path.isdir(p): templatepath = p break - if templatepath == None: + if templatepath is None: raise Exception('Could not find templates needed for conversion to HTML') # Ugly but unfortunately PlasTeX is quite inflexible when it comes to @@ -113,8 +115,7 @@ def processFileContent(self, document, s): s = Renderer.processFileContent(self, document, s) # Force XHTML syntax on empty tags - s = re.compile(r'(<(?:hr|br|img|link|meta)\b.*?)\s*/?\s*(>)', - re.I|re.S).sub(r'\1 /\2', s) + s = re.compile(r'(<(?:hr|br|img|link|meta)\b.*?)\s*/?\s*(>)', re.I | re.S).sub(r'\1 /\2', s) # Remove empty paragraphs s = re.compile(r'
<p>\s*</p>
', re.I).sub(r'', s) diff --git a/problemtools/ProblemPlasTeX/graphicx.py b/problemtools/ProblemPlasTeX/graphicx.py index 6fa954d4..e9669397 100644 --- a/problemtools/ProblemPlasTeX/graphicx.py +++ b/problemtools/ProblemPlasTeX/graphicx.py @@ -1,9 +1,10 @@ import plasTeX.Packages.graphics as graphics -from ProblemsetMacros import _graphics_command, clean_width +from problemtools.ProblemPlasTeX.ProblemsetMacros import _graphics_command, clean_width # Reimplementation of graphicx package because plasTeX is broken and # annoying. + class includegraphics(_graphics_command): args = '* [ options:dict ] file:str' packageName = 'graphicx' @@ -21,8 +22,10 @@ def invoke(self, tex): self.style['width'] = clean_width(width) return res + class DeclareGraphicsExtensions(graphics.DeclareGraphicsExtensions): packageName = 'graphicx' + class graphicspath(graphics.graphicspath): packageName = 'graphicx' diff --git a/problemtools/ProblemPlasTeX/import.py b/problemtools/ProblemPlasTeX/import.py index 5005936a..e2d4e14f 100644 --- a/problemtools/ProblemPlasTeX/import.py +++ b/problemtools/ProblemPlasTeX/import.py @@ -6,6 +6,7 @@ log = getLogger() status = getLogger('status') + # (Partial) implementation of import.sty because plasTeX does not ship # with an implementation. Only implement \import command which is the # only one we'll use. diff --git a/problemtools/ProblemPlasTeX/listingsutf8.py b/problemtools/ProblemPlasTeX/listingsutf8.py index 2a2d1f56..d022ef03 100644 --- a/problemtools/ProblemPlasTeX/listingsutf8.py +++ b/problemtools/ProblemPlasTeX/listingsutf8.py @@ -4,23 +4,21 @@ import os import io -import ProblemsetMacros - log = getLogger() # Implementation of (parts) of listingsutf8 package since PlasTeX does # not have one + class lstinputlisting(Command): args = '* [ options:dict ] file:str' - def read_file(self, filename): - data = io.open(filename, 'r', encoding='utf-8').read() - data = ProblemsetMacros.plastex_escape(data) - return data + def read_file(self, filename) -> str: + return io.open(filename, 'r', encoding='utf-8').read() - def invoke(self, tex): - res = Command.invoke(self, tex) + def invoke(self, tex) -> None: + super().invoke(tex) + assert self.ownerDocument is not None # Keep mypy happy basetex = self.ownerDocument.userdata['base_tex_instance'] f = self.attributes['file'] # Maybe more paths to look in? 
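The two clean-up substitutions in ProblemRenderer.processFileContent above (forcing XHTML-style self-closing tags and dropping empty paragraphs) can be seen in isolation in the following standalone sketch; the input string is a made-up example rather than actual plasTeX output.

import re

# Made-up HTML fragment, purely illustrative
s = '<meta charset="utf-8"><br><img src="x.png" ><p>  </p><p>text</p>'

# Force XHTML syntax on empty tags: <br> becomes <br />, <img ...> becomes <img ... />
s = re.compile(r'(<(?:hr|br|img|link|meta)\b.*?)\s*/?\s*(>)', re.I | re.S).sub(r'\1 /\2', s)

# Remove empty paragraphs: the whitespace-only <p>  </p> is dropped
s = re.compile(r'<p>\s*</p>', re.I).sub(r'', s)

print(s)  # <meta charset="utf-8" /><br /><img src="x.png" /><p>text</p>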
diff --git a/problemtools/ProblemPlasTeX/ulem.py b/problemtools/ProblemPlasTeX/ulem.py index c2d9f79f..f891f86b 100644 --- a/problemtools/ProblemPlasTeX/ulem.py +++ b/problemtools/ProblemPlasTeX/ulem.py @@ -1,11 +1,29 @@ from plasTeX.Base.LaTeX.FontSelection import TextCommand -class uline(TextCommand): pass -class uuline(TextCommand): pass -class uwave(TextCommand): pass -class sout(TextCommand): pass -class xout(TextCommand): pass -class dashuline(TextCommand): pass -class dotuline(TextCommand): pass +class uline(TextCommand): + pass + +class uuline(TextCommand): + pass + + +class uwave(TextCommand): + pass + + +class sout(TextCommand): + pass + + +class xout(TextCommand): + pass + + +class dashuline(TextCommand): + pass + + +class dotuline(TextCommand): + pass diff --git a/problemtools/__init__.py b/problemtools/__init__.py index 8dee4bf8..e69de29b 100644 --- a/problemtools/__init__.py +++ b/problemtools/__init__.py @@ -1 +0,0 @@ -from ._version import __version__ diff --git a/problemtools/_version.py b/problemtools/_version.py deleted file mode 100644 index 98cf094a..00000000 --- a/problemtools/_version.py +++ /dev/null @@ -1,2 +0,0 @@ -# Auto-generated from git changelog, do not edit! -__version__ = '1.20231016' diff --git a/problemtools/config.py b/problemtools/config.py index bcc74896..8cd470c9 100644 --- a/problemtools/config.py +++ b/problemtools/config.py @@ -24,12 +24,11 @@ def load_config(configuration_file): try: with open(path, 'r') as config: new_config = yaml.safe_load(config.read()) - except (yaml.parser.ParserError, yaml.parser.ScannerError) as err: + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as err: raise ConfigError('Config file %s: failed to parse: %s' % (path, err)) if res is None: if new_config is None: - raise ConfigError('Base configuration file %s not found in %s' - % (configuration_file, path)) + raise ConfigError('Base configuration file %s not found in %s' % (configuration_file, path)) res = new_config elif new_config is not None: __update_dict(res, new_config) @@ -43,11 +42,11 @@ def __config_file_paths(): priority (i.e., any config in the last path should take precedence over the others). """ - return [os.path.join(os.path.dirname(__file__), 'config'), - os.path.join('/etc', 'kattis', 'problemtools'), - os.path.join(os.environ.get('XDG_CONFIG_HOME', - os.path.join(os.path.expanduser('~'), '.config')), - 'problemtools')] + return [ + os.path.join(os.path.dirname(__file__), 'config'), + os.path.join('/etc', 'kattis', 'problemtools'), + os.path.join(os.environ.get('XDG_CONFIG_HOME', os.path.join(os.path.expanduser('~'), '.config')), 'problemtools'), + ] def __update_dict(orig, update): @@ -58,10 +57,8 @@ def __update_dict(orig, update): For all other entries (k, v), orig[k] is set to v. 
""" - for (key, value) in update.items(): - if (key in orig and - isinstance(value, collections.abc.Mapping) and - isinstance(orig[key], collections.abc.Mapping)): + for key, value in update.items(): + if key in orig and isinstance(value, collections.abc.Mapping) and isinstance(orig[key], collections.abc.Mapping): __update_dict(orig[key], value) else: orig[key] = value diff --git a/problemtools/config/languages.yaml b/problemtools/config/languages.yaml index caa6dbbc..de1a6411 100644 --- a/problemtools/config/languages.yaml +++ b/problemtools/config/languages.yaml @@ -98,14 +98,14 @@ c: name: 'C' priority: 950 files: '*.c' - compile: '/usr/bin/gcc -g -O2 -std=gnu99 -static -o {binary} {files} -lm' + compile: '/usr/bin/gcc -g -O2 -std=gnu17 -static -o {binary} {files} -lm' run: '{binary}' cpp: name: 'C++' priority: 1000 files: '*.cc *.C *.cpp *.cxx *.c++' - compile: '/usr/bin/g++ -g -O2 -std=gnu++17 -static -o {binary} {files}' + compile: '/usr/bin/g++ -g -O2 -std=gnu++23 -static -o {binary} {files} -lrt -Wl,--whole-archive -lpthread -Wl,--no-whole-archive' run: '{binary}' csharp: @@ -147,7 +147,7 @@ java: name: 'Java' priority: 800 files: '*.java' - compile: '/usr/bin/javac -encoding UTF-8 -sourcepath {path} -d {path} {files}' + compile: '/usr/bin/javac -source 21 -encoding UTF-8 -sourcepath {path} -d {path} {files}' run: '/usr/bin/java -Dfile.encoding=UTF-8 -XX:+UseSerialGC -Xss64m -Xms{memlim}m -Xmx{memlim}m -cp {path} {mainclass}' javascript: @@ -161,7 +161,7 @@ kotlin: name: 'Kotlin' priority: 250 files: '*.kt' - compile: '/usr/bin/kotlinc -d {path}/ -- {files}' + compile: '/usr/bin/kotlinc -language-version 1.3 -d {path}/ -- {files}' run: '/usr/bin/kotlin -Dfile.encoding=UTF-8 -J-XX:+UseSerialGC -J-Xss64m -J-Xms{memlim}m -J-Xmx{memlim}m -cp {path}/ {Mainclass}Kt' lisp: @@ -209,27 +209,27 @@ prolog: # Python2 with shebang comes before default python3. python2_with_shebang: - name: 'Python 2' + name: 'Python 2 (w/PyPy)' priority: 860 files: '*.py *.py2' shebang: '^#!.*python2\b' - compile: '/usr/bin/python2 -m py_compile {files}' - run: '/usr/bin/python2 "{mainfile}"' + compile: '/usr/bin/pypy -m py_compile {files}' + run: '/usr/bin/pypy "{mainfile}"' python3: - name: 'Python 3' + name: 'Python 3 (w/PyPy3)' priority: 850 files: '*.py *.py3' - compile: '/usr/bin/python3 -m py_compile {files}' - run: '/usr/bin/python3 "{mainfile}"' + compile: '/usr/bin/pypy3 -m py_compile {files}' + run: '/usr/bin/pypy3 "{mainfile}"' # Python2 without shebang comes after python3. 
python2: - name: 'Python 2' + name: 'Python 2 (w/PyPy)' priority: 840 files: '*.py2' - compile: '/usr/bin/python2 -m py_compile {files}' - run: '/usr/bin/python2 "{mainfile}"' + compile: '/usr/bin/pypy -m py_compile {files}' + run: '/usr/bin/pypy "{mainfile}"' ruby: name: 'Ruby' @@ -245,8 +245,8 @@ rust: name: 'Rust' priority: 575 files: '*.rs' - compile: '/usr/bin/rustc -o{binary} -O --crate-type bin --edition=2018 {files}' - run: '{binary}' + compile: '/usr/bin/rustc -C opt-level=3 -C target-cpu=native --crate-type bin --edition 2021 {mainfile} -o {mainfile}.out' + run: '{mainfile}.out' scala: name: 'Scala' diff --git a/problemtools/config/problem.yaml b/problemtools/config/problem.yaml index 79c9a1d5..c1d7c238 100644 --- a/problemtools/config/problem.yaml +++ b/problemtools/config/problem.yaml @@ -1,31 +1,9 @@ -type: pass-fail -author: '' -source: '' -source_url: '' -license: unknown -rights_owner: '' - -validation: default -validator_flags: '' - limits: - time_multiplier: 5 - time_safety_margin: 2 memory: 1024 output: 8 code: 128 compilation_time: 60 + compilation_memory: 1024 validation_time: 60 validation_memory: 1024 validation_output: 8 - -keywords: '' - -grading: - objective: max - show_test_data_groups: False - -languages: all - -# These are in the spec but currently unsupported -libraries: '' diff --git a/problemtools/formatversion.py b/problemtools/formatversion.py new file mode 100644 index 00000000..757746e2 --- /dev/null +++ b/problemtools/formatversion.py @@ -0,0 +1,47 @@ +import yaml +from enum import StrEnum +from pathlib import Path + + +class FormatVersion(StrEnum): + LEGACY = 'legacy' + V_2023_07 = '2023-07-draft' # When 2023-07 is finalized, replace this and update _missing_ + + @property + def statement_directory(self) -> str: + match self: + case FormatVersion.LEGACY: + return 'problem_statement' + case FormatVersion.V_2023_07: + return 'statement' + + @property + def statement_extensions(self) -> list[str]: + match self: + case FormatVersion.LEGACY: + return ['tex'] + case FormatVersion.V_2023_07: + return ['md', 'tex'] + + @property + def output_validator_directory(self) -> str: + match self: + case FormatVersion.LEGACY: + return 'output_validators' + case FormatVersion.V_2023_07: + return 'output_validator' + + # Support 2023-07 and 2023-07-draft strings. + # This method should be replaced with an alias once we require python 3.13 + @classmethod + def _missing_(cls, value): + if value == '2023-07': + return cls.V_2023_07 + return None + + +def get_format_version(problem_root: Path) -> FormatVersion: + """Loads the version from the problem in problem_root""" + with open(problem_root / 'problem.yaml') as f: + config: dict = yaml.safe_load(f) or {} + return FormatVersion(config.get('problem_format_version', FormatVersion.LEGACY)) diff --git a/problemtools/generatedata.py b/problemtools/generatedata.py deleted file mode 100644 index 3f8abaa5..00000000 --- a/problemtools/generatedata.py +++ /dev/null @@ -1,275 +0,0 @@ -#! 
/usr/bin/env python3 -# -*- coding: utf-8 -*- -import sys -import os -import glob -import tempfile -import shutil -import yaml -from argparse import ArgumentParser -from multiprocessing import Pool, cpu_count - -from .verifyproblem import Generators, ProblemAspect, Problem, is_RTE, argparser_basic_arguments, initialize_logging - -ALL_EXTENSIONS = ['in', 'ans'] + Generators._VISUALIZER_EXTENSIONS - -def argparser(): - parser = ArgumentParser(description='Generate test data for a problem package in the Kattis problem format.') - parser.add_argument('-g', '--generate', - action='store_true', - help='generate test data') - parser.add_argument('-c', '--clean', - action='store_true', - help='clean up generated files') - parser.add_argument('-C', '--clean_all', - action='store_true', - help='clean up generated and unrecognized files') - parser.add_argument('-n', '--dry_run', - action='store_true', - help='don\'t actually do anything') - parser.add_argument('-j', '--parallelism', - type=int, - default=None, - help='level of parallelism') - argparser_basic_arguments(parser) - parser.add_argument('problemdir', nargs='+') - return parser - - -def clean(prob, args): - ProblemAspect.errors = 0 - ProblemAspect.warnings = 0 - base_path = os.path.join(prob.probdir, 'data') - - testcases = { - case['path']: case - for case in prob.generators._testcases - } - - def walk(name, path): - case_count = 0 - cases = set() - empty = True - for fname in sorted(os.listdir(path)): - curpath = os.path.join(path, fname) - nice_path = os.path.relpath(curpath, base_path) - if '.' in fname: - fname, ext = fname.split('.', 1) - else: - ext = '' - curname = '%s/%s' % (name, fname) - - if os.path.isdir(curpath): - next_empty, next_cases = walk(curname, curpath) - case_count += next_cases - if next_empty: - if not args.dry_run: - os.rmdir(curpath) - else: - empty = False - else: - remove = args.clean_all - is_case = False - if (fname, ext) == ('testdata', 'yaml'): - if curname + '.yaml' in prob.generators._testdata_yaml: - remove = True - elif curname in testcases: - is_case = True - case = testcases[curname] - if ext == 'in': - remove = not case['manual'] - elif ext == 'ans': - remove = case['solution'] is not None - elif ext in Generators._VISUALIZER_EXTENSIONS: - remove = case['visualizer'] is not None - - if remove: - prob.generators.msg('Removing %s' % nice_path) - if not args.dry_run: - os.unlink(curpath) - if is_case and curname not in cases: - cases.add(curname) - case_count += 1 - else: - empty = False - - return empty, case_count - - cases_cleaned = 0 - for directory in prob.generators._data_directories: - path = os.path.join(base_path, directory) - if os.path.isdir(path): - cases_cleaned += walk('data/%s' % directory, path)[1] - return cases_cleaned, ProblemAspect.errors, ProblemAspect.warnings - - -class GenerateState: - prob = None - args = None - -def generate_case(case_idx): - ProblemAspect.errors = 0 - ProblemAspect.warnings = 0 - prob = GenerateState.prob - args = GenerateState.args - case = prob.generators._testcases[case_idx] - - steps = [ - ('input', True, None, '.in'), - ('solution', False, '.in', '.ans'), - ('visualizer', False, '.in', None), - ] - - try: - tmp_dir = tempfile.mkdtemp(prefix='gencase', dir=prob.tmpdir) - staging_dir = os.path.join(tmp_dir, 'staging') - os.mkdir(staging_dir) - out_dir = os.path.join(*([prob.probdir] + case['path'].split('/')[:-1])) - name = case['path'].split('/')[-1] - ok = args.dry_run or os.path.isdir(out_dir) - for (gen_type, mandatory, in_ext, out_ext) in steps: - if 
not ok: - break - prog = case.get(gen_type) - if prog is None: - ok = not mandatory - continue - prog, pargs = prog - prog = prob.generators._generators.get(prog) - if prog is None: - ok = not mandatory - continue - - if gen_type == 'input': - prob.generators.msg('Generating %s' % case['path'].replace('data/', '', 1)) - - if isinstance(prog, str): - assert gen_type == 'input' - assert prog.endswith('.in') - for ext in ALL_EXTENSIONS: - path = prog[:-2] + ext - if os.path.isfile(path): - shutil.copyfile(path, os.path.join(staging_dir, '%s.%s' % (name, ext))) - else: - errfile = os.path.join(tmp_dir, 'error') - params = {'args': pargs, 'errfile': errfile} - if in_ext is not None: - params['infile'] = os.path.join(staging_dir, name + in_ext) - if out_ext is not None: - outfile = os.path.join(tmp_dir, 'output') - params['outfile'] = outfile - - oldwd = os.getcwd() - os.chdir(staging_dir) - status, _ = prog.run(**params) - os.chdir(oldwd) - if is_RTE(status): - ok = not mandatory - stderr = None - if os.path.isfile(errfile): - with open(errfile, 'r') as f: - stderr = f.read() - prob.generators.error('Generator of type %s crashed with status %s' % (gen_type, status), stderr) - continue - - if out_ext is not None: - dest = os.path.join(staging_dir, name + out_ext) - if not os.path.isfile(dest): - shutil.copyfile(outfile, dest) - if ok: - for fname in os.listdir(staging_dir): - if '.' not in fname: - continue - curname, ext = fname.split('.', 1) - if curname == name and ext in ALL_EXTENSIONS: - fpath = os.path.join(staging_dir, fname) - if os.path.isfile(fpath) and not args.dry_run: - shutil.copyfile(fpath, os.path.join(out_dir, fname)) - return ok, ProblemAspect.errors, ProblemAspect.warnings - finally: - shutil.rmtree(tmp_dir) - - -def generate(prob, args): - - # Create directory structure - created = set() - for case in prob.generators._testcases: - path = os.path.join(*([prob.probdir] + case['path'].split('/')[:-1])) - if path not in created: - created.add(path) - if not os.path.isdir(path) and not args.dry_run: - try: - os.makedirs(path) - except Exception as e: - prob.generators.error('Could not create path %s' % path, e) - - # Populate testdata.yaml files - for path, content in prob.generators._testdata_yaml.items(): - prob.generators.msg('Generating %s' % path.replace('data/', '', 1)) - path = os.path.join(*([prob.probdir] + path.split('/'))) - if not args.dry_run: - try: - with open(path, 'w') as f: - yaml.dump(content, f) - except Exception as e: - prob.generators.error('Could not write %s' % path, e) - - # Generate test cases in parallel - GenerateState.prob = prob - GenerateState.args = args - pool = Pool(args.parallelism) - res = pool.map_async(generate_case, range(len(prob.generators._testcases))) - while not res.ready(): - # Use async polling for better KeyboardInterrupt handling - res.wait(1) - res = res.get() - return [ sum( r[tp] for r in res ) for tp in range(3) ] - - -def main(): - args = argparser().parse_args() - args.parts = ['generators'] - if args.clean_all: - args.clean = True - if not args.clean: - args.generate = True - args.compile_generators = args.generate - if args.parallelism is None: - args.parallelism = cpu_count() - initialize_logging(args) - - total_errors = 0 - for problemdir in args.problemdir: - print('Loading problem %s' % os.path.basename(os.path.realpath(problemdir))) - with Problem(problemdir) as prob: - prob.check(args) - errors = ProblemAspect.errors - warnings = ProblemAspect.warnings - - if prob.shortname is None: - # Skip invalid problem - continue 
- - def p(x): - return '' if x == 1 else 's' - - status = '' - if args.clean: - cnt, clean_errors, clean_warnings = clean(prob, args) - status += '%d case%s cleaned, ' % (cnt, p(cnt)) - errors += clean_errors - warnings += clean_warnings - if args.generate: - cnt, gen_errors, gen_warnings = generate(prob, args) - status += '%d case%s generated, ' % (cnt, p(cnt)) - errors += gen_errors - warnings += gen_warnings - - print("%s processed: %s%d error%s, %d warning%s" % (prob.shortname, status, errors, p(errors), warnings, p(warnings))) - total_errors += errors - - sys.exit(1 if total_errors > 0 else 0) - -if __name__ == '__main__': - main() diff --git a/problemtools/languages.py b/problemtools/languages.py index 8c0c72c2..1bcb61f6 100644 --- a/problemtools/languages.py +++ b/problemtools/languages.py @@ -2,17 +2,18 @@ This module contains functionality for reading and using configuration of programming languages. """ + import fnmatch import re import string from . import config + class LanguageConfigError(Exception): """Exception class for errors in language configuration.""" - pass - + pass class Language(object): @@ -42,7 +43,6 @@ def __init__(self, lang_id, lang_spec): self.run = None self.update(lang_spec) - def get_source_files(self, file_list): """Given a list of files, determine which ones would be considered source files for the language. @@ -50,12 +50,14 @@ def get_source_files(self, file_list): Args: file_list (list of str): list of file names """ - return [file_name for file_name in file_list - if (any(fnmatch.fnmatch(file_name, glob) - for glob in self.files) - and - self.__matches_shebang(file_name))] - + return [ + file_name + for file_name in file_list + if ( + any(fnmatch.fnmatch(file_name, glob) for glob in self.files) # type: ignore[union-attr] + and self.__matches_shebang(file_name) + ) + ] def update(self, values): """Update a language specification with new values. @@ -66,23 +68,17 @@ def update(self, values): """ # Check that all provided values are known keys - for unknown in set(values)-set(Language.__KEYS): - raise LanguageConfigError( - 'Unknown key "%s" specified for language %s' - % (unknown, self.lang_id)) + for unknown in set(values) - set(Language.__KEYS): + raise LanguageConfigError('Unknown key "%s" specified for language %s' % (unknown, self.lang_id)) - for (key, value) in values.items(): + for key, value in values.items(): # Check type if key == 'priority': if not isinstance(value, int): - raise LanguageConfigError( - 'Language %s: priority must be integer but is %s.' - % (self.lang_id, type(value))) + raise LanguageConfigError('Language %s: priority must be integer but is %s.' % (self.lang_id, type(value))) else: if not isinstance(value, str): - raise LanguageConfigError( - 'Language %s: %s must be string but is %s.' - % (self.lang_id, key, type(value))) + raise LanguageConfigError('Language %s: %s must be string but is %s.' 
% (self.lang_id, key, type(value))) # Save the value if key == 'shebang': @@ -97,7 +93,6 @@ def update(self, values): self.__check() - def __check(self): """Check that the language specification is valid (all mandatory fields provided, all metavariables used in compile/run @@ -105,46 +100,33 @@ def __check(self): """ # Check that all mandatory fields are provided if self.name is None: - raise LanguageConfigError( - 'Language %s has no name' % self.lang_id) + raise LanguageConfigError(f'Language {self.lang_id} has no name') if self.priority is None: - raise LanguageConfigError( - 'Language %s has no priority' % self.lang_id) + raise LanguageConfigError(f'Language {self.lang_id} has no priority') if self.files is None: - raise LanguageConfigError( - - 'Language %s has no files glob' % self.lang_id) + raise LanguageConfigError(f'Language {self.lang_id} has no files glob') if self.run is None: - raise LanguageConfigError( - 'Language %s has no run command' % self.lang_id) + raise LanguageConfigError(f'Language {self.lang_id} has no run command') # Check that all variables appearing are valid variables = Language.__variables_in_command(self.run) if self.compile is not None: variables = variables | Language.__variables_in_command(self.compile) for unknown in variables - set(Language.__VARIABLES): - raise LanguageConfigError( - 'Unknown variable "{%s}" used for language %s' - % (unknown, self.lang_id)) + raise LanguageConfigError('Unknown variable "{%s}" used for language %s' % (unknown, self.lang_id)) # Check for uniquely defined entry point entry = variables & set(['binary', 'mainfile', 'mainclass', 'Mainclass']) if len(entry) == 0: - raise LanguageConfigError( - 'No entry point variable used for language %s' % self.lang_id) + raise LanguageConfigError('No entry point variable used for language %s' % self.lang_id) if len(entry) > 1: - raise LanguageConfigError( - 'More than one entry point type variable used for language %s' - % self.lang_id) - + raise LanguageConfigError('More than one entry point type variable used for language %s' % self.lang_id) @staticmethod def __variables_in_command(cmd): """List all meta-variables appearing in a string.""" formatter = string.Formatter() - return set(field for _, field, _, _ in formatter.parse(cmd) - if field is not None) - + return set(field for _, field, _, _ in formatter.parse(cmd) if field is not None) def __matches_shebang(self, filename): """Check if a file matches the shebang rule for the language.""" @@ -155,10 +137,6 @@ def __matches_shebang(self, filename): return self.shebang.search(shebang_line) is not None - - - - class Languages(object): """A set of languages.""" @@ -174,7 +152,6 @@ def __init__(self, data=None): if data is not None: self.update(data) - def detect_language(self, file_list): """Auto-detect language for a set of files. @@ -186,7 +163,7 @@ def detect_language(self, file_list): list of files did not match any language in the set. """ result = None - src = [] + src: list[str] = [] prio = 1e99 for lang in self.languages.values(): lang_src = lang.get_source_files(file_list) @@ -198,9 +175,7 @@ def detect_language(self, file_list): def get(self, lang_id): if not isinstance(lang_id, str): - raise LanguageConfigError( - 'Config file error: language IDs must be strings, but %s is %s.' - % (lang_id, type(lang_id))) + raise LanguageConfigError('Config file error: language IDs must be strings, but %s is %s.' 
% (lang_id, type(lang_id))) return self.languages.get(lang_id, None) def update(self, data): @@ -213,21 +188,19 @@ def update(self, data): for that language will be overridden and updated. """ if not isinstance(data, dict): - raise LanguageConfigError( - 'Config file error: content must be a dictionary, but is %s.' - % (type(data))) + raise LanguageConfigError('Config file error: content must be a dictionary, but is %s.' % (type(data))) - for (lang_id, lang_spec) in data.items(): + for lang_id, lang_spec in data.items(): if not isinstance(lang_id, str): raise LanguageConfigError( - 'Config file error: language IDs must be strings, but %s is %s.' - % (lang_id, type(lang_id))) + 'Config file error: language IDs must be strings, but %s is %s.' % (lang_id, type(lang_id)) + ) if not isinstance(lang_spec, (dict, Language)): raise LanguageConfigError( 'Config file error: language spec must be a dictionary, but spec of language %s is %s.' - % (lang_id, type(lang_spec))) - + % (lang_id, type(lang_spec)) + ) if isinstance(lang_spec, Language): self.languages[lang_id] = lang_spec @@ -236,12 +209,12 @@ def update(self, data): else: self.languages[lang_id].update(lang_spec) - priorities = {} - for (lang_id, lang) in self.languages.items(): + priorities: dict[int, Language] = {} + for lang_id, lang in self.languages.items(): if lang.priority in priorities: raise LanguageConfigError( - 'Languages %s and %s both have priority %d.' - % (lang_id, priorities[lang.priority], lang.priority)) + 'Languages %s and %s both have priority %d.' % (lang_id, priorities[lang.priority], lang.priority) + ) priorities[lang.priority] = lang_id diff --git a/problemtools/md2html.py b/problemtools/md2html.py new file mode 100644 index 00000000..445eca0f --- /dev/null +++ b/problemtools/md2html.py @@ -0,0 +1,151 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- +import argparse +import hashlib +import html +import os +from pathlib import Path +import re +import shutil +import string +import subprocess + +import nh3 + +from . import statement_util + + +def convert(problem_root: Path, options: argparse.Namespace, statement_file: Path) -> bool: + """Convert a Markdown statement to HTML. Writes output to current working directory. + + Args: + problem: path to problem directory + options: command-line arguments. 
See problem2html.py + """ + destfile = string.Template(options.destfile).safe_substitute(problem=problem_root.name) + imgbasedir = string.Template(options.imgbasedir).safe_substitute(problem=problem_root.name) + + command = ['pandoc', str(statement_file), '-t', 'html', '--mathjax'] + statement_html = subprocess.run(command, capture_output=True, text=True, shell=False, check=True).stdout + + statement_html = sanitize_html(statement_file.parent, statement_html, imgbasedir) + + templatepaths = [ + os.path.join(os.path.dirname(__file__), 'templates/markdown_html'), + '/usr/lib/problemtools/templates/markdown_html', + ] + templatepath = next( + (p for p in templatepaths if os.path.isdir(p) and os.path.isfile(os.path.join(p, 'default-layout.html'))), None + ) + + if templatepath is None: + raise FileNotFoundError('Could not find directory with markdown templates') + + with open(Path(templatepath) / 'default-layout.html', 'r', encoding='utf-8') as template_file: + template = template_file.read() + + problem_name = statement_util.get_yaml_problem_name(problem_root, options.language) + substitution_params = { + 'statement_html': statement_html, + 'language': options.language, + 'title': html.escape(problem_name) if problem_name else 'Missing problem name', + 'problemid': html.escape(problem_root.name), + } + + statement_html = template % substitution_params + + samples = statement_util.format_samples(problem_root) + # Insert samples at {{nextsample}} and {{remainingsamples}} + statement_html, remaining_samples = statement_util.inject_samples(statement_html, samples) + + # Insert the remaining samples at the bottom + # However, footnotes should be below samples + sample_insertion_position = statement_util.find_footnotes(statement_html) + if sample_insertion_position is None: + # No footnotes, so insert at the end + sample_insertion_position = statement_html.rfind('') + statement_html = ( + statement_html[:sample_insertion_position] + ''.join(remaining_samples) + statement_html[sample_insertion_position:] + ) + + with open(destfile, 'w', encoding='utf-8', errors='xmlcharrefreplace') as output_file: + output_file.write(statement_html) + + if options.css: + shutil.copyfile(os.path.join(templatepath, 'problem.css'), 'problem.css') + + return True + + +def sanitize_html(statement_dir: Path, statement_html: str, imgbasedir: str) -> str: + # Allow footnote ids (the anchor points you jump to) + def is_fn_id(s): + pattern_id_top = r'^fn\d+$' + pattern_id_bottom = r'^fnref\d+$' + return bool(re.fullmatch(pattern_id_top, s)) or bool(re.fullmatch(pattern_id_bottom, s)) + + allowed_classes = ('sample', 'problemheader', 'problembody', 'sampleinteractionwrite', 'sampleinteractionread') + + # Annoying: nh3 will ignore exceptions in attribute_filter + image_fail_reason: list[Exception] = [] + + def attribute_filter(tag, attribute, value): + if attribute == 'class' and value in allowed_classes: + return value + # Never versions of Pandoc will give class="footnotes footnotes-end-of-document" + # We don't want to blindly allow any class with footnotes in it, so only allow footnotes + if attribute == 'class' and 'footnotes' in value: + return 'footnotes' + if tag == 'a' and attribute == 'href': + return value + if tag in ('li', 'a') and attribute == 'id' and is_fn_id(value): + return value + if tag == 'img' and attribute == 'src': + try: + statement_util.assert_image_is_valid(statement_dir, value) + except Exception as e: + nonlocal image_fail_reason + image_fail_reason.append(e) + return None + return 
copy_image(statement_dir, value, imgbasedir) + return None + + statement_html = nh3.clean( + statement_html, + link_rel='noopener nofollow noreferrer', + attribute_filter=attribute_filter, + tags=nh3.ALLOWED_TAGS | {'img', 'a', 'section'}, + attributes={ + 'table': {'class'}, + 'aside': {'class'}, + 'div': {'class'}, + 'section': {'class'}, + 'img': {'src'}, + 'a': {'href', 'id'}, + 'li': {'id'}, + }, + ) + + if image_fail_reason: + # We don't have a great way to emit multiple errors from here, so just re-raise the first error + raise image_fail_reason[0] + + return statement_html + + +def copy_image(statement_dir: Path, img_src: str, imgbasedir: str) -> str: + """Copy image to working directory (with new filename) and returns the new filename + + Args: + statement_dir: the directory with problem statement files + img_src: the image source as in the Markdown statement + """ + + # We rename to sha256 of contents, and preserve the suffix. This flattens + # the directory structure to a single folders in a simple way. + with open(statement_dir / img_src, 'rb') as f: + filename = hashlib.file_digest(f, 'sha256').hexdigest() + Path(img_src).suffix + + if not os.path.isfile(filename): # check if already copied + shutil.copyfile(statement_dir / img_src, filename) + return imgbasedir + filename diff --git a/problemtools/metadata.py b/problemtools/metadata.py new file mode 100644 index 00000000..25535327 --- /dev/null +++ b/problemtools/metadata.py @@ -0,0 +1,361 @@ +import copy +import datetime +import re +from dataclasses import dataclass, field +from enum import StrEnum +from pathlib import Path +from typing import Any, Literal, Self, Type, Union +from uuid import UUID + +from pydantic import BaseModel, ConfigDict, Field +import yaml + +from . import config +from . import statement_util +from .formatversion import FormatVersion + + +class ProblemType(StrEnum): + PASS_FAIL = 'pass-fail' + SCORING = 'scoring' + MULTI_PASS = 'multi-pass' + INTERACTIVE = 'interactive' + SUBMIT_ANSWER = 'submit-answer' + + +class License(StrEnum): + UNKNOWN = 'unknown' + PUBLIC_DOMAIN = 'public domain' + CC0 = 'cc0' + CC_BY = 'cc by' + CC_BY_SA = 'cc by-sa' + EDUCATIONAL = 'educational' + PERMISSION = 'permission' + + +@dataclass +class Person: + name: str + email: str | None = None + orcid: str | None = None + kattis: str | None = None + + @classmethod + def from_string(cls: Type[Self], s: str) -> Self: + match = re.match(r'^(.*?)\s+<(.*)>$', s.strip()) + if match: + return cls(name=match.group(1), email=match.group(2)) + return cls(name=s) + + +@dataclass +class Source: + name: str + url: str | None = None + + +@dataclass +class TimeMultipliers: + ac_to_time_limit: float = 2.0 + time_limit_to_tle: float = 1.5 + + +@dataclass +class Limits: + memory: int + output: int + code: int + compilation_time: int + compilation_memory: int + validation_time: int + validation_memory: int + validation_output: int + time_multipliers: TimeMultipliers = field(default_factory=TimeMultipliers) + time_limit: float | None = None + time_resolution: float = 1.0 + validation_passes: int = 2 + + +@dataclass +class Credits: + """ + Credits format where all persons have been converted to Person objects. + For use in our internal representation. 
+ """ + + authors: list[Person] = field(default_factory=list) + contributors: list[Person] = field(default_factory=list) + testers: list[Person] = field(default_factory=list) + translators: dict[str, list[Person]] = field(default_factory=dict) + packagers: list[Person] = field(default_factory=list) + acknowledgements: list[Person] = field(default_factory=list) + + +@dataclass +class InputCredits: + """ + A more permissive dataclass for credits, as the input in 2023-07 looks. + For use when validating input. + """ + + # Type in the input format is messy + PersonOrPersons = Union[str | list[Union[Person, str]]] + + authors: PersonOrPersons = field(default_factory=list) + contributors: PersonOrPersons = field(default_factory=list) + testers: PersonOrPersons = field(default_factory=list) + translators: dict[str, PersonOrPersons] = field(default_factory=dict) + packagers: PersonOrPersons = field(default_factory=list) + acknowledgements: PersonOrPersons = field(default_factory=list) + + +class Metadata2023_07(BaseModel): + """ + The metadata for a problem as input in version 2023-07-draft. + """ + + problem_format_version: str + name: dict[str, str] | str + uuid: UUID | None = None # UUID *is* mandatory, but we deal with that in verifyproblem for better UX + type: list[ProblemType] | ProblemType = ProblemType.PASS_FAIL + version: str | None = None + credits: dict | str | None = None + source: list[Union[str, Source]] | Source | str = [] + license: License = License.UNKNOWN + rights_owner: str | None = None + embargo_until: datetime.datetime | None = None + limits: Limits + keywords: list[str] = [] + languages: list[str] | Literal['all'] = 'all' + allow_file_writing: bool = True + constants: dict[str, int | float | str] = {} + + model_config = ConfigDict(extra='forbid') + + +@dataclass +class LegacyGrading: + objective: Literal['max', 'min'] = 'max' + show_test_data_groups: bool = False + # These 3 fields predate the version called "legacy" + accept_score: float | None = None + reject_score: float | None = None + range: str | None = None + on_reject: Literal['first_error', 'worst_error', 'grade'] | None = None + + +@dataclass +class LegacyLimits: + memory: int + output: int + code: int + compilation_time: int + compilation_memory: int + validation_time: int + validation_memory: int + validation_output: int + time_multiplier: float = 5.0 + time_safety_margin: float = 2.0 + + +class MetadataLegacy(BaseModel): + """ + The metadata for a problem as input in version legacy (plus a few fields + which pre-date the version called legacy). + """ + + problem_format_version: FormatVersion = FormatVersion.LEGACY + type: Literal['pass-fail'] | Literal['scoring'] = 'pass-fail' + name: str | None = None + uuid: UUID | None = None + author: str | None = None + source: str | None = None + source_url: str | None = None + license: License = License.UNKNOWN + rights_owner: str | None = None + limits: LegacyLimits + validation: str = 'default' + validator_flags: str = '' + grading: LegacyGrading = LegacyGrading() + keywords: str = '' + + model_config = ConfigDict(extra='forbid') + + +class Metadata(BaseModel): + """ + The metadata for a problem, as used internally in problemtools. Closely + follows the 2023-07-draft version, but is more fully parsed, and adds + a few legacy fields to represent information not in 2023-07. + + Metadata serializes to a valid 2023-07-draft configuration. 
+ """ + + problem_format_version: FormatVersion + type: list[ProblemType] + name: dict[str, str] + uuid: UUID | None + version: str | None + credits: Credits + source: list[Source] + license: License + rights_owner: str | None + embargo_until: datetime.datetime | None + limits: Limits + keywords: list[str] + languages: list[str] | Literal['all'] + allow_file_writing: bool + constants: dict + legacy_grading: LegacyGrading = Field(default_factory=LegacyGrading, exclude=True) + legacy_validation: str = Field(default='', exclude=True) + legacy_validator_flags: str = Field(default='', exclude=True) + legacy_custom_score: bool = Field(default=False, exclude=True) # True iff legacy_validation is custom and score. + + model_config = ConfigDict(extra='forbid') + + def is_pass_fail(self) -> bool: + return not self.is_scoring() + + def is_scoring(self) -> bool: + return ProblemType.SCORING in self.type + + def is_interactive(self) -> bool: + return ProblemType.INTERACTIVE in self.type + + def is_multi_pass(self) -> bool: + return ProblemType.MULTI_PASS in self.type + + def is_submit_answer(self) -> bool: + return ProblemType.SUBMIT_ANSWER in self.type + + @classmethod + def from_legacy(cls: Type[Self], legacy: MetadataLegacy, names_from_statements: dict[str, str]) -> Self: + metadata = legacy.model_dump() + metadata['type'] = [metadata['type']] + # Support for *ancient* problems where names_from_statements is empty + if names_from_statements: + metadata['name'] = names_from_statements + elif metadata['name']: + metadata['name'] = {'en': metadata['name']} + else: + metadata['name'] = {} + metadata['version'] = None + + def parse_author_field(author: str) -> list[Person]: + authors = re.split(r',\s*|\s+and\s+|\s+&\s+', author) + authors = [x.strip(' \t\r\n') for x in authors] + authors = [x for x in authors if len(x) > 0] + return [Person.from_string(author) for author in authors] + + metadata['credits'] = {} + if metadata['author'] is not None: + metadata['credits']['authors'] = parse_author_field(metadata['author']) + del metadata['author'] + metadata['source'] = [] if metadata['source'] is None else [Source(metadata['source'], metadata['source_url'])] + del metadata['source_url'] + metadata['embargo_until'] = None + metadata['limits']['time_multipliers'] = { + 'ac_to_time_limit': metadata['limits']['time_multiplier'], + 'time_limit_to_tle': metadata['limits']['time_safety_margin'], + } + del metadata['limits']['time_multiplier'] + del metadata['limits']['time_safety_margin'] + metadata['keywords'] = metadata['keywords'].split() + metadata['languages'] = 'all' + metadata['allow_file_writing'] = True + metadata['constants'] = {} + + # The interactive flag from validation now lives in type, copy it over. 
+ validation = metadata['validation'].split() + if validation[0] == 'custom': + if 'interactive' in validation[1:]: + metadata['type'].append('interactive') + if 'score' in validation[1:]: + metadata['legacy_custom_score'] = True + # Copy over the legacy info that does not fit cleanly + for key in 'grading', 'validator_flags', 'validation': + metadata[f'legacy_{key}'] = metadata[key] + del metadata[key] + return cls.model_validate(metadata) + + @classmethod + def from_2023_07(cls: Type[Self], md2023_07: Metadata2023_07) -> Self: + metadata = md2023_07.model_dump() + metadata['type'] = [metadata['type']] if isinstance(metadata['type'], str) else metadata['type'] + metadata['name'] = {'en': metadata['name']} if isinstance(metadata['name'], str) else metadata['name'] + + def parse_source(source: str | Source) -> Source: + return Source(name=source, url=None) if isinstance(source, str) else source + + # Convenience function to deal with the fact that lists of persons/sources are + # either a string, or a list of strings or dicts (if dicts, pydantic + # already parsed those for us). + def parse_list(callback, lst: str | list) -> list: + if isinstance(lst, str): + return [callback(lst)] + return list(map(callback, lst)) + + metadata['source'] = parse_list(parse_source, metadata['source']) + + def parse_person(person: str | Person) -> Person: + return Person.from_string(person) if isinstance(person, str) else person + + if metadata['credits'] is None: + metadata['credits'] = {} + elif isinstance(metadata['credits'], str): + metadata['credits'] = {'authors': [parse_person(metadata['credits'])]} + else: + for key in metadata['credits']: + if key == 'translators': # special case, we nest deeper here + for lang in metadata['credits'][key]: + metadata['credits'][key][lang] = parse_list(parse_person, metadata['credits'][key][lang]) + else: + metadata['credits'][key] = parse_list(parse_person, metadata['credits'][key]) + return cls.model_validate(metadata) + + +def parse_metadata( + version: FormatVersion, + problem_yaml_data: dict[str, Any], + names_from_statements: dict[str, str] | None = None, +) -> Metadata: + """ + Parses a data structure from problem.yaml into a Metadata model + :raises pydantic.ValidationError: We intentionally leak pydantic's exception on errors, as it's well designed + """ + + # We need to mix in the system default config values before doing model validation + data = copy.deepcopy(problem_yaml_data) + # Check if the user has done something silly like making limits a string. If so, we + # don't merge in anything, and let pydantic complain later. + if isinstance(data.get('limits', {}), dict): + system_defaults = config.load_config('problem.yaml') + data['limits'] = system_defaults['limits'] | data.get('limits', {}) + + if version is FormatVersion.LEGACY: + legacy_model = MetadataLegacy.model_validate(data) + return Metadata.from_legacy(legacy_model, names_from_statements or {}) + else: + assert version is FormatVersion.V_2023_07 + model_2023_07 = Metadata2023_07.model_validate(data) + return Metadata.from_2023_07(model_2023_07) + + +def load_metadata(problem_root: Path) -> tuple[Metadata, dict]: + """ + Loads metadata from a problem directory. + + Returns Metadata as well as the raw parsed yaml. The latter is likely only of use to verifyproblem. + Leaks exceptions, which is a bit of a mess. Unclear how to best deal with error handling. 
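+    In practice this means e.g. OSError if problem.yaml is missing, yaml.YAMLError
+    if it cannot be parsed, and pydantic.ValidationError if the metadata is invalid.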
+ """ + with (problem_root / 'problem.yaml').open() as f: + data = yaml.safe_load(f) + if data is None: # Loading empty yaml returns None + data = {} + + version = FormatVersion(data.get('problem_format_version', FormatVersion.LEGACY)) + if version is FormatVersion.LEGACY: + names_from_statements = statement_util.load_names_from_statements(problem_root, version) + else: + names_from_statements = None + return parse_metadata(version, data, names_from_statements), data diff --git a/problemtools/problem2html.py b/problemtools/problem2html.py index b3f10b1c..671935dd 100644 --- a/problemtools/problem2html.py +++ b/problemtools/problem2html.py @@ -1,70 +1,29 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- -import re +import argparse import os.path +import re import string -import argparse -import logging import subprocess +import sys +from pathlib import Path -import plasTeX.TeX -import plasTeX.Logging - -from .ProblemPlasTeX import ProblemRenderer -from .ProblemPlasTeX import ProblemsetMacros -from . import template +from . import tex2html +from . import md2html +from . import statement_util -def convert(problem, options=None): - problem = os.path.realpath(problem) +def convert(options: argparse.Namespace, force_statement_file: Path | None = None) -> None: + problem_root = Path(options.problem).resolve(strict=True) - problembase = os.path.splitext(os.path.basename(problem))[0] - destdir = string.Template(options.destdir).safe_substitute(problem=problembase) - destfile = string.Template(options.destfile).safe_substitute(problem=problembase) - imgbasedir = string.Template(options.imgbasedir).safe_substitute(problem=problembase) - - if options.quiet: - plasTeX.Logging.disableLogging() + if force_statement_file: # Used by verifyproblem to test rendering even if there are multiple statements in a language + statement_file = force_statement_file else: - plasTeX.Logging.getLogger().setLevel(getattr(logging, options.loglevel.upper())) - plasTeX.Logging.getLogger('status').setLevel(getattr(logging, options.loglevel.upper())) - - texfile = problem - # Set up template if necessary - with template.Template(problem, language=options.language) as templ: - texfile = open(templ.get_file_name(), 'r') - - origcwd = os.getcwd() - - # Setup parser and renderer etc - - # plasTeX version 3 changed the name of this argument (and guarding against this - # by checking plasTeX.__version__ fails on plastex v3.0 which failed to update - # __version__) - try: - tex = plasTeX.TeX.TeX(myfile=texfile) - except Exception: - tex = plasTeX.TeX.TeX(file=texfile) + statement_file = statement_util.find_statement(problem_root, options.language) - ProblemsetMacros.init(tex) - - tex.ownerDocument.config['general']['copy-theme-extras'] = options.css - if not options.headers: - tex.ownerDocument.userdata['noheaders'] = True - tex.ownerDocument.config['files']['filename'] = destfile - tex.ownerDocument.config['images']['filenames'] = 'img-$num(4)' - tex.ownerDocument.config['images']['enabled'] = False - tex.ownerDocument.config['images']['imager'] = 'none' - tex.ownerDocument.config['images']['base-url'] = imgbasedir - # tell plasTeX where to search for problemtools' built-in packages - tex.ownerDocument.config['general']['packages-dirs'] = [os.path.join(os.path.dirname(__file__), 'ProblemPlasTeX')] - - renderer = ProblemRenderer() - - if not options.quiet: - print('Parsing TeX source...') - doc = tex.parse() - texfile.close() + destdir = string.Template(options.destdir).safe_substitute(problem=problem_root.name) + destfile = 
string.Template(options.destfile).safe_substitute(problem=problem_root.name) + origcwd = os.getcwd() # Go to destdir if destdir: @@ -75,12 +34,13 @@ def convert(problem, options=None): try: if not options.quiet: print('Rendering!') - renderer.render(doc) - - # Annoying: I have not figured out any way of stopping the plasTeX - # renderer from generating a .paux file - if os.path.isfile('.paux'): - os.remove('.paux') + match statement_file.suffix: + case '.md': + md2html.convert(problem_root, options, statement_file) + case '.tex': + tex2html.convert(problem_root, options, statement_file) + case _: + raise NotImplementedError('Unsupported file type, expected md or tex: {statement_file.name}') if options.tidy: with open(os.devnull, 'w') as devnull: @@ -92,13 +52,13 @@ def convert(problem, options=None): # identify any large generated files (especially images) if not options.quiet: - for path, dirs, files in os.walk('.'): + for path, _dirs, files in os.walk('.'): for f in files: file_size_kib = os.stat(os.path.join(path, f)).st_size // 1024 if file_size_kib > 1024: - print(f"WARNING: FILE {f} HAS SIZE {file_size_kib} KiB; CONSIDER REDUCING IT") + print(f'WARNING: FILE {f} HAS SIZE {file_size_kib} KiB; CONSIDER REDUCING IT') elif file_size_kib > 300: - print(f"Warning: file {f} has size {file_size_kib} KiB; consider reducing it") + print(f'Warning: file {f} has size {file_size_kib} KiB; consider reducing it') if options.bodyonly: content = open(destfile).read() @@ -109,46 +69,48 @@ def convert(problem, options=None): # restore cwd os.chdir(origcwd) - return True - - -class ConvertOptions: - available = [ - ['bodyonly', 'store_true', '-b', '--body-only', - 'only generate HTML body, no HTML headers', False], - ['css', 'store_false', '-c', '--no-css', - "don't copy CSS file to output directory", True], - ['headers', 'store_false', '-H', '--headers', - "don't generate problem headers (title, problem id, time limit)", True], - ['tidy', 'store_false', '-m', '--messy', - "don't run tidy to postprocess the HTML", True], - ['destdir', 'store', '-d', '--dest-dir', - "output directory", '${problem}_html'], - ['destfile', 'store', '-f', '--dest-file', - "output file name", 'index.html'], - ['language', 'store', '-l', '--language', - 'choose alternate language (2-letter code)', None], - ['loglevel', 'store', '-L', '--log-level', - 'set log level (debug, info, warning, error, critical)', 'warning'], - ['quiet', 'store_true', '-q', '--quiet', - "quiet", False], - ] - - def __init__(self): - for (dest, _, _, _, _, default) in ConvertOptions.available: - setattr(self, dest, default) - self.imgbasedir = '' - - -def main(): - options = ConvertOptions() + +def get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - for (dest, action, short, _long, _help, default) in ConvertOptions.available: - parser.add_argument(short, _long, dest=dest, help=_help, action=action, default=default) + + parser.add_argument( + '-b', '--body-only', dest='bodyonly', action='store_true', help='only generate HTML body, no HTML headers', default=False + ) + parser.add_argument( + '-c', '--no-css', dest='css', action='store_false', help="don't copy CSS file to output directory", default=True + ) + parser.add_argument( + '-H', + '--headers', + dest='headers', + action='store_false', + help="don't generate problem headers (title, problem id, time limit)", + default=True, + ) + parser.add_argument( + '-m', '--messy', dest='tidy', action='store_false', help="don't run 
tidy to postprocess the HTML", default=True + ) + parser.add_argument('-d', '--dest-dir', dest='destdir', help='output directory', default='${problem}_html') + parser.add_argument('-f', '--dest-file', dest='destfile', help='output file name', default='index.html') + parser.add_argument('-l', '--language', dest='language', help='choose language (2-letter code)', default='en') + parser.add_argument( + '-L', '--log-level', dest='loglevel', help='set log level (debug, info, warning, error, critical)', default='warning' + ) + parser.add_argument('-q', '--quiet', dest='quiet', action='store_true', help='quiet', default=False) + parser.add_argument('-i', '--imgbasedir', dest='imgbasedir', default='') parser.add_argument('problem', help='the problem to convert') - options = parser.parse_args(namespace=options) - convert(options.problem, options) + return parser + + +def main() -> None: + parser = get_parser() + options = parser.parse_args() + try: + convert(options) + except Exception as e: + print(e) + sys.exit(1) if __name__ == '__main__': diff --git a/problemtools/problem2pdf.py b/problemtools/problem2pdf.py index 1e039a19..077c18c0 100644 --- a/problemtools/problem2pdf.py +++ b/problemtools/problem2pdf.py @@ -1,24 +1,101 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- -import os.path +import argparse +import os +import re import shutil import string -import argparse import subprocess -from . import template - +import sys +import tempfile +from pathlib import Path -def convert(problem, options=None): - if options is None: - options = ConvertOptions() - - problem = os.path.realpath(problem) - problembase = os.path.splitext(os.path.basename(problem))[0] - destfile = string.Template(options.destfile).safe_substitute(problem=problembase) +from . import template +from . import statement_util + + +def convert(options: argparse.Namespace, force_statement_file: Path | None = None) -> bool: + problem_root = Path(options.problem).resolve(strict=True) + + if force_statement_file: # Used by verifyproblem to test rendering even if there are multiple statements in a language + statement_file = force_statement_file + else: + statement_file = statement_util.find_statement(problem_root, options.language) + + match statement_file.suffix: + case '.md': + return md2pdf(options, statement_file) + case '.tex': + return latex2pdf(options, statement_file) + case _: + raise NotImplementedError('Unsupported file type, expected md or tex: {statement_file.name}') + + +def md2pdf(options: argparse.Namespace, statement_file: Path) -> bool: + """Renders a Markdown document to pdf. 
Uses pandoc md -> tex, then + reuses the normal tex -> pdf pipeline + """ + problem_root = Path(options.problem).resolve(strict=True) + + statement_util.assert_images_are_valid_md(statement_file) + + command = ['pandoc', str(statement_file), '-t', 'latex'] + try: + tex = subprocess.run(command, capture_output=True, text=True, shell=False, check=True).stdout + except subprocess.CalledProcessError as e: + print(f'Error compiling Markdown to pdf: {e.stderr}') + return False + + def format_latex_tables(latex_doc): + # Match table environments produced by pandoc + pattern = r""" + (\\begin\{longtable\}\[\]\{@\{\}) + ([a-z]) + ([a-z]*) + (@\{\}\}) + """ + + def replacer(match): + prefix = match.group(1)[:-3] + first_col = match.group(2) + other_cols = match.group(3) + suffix = match.group(4)[3:] + + # Combine columns with | separators + cols = [first_col] + list(other_cols) + return f'{prefix}|{"|".join(cols)}|{suffix} \\hline' + + return re.sub(pattern, replacer, latex_doc, flags=re.VERBOSE) + + # Add solid outline to tables + tex = format_latex_tables(tex) + tex = tex.replace(r'\toprule', '') + tex = tex.replace(r'\midrule', '') + tex = tex.replace(r'\endhead', '') + tex = tex.replace(r'\bottomrule', '') + tex = tex.replace(r'\tabularnewline', r'\\ \hline') + + # Fix sample inclusions commands + # Currently does not work, as normal problemtools tex -> pdf does not support it + tex = tex.replace(r'\{\{nextsample\}\}', r'\nextsample') + tex = tex.replace(r'\{\{remainingsamples\}\}', r'\remainingsamples') + + problem_name = statement_util.get_yaml_problem_name(problem_root, options.language) + tex = r'\problemname{' + problem_name + '}\n' + tex + with tempfile.NamedTemporaryFile(mode='w', suffix='.tex', dir=statement_file.parent) as temp_tex_file: + temp_tex_file.write(tex) + temp_tex_file.flush() + return latex2pdf(options, Path(temp_tex_file.name)) + + return False + + +def latex2pdf(options: argparse.Namespace, statement_file: Path) -> bool: + problem_root = Path(options.problem).resolve(strict=True) + destfile = string.Template(options.destfile).safe_substitute(problem=problem_root.name) - texfile = problem # Set up template if necessary - with template.Template(problem, language=options.language) as templ: + with template.Template(problem_root, statement_file, options.language) as templ: texfile = templ.get_file_name() origcwd = os.getcwd() @@ -45,35 +122,54 @@ def convert(problem, options=None): if status == 0 and not options.nopdf: shutil.move(os.path.splitext(texfile)[0] + '.pdf', destfile) - return status == 0 - - -class ConvertOptions: - available = [ - ['destfile', 'store', '-o', '--output', - "output file name", '${problem}.pdf'], - ['quiet', 'store_true', '-q', '--quiet', - "quiet", False], - ['language', 'store', '-l', '--language', - 'choose alternate language (2-letter code)', None], - ['nopdf', 'store_true', '-n', '--no-pdf', - 'run pdflatex in -draftmode', False], - ] - - def __init__(self): - for (dest, _, _, _, _, default) in ConvertOptions.available: - setattr(self, dest, default) - - -def main(): + if status: + return False + + # We only sanitize if a PDF was created + if not options.nopdf: + try: + with tempfile.NamedTemporaryFile(suffix='.pdf') as f: + command = [ + 'gs', + '-q', + '-dBATCH', + '-sDEVICE=pdfwrite', + '-dNOPAUSE', + '-dCompatibilityLevel=1.7', + f'-sOutputFile={f.name}', + destfile, + ] + gs_status = subprocess.run(command, capture_output=True, text=True, shell=False, check=True) + if gs_status.returncode != 0: + return False + shutil.copy(f.name, destfile) 
+ except subprocess.CalledProcessError as e: + print(f'Error sanitizing PDF: {e} {e.stderr}') + raise + + return True + + +def get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - for (dest, action, short, _long, _help, default) in ConvertOptions.available: - parser.add_argument(short, _long, dest=dest, help=_help, action=action, default=default) + parser.add_argument('-o', '--output', dest='destfile', help='output file name', default='${problem}.pdf') + parser.add_argument('-q', '--quiet', dest='quiet', action='store_true', help='quiet', default=False) + parser.add_argument('-l', '--language', dest='language', help='choose language (2-letter code)', default='en') + parser.add_argument('-n', '--no-pdf', dest='nopdf', action='store_true', help='run pdflatex in -draftmode', default=False) parser.add_argument('problem', help='the problem to convert') + return parser + + +def main() -> None: + parser = get_parser() options = parser.parse_args() - convert(options.problem, options) + try: + convert(options) + except Exception as e: + print(e) + sys.exit(1) if __name__ == '__main__': diff --git a/problemtools/tests/__init__.py b/problemtools/py.typed similarity index 100% rename from problemtools/tests/__init__.py rename to problemtools/py.typed diff --git a/problemtools/run/__init__.py b/problemtools/run/__init__.py index 79de6047..6f5791dc 100644 --- a/problemtools/run/__init__.py +++ b/problemtools/run/__init__.py @@ -1,22 +1,23 @@ """Package for managing execution of external programs in Kattis Problemtools. """ + import re import os from .buildrun import BuildRun from .checktestdata import Checktestdata -from .errors import ProgramError -from .executable import Executable +from .errors import ProgramError as ProgramError from .program import Program from .source import SourceCode from .viva import Viva -from .tools import get_tool_path, get_tool +from .tools import get_tool as get_tool, get_tool_path as get_tool_path from . import rutil -def find_programs(path, pattern='.*', language_config=None, work_dir=None, - include_dir=None, allow_validation_script=False): +def find_programs( + path, pattern='.*', language_config=None, work_dir=None, include_dir=None, allow_validation_script=False +) -> list[Program]: """Find all programs in a directory. 
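+    Each entry of path whose name matches pattern is turned into a Program via
+    get_program() below; entries that are not recognized as programs are skipped.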
Args: @@ -51,18 +52,19 @@ def find_programs(path, pattern='.*', language_config=None, work_dir=None, for name in sorted(os.listdir(path)): if re.match(pattern, name): fullpath = os.path.join(path, name) - run = get_program(fullpath, - language_config=language_config, - work_dir=work_dir, - include_dir=include_dir, - allow_validation_script=allow_validation_script) + run = get_program( + fullpath, + language_config=language_config, + work_dir=work_dir, + include_dir=include_dir, + allow_validation_script=allow_validation_script, + ) if run is not None: ret.append(run) return ret -def get_program(path, language_config=None, work_dir=None, include_dir=None, - allow_validation_script=False): +def get_program(path, language_config=None, work_dir=None, include_dir=None, allow_validation_script=False) -> Program | None: """Get a Program object for a program Args: @@ -102,13 +104,18 @@ def get_program(path, language_config=None, work_dir=None, include_dir=None, files = [path] else: build = os.path.join(path, 'build') - if os.path.isfile(build) and os.access(path, os.X_OK): + if os.path.isfile(build) and os.access(build, os.X_OK): return BuildRun(path, work_dir) files = rutil.list_files_recursive(path) if language_config is not None: lang = language_config.detect_language(files) if lang is not None: - return SourceCode(path, lang, - work_dir=work_dir, include_dir=include_dir) + if include_dir is not None: + lang_dir = os.path.join(include_dir, lang.lang_id) + build = os.path.join(lang_dir, 'build') + if os.path.isfile(build) and os.access(build, os.X_OK): + return BuildRun(path, work_dir=work_dir, include_dir=lang_dir) + + return SourceCode(path, lang, work_dir=work_dir, include_dir=include_dir) return None diff --git a/problemtools/run/buildrun.py b/problemtools/run/buildrun.py index 208527c0..404bcf5e 100644 --- a/problemtools/run/buildrun.py +++ b/problemtools/run/buildrun.py @@ -12,12 +12,13 @@ from .program import Program from . import rutil +log = logging.getLogger(__file__) + class BuildRun(Program): - """Class for build/run-script program. - """ + """Class for build/run-script program.""" - def __init__(self, path, work_dir=None): + def __init__(self, path, work_dir=None, include_dir=None): """Instantiate BuildRun object. Args: @@ -25,15 +26,11 @@ def __init__(self, path, work_dir=None): work_dir (str): name of temp directory in which to run the scripts (if None, will make new temp directory). 
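+            include_dir (str): optional directory whose contents are copied into
+                the work directory alongside the program files; it may also
+                provide the build script.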
""" + super().__init__() + if not os.path.isdir(path): raise ProgramError('%s is not a directory' % path) - build = os.path.join(path, 'build') - if not os.path.isfile(build): - raise ProgramError('%s does not have a build script' % path) - if not os.access(build, os.X_OK): - raise ProgramError('%s/build is not executable' % path) - if work_dir is None: work_dir = tempfile.mkdtemp() @@ -47,34 +44,35 @@ def __init__(self, path, work_dir=None): os.makedirs(self.path) rutil.add_files(path, self.path) + if include_dir is not None and os.path.isdir(include_dir): + rutil.add_files(include_dir, self.path) + # Check for existence of build script after copying include_dir, since that could contain the script + build = os.path.join(self.path, 'build') + if not os.path.isfile(build): + raise ProgramError('%s does not have a build script' % path) + if not os.access(build, os.X_OK): + raise ProgramError('%s/build is not executable' % path) - def __str__(self): + def __str__(self) -> str: """String representation""" return '%s/' % (self.path) - - _compile_result = None - def compile(self): + def do_compile(self) -> tuple[bool, str | None]: """Run the build script.""" - if self._compile_result is not None: - return self._compile_result - with open(os.devnull, 'w') as devnull: status = subprocess.call(['./build'], stdout=devnull, stderr=devnull, cwd=self.path) run = os.path.join(self.path, 'run') if status: logging.debug('Build script failed (status %d) when compiling %s\n', status, self.name) - self._compile_result = (False, 'build script failed with exit code %d' % (status)) + return (False, 'build script failed with exit code %d' % (status)) elif not os.path.isfile(run) or not os.access(run, os.X_OK): - self._compile_result = (False, 'build script did not produce an executable called "run"') + return (False, 'build script did not produce an executable called "run"') else: - self._compile_result = (True, None) - return self._compile_result - + return (True, None) - def get_runcmd(self, cwd=None, memlim=None): + def get_runcmd(self, cwd=None, memlim=None) -> list[str]: """Run command for the program. Args: @@ -84,7 +82,6 @@ def get_runcmd(self, cwd=None, memlim=None): path = self.path if cwd is None else os.path.relpath(self.path, cwd) return [os.path.join(path, 'run')] - - def should_skip_memory_rlimit(self): + def should_skip_memory_rlimit(self) -> bool: """Ugly hack (see program.py for details).""" return True diff --git a/problemtools/run/checktestdata.py b/problemtools/run/checktestdata.py index 0fd4a4d7..939c1e30 100644 --- a/problemtools/run/checktestdata.py +++ b/problemtools/run/checktestdata.py @@ -9,8 +9,8 @@ class Checktestdata(Executable): - """Wrapper class for running Checktestdata scripts. 
- """ + """Wrapper class for running Checktestdata scripts.""" + _CTD_PATH = get_tool_path('checktestdata') def __init__(self, path): @@ -20,33 +20,26 @@ def __init__(self, path): path (str): path to .ctd source file """ if Checktestdata._CTD_PATH is None: - raise ProgramError( - 'Could not locate the Checktestdata program to run %s' % path) - super(Checktestdata, self).__init__(Checktestdata._CTD_PATH, - args=[path]) - + raise ProgramError('Could not locate the Checktestdata program to run %s' % path) + super().__init__(Checktestdata._CTD_PATH, args=[path]) - def __str__(self): + def __str__(self) -> str: """String representation""" return '%s' % (self.args[0]) - - _compile_result = None - def compile(self): + def do_compile(self) -> tuple[bool, str | None]: """Syntax-check the Checktestdata script Returns: (False, None) if the Checktestdata script has syntax errors and (True, None) otherwise """ - if self._compile_result is None: - (status, _) = super(Checktestdata, self).run() - self._compile_result = ((os.WIFEXITED(status) and os.WEXITSTATUS(status) in [0, 1]), None) - return self._compile_result - + (status, _) = super().run() + return ((os.WIFEXITED(status) and os.WEXITSTATUS(status) in [0, 1]), None) - def run(self, infile='/dev/null', outfile='/dev/null', - errfile='/dev/null', args=None, timelim=1000): + def run( + self, infile='/dev/null', outfile='/dev/null', errfile='/dev/null', args=None, timelim=1000, memlim=1024, work_dir=None + ): """Run the Checktestdata script to validate an input file. Args: @@ -66,15 +59,13 @@ def run(self, infile='/dev/null', outfile='/dev/null', runtime (float): runtime of the Checktestdata process in seconds """ - (status, runtime) = super(Checktestdata, self).run(infile=infile, - outfile=outfile, - errfile=errfile, - args=args, - timelim=timelim) + (status, runtime) = super(Checktestdata, self).run( + infile=infile, outfile=outfile, errfile=errfile, args=args, timelim=timelim, memlim=memlim, work_dir=work_dir + ) # This is ugly, switches the accept exit status and our accept # exit status 42. if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0: - return (42<<8, runtime) + return (42 << 8, runtime) if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 42: return (0, runtime) return (status, runtime) diff --git a/problemtools/run/errors.py b/problemtools/run/errors.py index b71bc1c9..4b8d1db9 100644 --- a/problemtools/run/errors.py +++ b/problemtools/run/errors.py @@ -2,7 +2,8 @@ Error handling. """ + class ProgramError(Exception): - """Base exception class for errors within the run package. - """ + """Base exception class for errors within the run package.""" + pass diff --git a/problemtools/run/executable.py b/problemtools/run/executable.py index b2538e22..8f3a67e7 100644 --- a/problemtools/run/executable.py +++ b/problemtools/run/executable.py @@ -1,13 +1,15 @@ """ Implementation of programs provided by an executable file. """ + import os from .program import Program from .errors import ProgramError + class Executable(Program): - """Class for executable files. - """ + """Class for executable files.""" + def __init__(self, path, args=None): """Instantiate executable object. @@ -17,6 +19,8 @@ def __init__(self, path, args=None): args: list of additional command line arguments that should be passed to the program every time it is executed. 
""" + super().__init__() + if not os.path.isfile(path) or not os.access(path, os.X_OK): raise ProgramError('%s is not an executable program' % path) self.path = path @@ -26,14 +30,8 @@ def __str__(self): """String representation""" return '%s' % (self.path) - def compile(self): - """Dummy implementation of the compile method -- nothing to check! - """ - return (True, None) - def get_runcmd(self, cwd=None, memlim=None): - """Command to run the program. - """ + """Command to run the program.""" return [self.path] + self.args def should_skip_memory_rlimit(self): diff --git a/problemtools/run/limit.py b/problemtools/run/limit.py index db062dbf..5e56f6cc 100644 --- a/problemtools/run/limit.py +++ b/problemtools/run/limit.py @@ -4,6 +4,7 @@ import resource + def check_limit_capabilities(logger): """Check if the problemtools process is run with appropriate capabilities to set rlimits, and if not, issue warnings. @@ -16,19 +17,21 @@ def check_limit_capabilities(logger): """ (_, cpu_hard) = resource.getrlimit(resource.RLIMIT_CPU) if cpu_hard != resource.RLIM_INFINITY: - logger.warning("Hard CPU rlimit of %d, runs involving higher CPU limits than this may behave incorrectly." - % cpu_hard) + logger.warning('Hard CPU rlimit of %d, runs involving higher CPU limits than this may behave incorrectly.' % cpu_hard) (_, stack_hard) = resource.getrlimit(resource.RLIMIT_STACK) if stack_hard != resource.RLIM_INFINITY: - logger.warning("Hard stack rlimit of %d so I can't set it to unlimited. I will keep it at %d. If you experience unexpected issues (in particular run-time errors) this may be the cause." - % (stack_hard, stack_hard)) + logger.warning( + "Hard stack rlimit of %d so I can't set it to unlimited. I will keep it at %d. If you experience unexpected issues (in particular run-time errors) this may be the cause." + % (stack_hard, stack_hard) + ) (_, mem_hard) = resource.getrlimit(resource.RLIMIT_AS) if mem_hard != resource.RLIM_INFINITY: - logger.warning("Hard memory rlimit of %.0f MB, runs involving a higher memory limit may behave incorrectly. If you experience unexpected issues (in particular run-time errors) this may be the cause." - % (mem_hard/1024.0/1024.0)) - + logger.warning( + 'Hard memory rlimit of %.0f MB, runs involving a higher memory limit may behave incorrectly. If you experience unexpected issues (in particular run-time errors) this may be the cause.' + % (mem_hard / 1024.0 / 1024.0) + ) def try_limit(limit, soft, hard): @@ -49,7 +52,6 @@ def try_limit(limit, soft, hard): resource.setrlimit(limit, (soft, hard)) - def __limit_less(lim1, lim2): """Helper function for comparing two rlimit values, handling "unlimited" correctly. diff --git a/problemtools/run/program.py b/problemtools/run/program.py index b237a507..ae27c377 100644 --- a/problemtools/run/program.py +++ b/problemtools/run/program.py @@ -1,26 +1,40 @@ -"""Abstract base class for programs. -""" +"""Abstract base class for programs.""" + import os from . import limit import resource import signal import logging +import threading from .errors import ProgramError -class Program(object): - """Abstract base class for programs. 
- """ - runtime = 0 +from abc import ABC, abstractmethod + +log = logging.getLogger(__name__) + + +class Program(ABC): + """Abstract base class for programs.""" + + def __init__(self) -> None: + self.runtime = 0 + self._compile_lock = threading.Lock() + self._compile_result: tuple[bool, str | None] | None = None + + @abstractmethod + def get_runcmd(self, cwd=None, memlim=None) -> list[str]: + pass - def run(self, infile='/dev/null', outfile='/dev/null', errfile='/dev/null', - args=None, timelim=1000, memlim=1024): + def run( + self, infile='/dev/null', outfile='/dev/null', errfile='/dev/null', args=None, timelim=1000, memlim=1024, work_dir=None + ): """Run the program. Args: infile (str): name of file to pass on stdin outfile (str): name of file to send stdout to - errfile (str): name of file to send stderr ro + errfile (str): name of file to send stderr to args (list of str): additional command-line arguments to pass to the program timelim (int): CPU time limit in seconds @@ -39,20 +53,29 @@ def run(self, infile='/dev/null', outfile='/dev/null', errfile='/dev/null', if self.should_skip_memory_rlimit(): memlim = None - status, runtime = self.__run_wait(runcmd + args, - infile, outfile, errfile, - timelim, memlim) + status, runtime = self.__run_wait(runcmd + args, infile, outfile, errfile, timelim, memlim, work_dir) self.runtime = max(self.runtime, runtime) return status, runtime - def code_size(self): + def compile(self) -> tuple[bool, str | None]: + with self._compile_lock: + if self._compile_result is None: + self._compile_result = self.do_compile() + return self._compile_result + + def do_compile(self) -> tuple[bool, str | None]: + """Actually compile the program, if needed. Subclasses should override this method. + Do not call this manually -- use compile() instead.""" + return (True, None) + + def code_size(self) -> int: """Subclasses should override this method with the total size of the source code.""" return 0 - def should_skip_memory_rlimit(self): + def should_skip_memory_rlimit(self) -> bool: """Ugly workaround to accommodate Java -- the JVM will crash and burn if there is a memory rlimit applied and this will probably not change anytime soon [time of writing this: 2017-02-05], see @@ -67,11 +90,9 @@ def should_skip_memory_rlimit(self): """ return False - @staticmethod - def __run_wait(argv, infile, outfile, errfile, timelim, memlim): - logging.debug('run "%s < %s > %s 2> %s"', - ' '.join(argv), infile, outfile, errfile) + def __run_wait(argv, infile, outfile, errfile, timelim, memlim, working_directory=None): + log.debug('run "%s < %s > %s 2> %s"', ' '.join(argv), infile, outfile, errfile) pid = os.fork() if pid == 0: # child try: @@ -84,38 +105,35 @@ def __run_wait(argv, infile, outfile, errfile, timelim, memlim): # # This *shouldn't* cause any verdict changes given the setup for # interactive problems, but reset them anyway, for sanity. 
- if hasattr(signal, "SIGPIPE"): + if hasattr(signal, 'SIGPIPE'): signal.signal(signal.SIGPIPE, signal.SIG_DFL) - if hasattr(signal, "SIGXFZ"): + if hasattr(signal, 'SIGXFZ'): signal.signal(signal.SIGXFZ, signal.SIG_DFL) - if hasattr(signal, "SIGXFSZ"): + if hasattr(signal, 'SIGXFSZ'): signal.signal(signal.SIGXFSZ, signal.SIG_DFL) if timelim is not None: limit.try_limit(resource.RLIMIT_CPU, timelim, timelim + 1) if memlim is not None: limit.try_limit(resource.RLIMIT_AS, memlim * (1024**2), resource.RLIM_INFINITY) - limit.try_limit(resource.RLIMIT_STACK, - resource.RLIM_INFINITY, resource.RLIM_INFINITY) + limit.try_limit(resource.RLIMIT_STACK, resource.RLIM_INFINITY, resource.RLIM_INFINITY) Program.__setfd(0, infile, os.O_RDONLY) - Program.__setfd(1, outfile, - os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - Program.__setfd(2, errfile, - os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - + Program.__setfd(1, outfile, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + Program.__setfd(2, errfile, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + if working_directory is not None: + os.chdir(working_directory) os.execvp(argv[0], argv) except Exception as exc: - print("Oops. Fatal error in child process:") + print('Oops. Fatal error in child process:') print(exc) os.kill(os.getpid(), signal.SIGTERM) # Unreachable - logging.error("Unreachable part of run_wait reached") + log.error('Unreachable part of run_wait reached') os.kill(os.getpid(), signal.SIGTERM) (pid, status, rusage) = os.wait4(pid, 0) return status, rusage.ru_utime + rusage.ru_stime - @staticmethod def __setfd(fd, filename, flag): tmpfd = os.open(filename, flag) diff --git a/problemtools/run/rutil.py b/problemtools/run/rutil.py index 7b889e39..62df8fbe 100644 --- a/problemtools/run/rutil.py +++ b/problemtools/run/rutil.py @@ -1,11 +1,12 @@ -"""Some utility functions for the run module. -""" +"""Some utility functions for the run module.""" + import errno import os import shutil from .errors import ProgramError + def add_files(src, dstdir): """Copy src to dstdir. @@ -30,14 +31,13 @@ def add_files(src, dstdir): srcfile = os.path.join(src, name) destfile = os.path.join(dstdir, name) if os.path.isdir(srcfile): - shutil.copytree(srcfile, destfile) + shutil.copytree(srcfile, destfile, dirs_exist_ok=True) else: shutil.copy(srcfile, destfile) except IOError as exc: # FIXME why is this specific error special-cased if exc.errno == errno.ENOENT: - raise ProgramError( - 'File not found when copying program:\n %s' % exc.filename) + raise ProgramError('File not found when copying program:\n %s' % exc.filename) raise @@ -49,6 +49,6 @@ def list_files_recursive(root): directory and its subdirectories. """ ret = [] - for (path, _, files) in os.walk(root): + for path, _, files in os.walk(root): ret.extend([os.path.join(root, path, filename) for filename in files]) return ret diff --git a/problemtools/run/source.py b/problemtools/run/source.py index a7724bda..514c6cf7 100644 --- a/problemtools/run/source.py +++ b/problemtools/run/source.py @@ -1,6 +1,7 @@ """ Implementation of programs provided by source code. """ + import re import os import shlex @@ -12,9 +13,12 @@ from .program import Program from . import rutil +log = logging.getLogger(__name__) + + class SourceCode(Program): - """Class representing a program provided by source code. 
- """ + """Class representing a program provided by source code.""" + def __init__(self, path, language, work_dir=None, include_dir=None): """Instantiate SourceCode object @@ -36,6 +40,7 @@ def __init__(self, path, language, work_dir=None, include_dir=None): then the files in include_dir// will be copied into the work_dir along with the source file(s). """ + super().__init__() if path[-1] == '/': path = path[:-1] @@ -58,16 +63,11 @@ def __init__(self, path, language, work_dir=None, include_dir=None): if os.path.isdir(include_dir): rutil.add_files(include_dir, self.path) - self.src = sorted(self.language.get_source_files( - rutil.list_files_recursive(self.path) - )) + self.src = sorted(self.language.get_source_files(rutil.list_files_recursive(self.path))) if len(self.src) == 0: - raise ProgramError('No source files found for language %s in %s' - % (self.language.lang_id, self.name)) + raise ProgramError('No source files found for language %s in %s' % (self.language.lang_id, self.name)) - self.mainfile = next((x for x in self.src - if re.match(r'^main\.', os.path.basename(x), - re.IGNORECASE)), None) + self.mainfile = next((x for x in self.src if re.match(r'^main\.', os.path.basename(x), re.IGNORECASE)), None) if self.mainfile is None: self.mainfile = self.src[0] @@ -76,25 +76,17 @@ def __init__(self, path, language, work_dir=None, include_dir=None): self.binary = os.path.join(self.path, 'run') - - def code_size(self): + def code_size(self) -> int: return sum(os.path.getsize(x) for x in self.src) - - _compile_result = None - - def compile(self): + def do_compile(self) -> tuple[bool, str | None]: """Compile the source code. Returns tuple: (True, None) if compilation succeeded (False, errmsg) otherwise """ - if self._compile_result is not None: - return self._compile_result - if self.language.compile is None: - self._compile_result = (True, None) return (True, None) command = self.get_compilecmd() @@ -103,21 +95,17 @@ def compile(self): if not os.path.isfile(compiler) or not os.access(compiler, os.X_OK): return (False, '%s does not seem to be installed, expected to find compiler at %s' % (self.language.name, compiler)) - logging.debug('compile command: %s', command) + log.debug('compile command: %s', command) try: subprocess.check_output(command, stderr=subprocess.STDOUT) - self._compile_result = (True, None) + return (True, None) except subprocess.CalledProcessError as err: - self._compile_result = (False, err.output.decode('utf8', 'replace')) - - return self._compile_result + return (False, err.output.decode('utf8', 'replace')) - - def get_compilecmd(self): + def get_compilecmd(self) -> list[str]: return shlex.split(self.language.compile.format(**self.__get_substitution())) - def get_runcmd(self, cwd=None, memlim=1024): """Run command for the program. 
@@ -136,17 +124,14 @@ def get_runcmd(self, cwd=None, memlim=1024): subs['mainfile'] = os.path.relpath(subs['mainfile'], cwd) return shlex.split(self.language.run.format(**subs)) - - def should_skip_memory_rlimit(self): + def should_skip_memory_rlimit(self) -> bool: """Ugly hack (see program.py for details).""" return self.language.name in ['Java', 'Scala', 'Kotlin', 'Common Lisp'] - - def __str__(self): + def __str__(self) -> str: """String representation""" return '%s (%s)' % (self.name, self.language.name) - def __get_substitution(self, memlim=1024): return { 'path': self.path, @@ -155,5 +140,5 @@ def __get_substitution(self, memlim=1024): 'mainfile': self.mainfile, 'mainclass': self.mainclass, 'Mainclass': self.Mainclass, - 'binary': self.binary + 'binary': self.binary, } diff --git a/problemtools/run/tools.py b/problemtools/run/tools.py index 78408cc2..23c30614 100644 --- a/problemtools/run/tools.py +++ b/problemtools/run/tools.py @@ -1,6 +1,7 @@ import os from .executable import Executable + def get_tool_path(name): """Find the path to one of problemtools' external tools. @@ -11,11 +12,12 @@ def get_tool_path(name): Returns: str, path to the tool, or None if the tool was not found. """ - return __locate_executable([os.path.join(os.path.dirname(__file__), - '..', 'support', name), - os.path.join(os.path.dirname(__file__), - '..', '..', 'support', - os.path.splitext(name)[0], name)]) + return __locate_executable( + [ + os.path.join(os.path.dirname(__file__), '..', 'support', name), + os.path.join(os.path.dirname(__file__), '..', '..', 'support', os.path.splitext(name)[0], name), + ] + ) def get_tool(name): @@ -43,5 +45,4 @@ def __locate_executable(candidate_paths): str, first entry of candidate_paths that is an executable file, or None if no such entry. """ - return next((p for p in candidate_paths - if os.path.isfile(p) and os.access(p, os.X_OK)), None) + return next((p for p in candidate_paths if os.path.isfile(p) and os.access(p, os.X_OK)), None) diff --git a/problemtools/run/viva.py b/problemtools/run/viva.py index b6129169..23e35c68 100644 --- a/problemtools/run/viva.py +++ b/problemtools/run/viva.py @@ -9,8 +9,8 @@ class Viva(Executable): - """Wrapper class for running VIVA scripts. 
- """ + """Wrapper class for running VIVA scripts.""" + _VIVA_PATH = get_tool_path('viva.sh') def __init__(self, path): @@ -20,32 +20,25 @@ def __init__(self, path): path (str): path to .viva source file """ if Viva._VIVA_PATH is None: - raise ProgramError( - 'Could not locate the VIVA program to run %s' % path) - super(Viva, self).__init__(Viva._VIVA_PATH, - args=[path]) - + raise ProgramError('Could not locate the VIVA program to run %s' % path) + super().__init__(Viva._VIVA_PATH, args=[path]) def __str__(self): """String representation""" return '%s' % (self.args[0]) - - _compile_result = None - def compile(self): + def do_compile(self) -> tuple[bool, str | None]: """Syntax-check the VIVA script Returns: (False, None) if the VIVA script has syntax errors and (True, None) otherwise """ - if self._compile_result is None: - (status, _) = super(Viva, self).run() - self._compile_result = ((os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0), None) - return self._compile_result - + (status, _) = super().run() + return ((os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0), None) - def run(self, infile='/dev/null', outfile='/dev/null', - errfile='/dev/null', args=None, timelim=1000): + def run( + self, infile='/dev/null', outfile='/dev/null', errfile='/dev/null', args=None, timelim=1000, memlim=1024, work_dir=None + ): """Run the VIVA script to validate an input file. Args: @@ -69,14 +62,13 @@ def run(self, infile='/dev/null', outfile='/dev/null', if infile != '/dev/null': args = args + [infile] - (status, runtime) = super(Viva, self).run(outfile=outfile, - errfile=errfile, - args=args, - timelim=timelim) + (status, runtime) = super(Viva, self).run( + outfile=outfile, errfile=errfile, args=args, timelim=timelim, memlim=memlim, work_dir=work_dir + ) # This is ugly, switches the accept exit status and our accept # exit status 42. if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0: - return (42<<8, runtime) + return (42 << 8, runtime) if os.WIFEXITED(status) and os.WEXITSTATUS(status) == 42: return (0, runtime) return (status, runtime) diff --git a/problemtools/statement_util.py b/problemtools/statement_util.py new file mode 100644 index 00000000..5cd7132f --- /dev/null +++ b/problemtools/statement_util.py @@ -0,0 +1,273 @@ +import collections +import html +import json +import os +import re +import subprocess +import tempfile +from pathlib import Path +from typing import Optional, List, Tuple +from urllib.parse import urlparse + +from . import metadata +from .formatversion import FormatVersion, get_format_version + +ALLOWED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg') # ".svg" +FOOTNOTES_STRINGS = ['
', '