Skip to content

Commit c321a88

Browse files
committed
Hol-Light: Add Hol-light proof framework and Forward NTT proof
Add HOL-Light framework with NTT proof. This includes: - list_proofs.sh, build-proof.sh, dump_bytecode.ml utilities - mldsa-specs.ml and mldsa-utils.ml to support HOL-Light proofs - HOL-Light proof for ML-DSA x86 AVX2 NTT in mldsa_ntt.ml - Addition of hol_light to scripts/tests to wrap and run proofs - READMEs for usage descriptions Signed-off-by: Jake Massimo <jakemas@amazon.com>
1 parent f6b1162 commit c321a88

File tree

16 files changed

+6108
-9
lines changed

16 files changed

+6108
-9
lines changed

flake.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@
100100
}).overrideAttrs (old: {
101101
shellHook = ''
102102
export PATH=$PWD/scripts:$PATH
103-
# Set PROOF_DIR_ARM based on where we entered the shell
104-
export PROOF_DIR_ARM="$PWD/proofs/hol_light/arm"
103+
# Set proof directory for x86_64
104+
export PROOF_DIR_X86_64="$PWD/proofs/hol_light/x86_64"
105105
'';
106106
});
107107
devShells.ci = util.mkShell {

nix/hol_light/0005-Configure-hol-sh-for-mldsa-native.patch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ index 1311255..8b2bc36 100755
2121
+SITELIB=$(dirname $(ocamlfind query findlib 2>/dev/null) 2>/dev/null)
2222
+
2323
+# Set HOLLIGHT_LOAD_PATH to include S2N_BIGNUM_DIR and mldsa-native proofs
24-
+export HOLLIGHT_LOAD_PATH="${S2N_BIGNUM_DIR}:${PROOF_DIR_ARM}:${HOLLIGHT_LOAD_PATH}"
24+
+export HOLLIGHT_LOAD_PATH="${S2N_BIGNUM_DIR}:${PROOF_DIR_X86_64}:${HOLLIGHT_LOAD_PATH}"
2525
+
2626
+# Change to mldsa-native proofs directory if set, so define_from_elf can find object files
27-
+[ -n "${PROOF_DIR_ARM}" ] && cd "${PROOF_DIR_ARM}"
27+
+[ -n "${PROOF_DIR_X86_64}" ] && cd "${PROOF_DIR_X86_64}"
2828
+
2929
+${LINE_EDITOR} ${HOLLIGHT_DIR}/ocaml-hol -init ${HOL_ML_PATH} -I ${HOLLIGHT_DIR} ${SITELIB:+-I "$SITELIB"}
3030
diff --git a/hol_4.sh b/hol_4.sh

nix/s2n_bignum/default.nix

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
{ stdenv, fetchFromGitHub, writeText, ... }:
55
stdenv.mkDerivation rec {
66
pname = "s2n_bignum";
7-
version = "1fcccc89cc7762ae5c56ec660f25b5f1358ba308";
7+
version = "2ab2252b8505e58a7c3392f8ad823782032b61e7";
88
src = fetchFromGitHub {
9-
owner = "jargh";
10-
repo = "s2n-bignum-dev";
9+
owner = "awslabs";
10+
repo = "s2n-bignum";
1111
rev = "${version}";
12-
hash = "sha256-hSJ2WlrwVlTF3wSdMfdBbovBXMG5vltfPxp36hOMd5c=";
12+
hash = "sha256-7lil3jAFo5NiyNOSBYZcRjduXkotV3x4PlxXSKt63M8=";
1313
};
1414
setupHook = writeText "setup-hook.sh" ''
1515
export S2N_BIGNUM_DIR="$1"

proofs/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,7 @@ This directory contains material related to the formal verification of the sourc
77
## C verification: CBMC
88

99
We use the [C Bounded Model Checker (CBMC)](https://github.com/diffblue/cbmc) to show the absence of various classes of undefined behaviour in the mldsa-native C source, including out of bounds memory accesses and integer overflows. See [proofs/cbmc](cbmc), or the [proof_guide](https://github.com/pq-code-package/mlkem-native/blob/main/proofs/cbmc/proof_guide.md).
10+
11+
## Assembly verification: HOL-Light
12+
13+
We use the [HOL-Light](https://github.com/jrh13/hol-light) interactive theorem prover alongside the verification infrastructure from [s2n-bignum](https://github.com/awslabs/s2n-bignum) to show the functional correctness of various highly optimized assembly routines in mlkem-native at the object-code level. See [proofs/hol_light/x86_64](hol_light/x86_64).

proofs/hol_light/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
2+
**/*.o
3+
**/*.native
4+
**/*.correct

proofs/hol_light/x86_64/Makefile

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#############################################################################
2+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
4+
#############################################################################
5+
6+
#
7+
# This Makefile is derived from the Makefile x86/Makefile in s2n-bignum.
8+
# - Remove all s2n-bignum proofs and tutorial, add mldsa-native proofs
9+
# - Minor path modifications to support base theories from s2n-bignum
10+
# to reside in a separate read-only directory
11+
#
12+
13+
.DEFAULT_GOAL := run_proofs
14+
15+
OSTYPE_RESULT=$(shell uname -s)
16+
ARCHTYPE_RESULT=$(shell uname -m)
17+
18+
SRC ?= $(S2N_BIGNUM_DIR)
19+
SRC_X86 ?= $(SRC)/x86
20+
21+
# Add explicit language input parameter to cpp, otherwise the use of #n for
22+
# numeric literals in x86 code is a problem when used inside #define macros
23+
# since normally that means stringization.
24+
#
25+
# Some clang-based preprocessors seem to behave differently, and get confused
26+
# by single-quote characters in comments, so we eliminate // comments first.
27+
28+
ifeq ($(OSTYPE_RESULT),Darwin)
29+
PREPROCESS=sed -e 's/\/\/.*//' | $(CC) -E -xassembler-with-cpp -
30+
else
31+
PREPROCESS=$(CC) -E -xassembler-with-cpp -
32+
endif
33+
34+
# Generally GNU-type assemblers are happy with multiple instructions on
35+
# a line, but we split them up anyway just in case.
36+
37+
SPLIT=tr ';' '\n'
38+
39+
# If actually on an x86_64 machine, just use the assembler (as). Otherwise
40+
# use a cross-assembling version so that the code can still be assembled
41+
# and the proofs checked against the object files (though you won't be able
42+
# to run code without additional emulation infrastructure). For the clang
43+
# version on OS X we just add the "-arch x86_64" option. For the Linux/gcc
44+
# toolchain we assume the presence of the special cross-assembler. This
45+
# can be installed via something like:
46+
#
47+
# sudo apt-get install binutils-x86-64-linux-gnu
48+
49+
ifeq ($(ARCHTYPE_RESULT),x86_64)
50+
ASSEMBLE=as
51+
OBJDUMP=objdump -d
52+
else
53+
ifeq ($(OSTYPE_RESULT),Darwin)
54+
ASSEMBLE=as -arch x86_64
55+
OBJDUMP=otool -tvV
56+
else
57+
ASSEMBLE=x86_64-linux-gnu-as
58+
OBJDUMP=x86_64-linux-gnu-objdump -d
59+
endif
60+
endif
61+
62+
OBJ = mldsa/mldsa_ntt.o
63+
64+
# Build object files from assembly sources
65+
$(OBJ): %.o : %.S
66+
@echo "Preparing $@ ..."
67+
@echo "AS: `$(ASSEMBLE) --version`"
68+
@echo "OBJDUMP: `$(OBJDUMP) --version`"
69+
$(Q)[ -d $(@D) ] || mkdir -p $(@D)
70+
cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ -
71+
# MacOS may generate relocations in non-text sections that break
72+
# the object file parser in HOL-Light
73+
strip $@
74+
75+
clean:; rm -f */*.o */*/*.o */*.correct */*.native
76+
77+
# Proof-related parts
78+
#
79+
# The proof files are all independent, though each one loads the
80+
# same common infrastructure "base.ml". So you can potentially
81+
# run the proofs in parallel for more speed, e.g.
82+
#
83+
# nohup make -j 16 proofs &
84+
#
85+
# If you build hol-light yourself (see https://github.com/jrh13/hol-light)
86+
# in your home directory, and do "make" inside the subdirectory hol-light,
87+
# then the following HOLDIR setting should be right:
88+
89+
HOLDIR?=$(HOLLIGHTDIR)
90+
HOLLIGHT:=$(HOLDIR)/hol.sh
91+
92+
BASE?=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
93+
94+
PROOF_BINS = $(OBJ:.o=.native)
95+
PROOF_LOGS = $(OBJ:.o=.correct)
96+
97+
# Build precompiled binary for dumping bytecodes
98+
proofs/dump_bytecode.native: proofs/dump_bytecode.ml $(OBJ)
99+
./proofs/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@"
100+
101+
# Build precompiled native binaries of HOL Light proofs
102+
103+
.SECONDEXPANSION:
104+
%.native: proofs/$$(*F).ml %.o ; ./proofs/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@"
105+
106+
# Run them and print the standard output+error at *.correct
107+
%.correct: %.native
108+
$< 2>&1 | tee $@
109+
@if (grep -i "error:\|exception:" "$@" >/dev/null); then \
110+
echo "$< had errors!"; \
111+
exit 1; \
112+
else \
113+
echo "$< OK"; \
114+
fi
115+
116+
build_proofs: $(PROOF_BINS);
117+
run_proofs: build_proofs $(PROOF_LOGS);
118+
119+
proofs: run_proofs ; $(SRC)/tools/count-proofs.sh .
120+
121+
dump_bytecode: proofs/dump_bytecode.native
122+
./$<
123+
124+
.PHONY: proofs build_proofs run_proofs sematest clean dump_bytecode
125+
126+
# Always run sematest regardless of dependency check
127+
FORCE: ;
128+
# Always use max. # of cores because in Makefile one cannot get the passed number of -j.
129+
# A portable way of getting the number of max. cores:
130+
# https://stackoverflow.com/a/23569003/1488216
131+
NUM_CORES_FOR_SEMATEST = $(shell getconf _NPROCESSORS_ONLN)
132+
sematest: FORCE $(OBJ) $(SRC_X86)/proofs/simulator_iclasses.ml $(SRC_X86)/proofs/simulator.native
133+
$(SRC)/tools/run-sematest.sh x86 $(NUM_CORES_FOR_SEMATEST)

proofs/hol_light/x86_64/README.md

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
[//]: # (SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0)
2+
3+
# HOL Light functional correctness proofs
4+
5+
This directory contains functional correctness proofs for all x86_64 assembly routines
6+
used in mldsa-native. The proofs were largely developed by John Harrison and Jake Massimo,
7+
and are written in the [HOL Light](https://hol-light.github.io/) theorem
8+
prover, utilizing the assembly verification infrastructure from [s2n-bignum](https://github.com/awslabs/s2n-bignum).
9+
10+
Each function is proved in a separate `.ml` file in [proofs/](proofs). Each file
11+
contains the byte code being verified, as well as the specification that is being
12+
proved.
13+
14+
## Primer
15+
16+
Proofs are 'post-hoc' in the sense that HOL-Light/s2n-bignum operate on the final object code. In particular, the means by which the code was generated need not be trusted.
17+
18+
Specifications are essentially [Hoare triples](https://en.wikipedia.org/wiki/Hoare_logic), with the noteworthy difference that the program is implicit as the content of memory at the PC; which is asserted to
19+
be the code under verification as part of the precondition. For example, the following is the specification of the `mldsa_ntt` function:
20+
21+
```ocaml
22+
(* For all (abbreviated by `!` in HOL):
23+
- a: Polynomial coefficients pointer
24+
- zetas: NTT constants pointer
25+
- x: Original polynomial coefficients
26+
- pc: Current value of Program Counter (PC)
27+
- stackpointer: Stack pointer
28+
- returnaddress: Return address on the stack *)
29+
`!a zetas x pc stackpointer returnaddress.
30+
(* Alignment and non-overlapping requirements *)
31+
aligned 32 a /\
32+
aligned 32 zetas /\
33+
nonoverlapping (word pc,LENGTH mldsa_ntt_mc) (a, 1024) /\
34+
nonoverlapping (word pc,LENGTH mldsa_ntt_mc) (zetas, 2496) /\
35+
nonoverlapping (a, 1024) (zetas, 2496) /\
36+
nonoverlapping (stackpointer,8) (a, 1024) /\
37+
nonoverlapping (stackpointer,8) (zetas, 2496)
38+
==> ensures x86
39+
(* Precondition *)
40+
(\s. (* The memory at the current PC is the byte-code of mldsa_ntt() *)
41+
bytes_loaded s (word pc) mldsa_ntt_mc /\
42+
read RIP s = word pc /\
43+
read RSP s = stackpointer /\
44+
(* The return address is on the stack *)
45+
read (memory :> bytes64 stackpointer) s = returnaddress /\
46+
(* Arguments are passed via C calling convention *)
47+
C_ARGUMENTS [a; zetas] s /\
48+
(* NTT constants are properly loaded *)
49+
wordlist_from_memory(zetas,624) s = MAP (iword: int -> 32 word) mldsa_complete_qdata /\
50+
(* Input bounds checking *)
51+
(!i. i < 256 ==> abs(ival(x i)) <= &8380416) /\
52+
(* Give a name to the memory contents at the source pointer *)
53+
!i. i < 256
54+
==> read(memory :> bytes32(word_add a (word(4 * i)))) s = x i)
55+
(* Postcondition: Eventually we reach a state where ... *)
56+
(\s.
57+
(* The PC is the return address *)
58+
read RIP s = returnaddress /\
59+
(* Stack pointer is adjusted *)
60+
read RSP s = word_add stackpointer (word 8) /\
61+
(* The integers represented by the final memory contents
62+
* are congruent to the ML-DSA forward NTT transformation
63+
* of the original coefficients, modulo 8380417, with proper bounds *)
64+
!i. i < 256
65+
==> let zi = read(memory :> bytes32(word_add a (word(4 * i)))) s in
66+
(ival zi == mldsa_forward_ntt (ival o x) i) (mod &8380417) /\
67+
abs(ival zi) <= &42035261)
68+
(* Footprint: The program may modify (only) the ABI permitted registers
69+
* and flags, stack pointer, and the memory contents at the source pointer. *)
70+
(MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
71+
MAYCHANGE [memory :> bytes(a,1024)])`
72+
```
73+
74+
## Reproducing the proofs
75+
76+
To reproduce the proofs, enter the nix shell via
77+
78+
```bash
79+
nix develop --experimental-features 'nix-command flakes'
80+
```
81+
82+
from mldsa-native's base directory. Then
83+
84+
```bash
85+
make -C proofs/hol_light/x86_64
86+
```
87+
88+
will build and run the proofs. Note that this make take hours even on powerful machines.
89+
90+
For convenience, you can also use `tests hol_light` which wraps the `make` invocation above; see `tests hol_light --help`.
91+
92+
## What is covered?
93+
94+
All x86_64 assembly routines used in mldsa-native are covered. Currently this includes:
95+
96+
- ML-DSA Arithmetic:
97+
* x86_64 forward NTT: [mldsa_ntt.S](mldsa/mldsa_ntt.S)
98+
99+
The NTT function is optimized using AVX2 instructions and follows the s2n-bignum x86_64 assembly verification patterns.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) The mldsa-native project authors
3+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4+
#
5+
# This tiny script just lists the names of source files for which
6+
# we have a spec and proof in HOL-Light.
7+
8+
ROOT=$(git rev-parse --show-toplevel)
9+
cd $ROOT
10+
ls -1 proofs/hol_light/x86_64/mldsa/*.S | cut -d '/' -f 5 | sed 's/\.S//'

0 commit comments

Comments
 (0)