Skip to content

Commit 9a84015

Browse files
committed
Add digit separator support for binary, octal, and hexadecimal numbers
Implements the digit separator specification from: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/digit_separators

Changes:
- Add digit separator support (underscore with optional whitespace/comments) for hexadecimal (0x), octal (0o), and binary (0b) number literals
- Decimal numbers already had digit separator support
- Add helper functions: skip_underscore_in_hexadecimal/octal/binary
- Update hexadecimal_constant, octal_constant, and binary_constant functions

Tests:
- Add 16 Rust unit tests in src/tests/parse_tokens.rs
- Add 18 Prolog integration tests in src/tests/digit_separators.pl
- Add CLI test configuration in tests/scryer/cli/src_tests/digit_separators.toml
- All tests verify: basic underscores, multiple underscores, whitespace after underscores, and comments between digits

Examples:
- 0xDE_AD, 0xFF_ 00, 0xDE_ /* comment */ AD
- 0o7_6, 0o77_ 00, 0o1_ /* octal */ 23
- 0b10_11, 0b1111_ 0000, 0b10_ /* binary */ 11

Reference: mthom#3132 (comment)
1 parent e4d9692 commit 9a84015

File tree

4 files changed

+284
-42
lines changed

4 files changed

+284
-42
lines changed

src/parser/lexer.rs

Lines changed: 78 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -489,20 +489,14 @@ impl<'a, R: CharRead> Lexer<'a, R> {
489489
if hexadecimal_digit_char!(c) {
490490
let mut token = String::with_capacity(16);
491491

492-
loop {
493-
if hexadecimal_digit_char!(c) {
494-
self.skip_char(c);
495-
token.push(c);
496-
c = match self.lookahead_char() {
497-
Ok(c) => c,
498-
Err(e) if e.is_unexpected_eof() => {
499-
break;
500-
}
501-
Err(e) => return Err(e),
502-
};
503-
} else {
504-
break;
505-
}
492+
self.skip_char(c);
493+
token.push(c);
494+
c = try_nt!(token, self.skip_underscore_in_hexadecimal());
495+
496+
while hexadecimal_digit_char!(c) {
497+
token.push(c);
498+
self.skip_char(c);
499+
c = try_nt!(token, self.skip_underscore_in_hexadecimal());
506500
}
507501

508502
self.parse_integer_by_radix(&token, 16)
@@ -520,20 +514,14 @@ impl<'a, R: CharRead> Lexer<'a, R> {
520514
if octal_digit_char!(c) {
521515
let mut token = String::with_capacity(16);
522516

523-
loop {
524-
if octal_digit_char!(c) {
525-
self.skip_char(c);
526-
token.push(c);
527-
c = match self.lookahead_char() {
528-
Ok(c) => c,
529-
Err(e) if e.is_unexpected_eof() => {
530-
break;
531-
}
532-
Err(e) => return Err(e),
533-
};
534-
} else {
535-
break;
536-
}
517+
self.skip_char(c);
518+
token.push(c);
519+
c = try_nt!(token, self.skip_underscore_in_octal());
520+
521+
while octal_digit_char!(c) {
522+
token.push(c);
523+
self.skip_char(c);
524+
c = try_nt!(token, self.skip_underscore_in_octal());
537525
}
538526

539527
self.parse_integer_by_radix(&token, 8)
@@ -551,20 +539,14 @@ impl<'a, R: CharRead> Lexer<'a, R> {
551539
if binary_digit_char!(c) {
552540
let mut token = String::with_capacity(16);
553541

554-
loop {
555-
if binary_digit_char!(c) {
556-
self.skip_char(c);
557-
token.push(c);
558-
c = match self.lookahead_char() {
559-
Ok(c) => c,
560-
Err(e) if e.is_unexpected_eof() => {
561-
break;
562-
}
563-
Err(e) => return Err(e),
564-
};
565-
} else {
566-
break;
567-
}
542+
self.skip_char(c);
543+
token.push(c);
544+
c = try_nt!(token, self.skip_underscore_in_binary());
545+
546+
while binary_digit_char!(c) {
547+
token.push(c);
548+
self.skip_char(c);
549+
c = try_nt!(token, self.skip_underscore_in_binary());
568550
}
569551

570552
self.parse_integer_by_radix(&token, 2)
@@ -693,6 +675,60 @@ impl<'a, R: CharRead> Lexer<'a, R> {
693675
}
694676
}
695677

678+
fn skip_underscore_in_hexadecimal(&mut self) -> Result<char, ParserError> {
679+
let mut c = self.lookahead_char()?;
680+
681+
if c == '_' {
682+
self.skip_char(c);
683+
self.scan_for_layout()?;
684+
c = self.lookahead_char()?;
685+
686+
if hexadecimal_digit_char!(c) {
687+
Ok(c)
688+
} else {
689+
Err(ParserError::ParseBigInt(self.line_num, self.col_num))
690+
}
691+
} else {
692+
Ok(c)
693+
}
694+
}
695+
696+
fn skip_underscore_in_octal(&mut self) -> Result<char, ParserError> {
697+
let mut c = self.lookahead_char()?;
698+
699+
if c == '_' {
700+
self.skip_char(c);
701+
self.scan_for_layout()?;
702+
c = self.lookahead_char()?;
703+
704+
if octal_digit_char!(c) {
705+
Ok(c)
706+
} else {
707+
Err(ParserError::ParseBigInt(self.line_num, self.col_num))
708+
}
709+
} else {
710+
Ok(c)
711+
}
712+
}
713+
714+
fn skip_underscore_in_binary(&mut self) -> Result<char, ParserError> {
715+
let mut c = self.lookahead_char()?;
716+
717+
if c == '_' {
718+
self.skip_char(c);
719+
self.scan_for_layout()?;
720+
c = self.lookahead_char()?;
721+
722+
if binary_digit_char!(c) {
723+
Ok(c)
724+
} else {
725+
Err(ParserError::ParseBigInt(self.line_num, self.col_num))
726+
}
727+
} else {
728+
Ok(c)
729+
}
730+
}
731+
696732
fn parse_integer_by_radix(&mut self, token: &str, radix: u32) -> Result<GInteger, ParserError> {
697733
i64::from_str_radix(token, radix)
698734
.map(|n| {

src/tests/digit_separators.pl

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
:- module(digit_separators_tests, []).
2+
:- use_module(test_framework).
3+
4+
% test/2 facts: each clause pairs a human-readable description with a goal.
% Every goal binds a variable to a number literal written with digit
% separators and then checks, with =:=, that it equals the expected value.
% NOTE(review): presumably these facts are collected and run by
% test_framework -- confirm against that module.

% --- Decimal literals ---
test("decimal with single underscore", (
5+
X = 1_000,
6+
X =:= 1000
7+
)).
8+
9+
test("decimal with multiple underscores", (
10+
X = 1_000_000,
11+
X =:= 1000000
12+
)).
13+
14+
test("decimal with underscore and whitespace", (
15+
X = 123_ 456,
16+
X =:= 123456
17+
)).
18+
19+
test("decimal with underscore and comment", (
20+
X = 123_ /* comment */ 456,
21+
X =:= 123456
22+
)).
23+
24+
% --- Hexadecimal (0x) literals ---
test("hexadecimal with single underscore", (
25+
X = 0xDE_AD,
26+
X =:= 0xDEAD
27+
)).
28+
29+
test("hexadecimal with multiple underscores", (
30+
X = 0x1_2_3_4,
31+
X =:= 0x1234
32+
)).
33+
34+
test("hexadecimal with underscore and whitespace", (
35+
X = 0xFF_ 00,
36+
X =:= 0xFF00
37+
)).
38+
39+
test("hexadecimal with underscore and comment", (
40+
X = 0xDE_ /* test */ AD,
41+
X =:= 0xDEAD
42+
)).
43+
44+
% --- Octal (0o) literals ---
test("octal with single underscore", (
45+
X = 0o7_6,
46+
X =:= 0o76
47+
)).
48+
49+
test("octal with multiple underscores", (
50+
X = 0o1_2_3,
51+
X =:= 0o123
52+
)).
53+
54+
test("octal with underscore and whitespace", (
55+
X = 0o77_ 00,
56+
X =:= 0o7700
57+
)).
58+
59+
test("octal with underscore and comment", (
60+
X = 0o1_ /* octal */ 23,
61+
X =:= 0o123
62+
)).
63+
64+
% --- Binary (0b) literals ---
test("binary with single underscore", (
65+
X = 0b10_11,
66+
X =:= 0b1011
67+
)).
68+
69+
test("binary with multiple underscores", (
70+
X = 0b1_0_1_0,
71+
X =:= 0b1010
72+
)).
73+
74+
test("binary with underscore and whitespace", (
75+
X = 0b1111_ 0000,
76+
X =:= 0b11110000
77+
)).
78+
79+
test("binary with underscore and comment", (
80+
X = 0b10_ /* binary */ 11,
81+
X =:= 0b1011
82+
)).
83+
84+
% --- Additional cases ---
% Several separators in one larger decimal literal.
test("large decimal number with separators", (
85+
X = 999_999_999,
86+
X =:= 999999999
87+
)).
88+
89+
% Upper- and lower-case hex digits, each with a separator, must agree.
test("hexadecimal case insensitive", (
90+
X1 = 0xAB_CD,
91+
X2 = 0xab_cd,
92+
X1 =:= X2
93+
)).

src/tests/parse_tokens.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,115 @@ fn comment_then_eof() -> Result<(), ParserError> {
107107
assert!(read_all_tokens("% only a comment").is_err());
108108
Ok(())
109109
}
110+
111+
/// A single `_` between decimal digits is dropped: `1_000` lexes as the
/// one-token stream containing the fixnum 1000.
#[test]
fn decimal_with_underscore() -> Result<(), ParserError> {
    let lexed = read_all_tokens("1_000")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(1000)))];

    assert_eq!(lexed, expected);
    Ok(())
}
117+
118+
/// Several separators in one decimal literal: `1_000_000` lexes as the
/// single fixnum 1000000.
#[test]
fn decimal_with_multiple_underscores() -> Result<(), ParserError> {
    let lexed = read_all_tokens("1_000_000")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(1000000)))];

    assert_eq!(lexed, expected);
    Ok(())
}
124+
125+
/// Whitespace may follow the separator: `123_ 456` lexes as the single
/// fixnum 123456.
#[test]
fn decimal_with_underscore_and_whitespace() -> Result<(), ParserError> {
    let lexed = read_all_tokens("123_ 456")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(123456)))];

    assert_eq!(lexed, expected);
    Ok(())
}
131+
132+
/// A block comment may sit between the separator and the next digits:
/// `123_ /* comment */ 456` lexes as the single fixnum 123456.
#[test]
fn decimal_with_underscore_and_comment() -> Result<(), ParserError> {
    let lexed = read_all_tokens("123_ /* comment */ 456")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(123456)))];

    assert_eq!(lexed, expected);
    Ok(())
}
138+
139+
/// A single `_` between hexadecimal digits is dropped: `0xDE_AD` lexes as
/// the single fixnum 0xDEAD.
#[test]
fn hexadecimal_with_underscore() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0xDE_AD")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0xDEAD)))];

    assert_eq!(lexed, expected);
    Ok(())
}
145+
146+
/// Several separators in one hexadecimal literal: `0x1_2_3_4` lexes as the
/// single fixnum 0x1234.
#[test]
fn hexadecimal_with_multiple_underscores() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0x1_2_3_4")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0x1234)))];

    assert_eq!(lexed, expected);
    Ok(())
}
152+
153+
/// Whitespace may follow the separator in a hexadecimal literal:
/// `0xFF_ 00` lexes as the single fixnum 0xFF00.
#[test]
fn hexadecimal_with_underscore_and_whitespace() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0xFF_ 00")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0xFF00)))];

    assert_eq!(lexed, expected);
    Ok(())
}
159+
160+
/// A block comment may sit between the separator and the next hexadecimal
/// digits: `0xDE_ /* test */ AD` lexes as the single fixnum 0xDEAD.
#[test]
fn hexadecimal_with_underscore_and_comment() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0xDE_ /* test */ AD")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0xDEAD)))];

    assert_eq!(lexed, expected);
    Ok(())
}
166+
167+
/// A single `_` between octal digits is dropped: `0o7_6` lexes as the
/// single fixnum 0o76.
#[test]
fn octal_with_underscore() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0o7_6")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0o76)))];

    assert_eq!(lexed, expected);
    Ok(())
}
173+
174+
/// Several separators in one octal literal: `0o1_2_3` lexes as the single
/// fixnum 0o123.
#[test]
fn octal_with_multiple_underscores() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0o1_2_3")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0o123)))];

    assert_eq!(lexed, expected);
    Ok(())
}
180+
181+
/// Whitespace may follow the separator in an octal literal: `0o77_ 00`
/// lexes as the single fixnum 0o7700.
#[test]
fn octal_with_underscore_and_whitespace() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0o77_ 00")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0o7700)))];

    assert_eq!(lexed, expected);
    Ok(())
}
187+
188+
/// A block comment may sit between the separator and the next octal
/// digits: `0o1_ /* octal */ 23` lexes as the single fixnum 0o123.
#[test]
fn octal_with_underscore_and_comment() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0o1_ /* octal */ 23")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0o123)))];

    assert_eq!(lexed, expected);
    Ok(())
}
194+
195+
/// A single `_` between binary digits is dropped: `0b10_11` lexes as the
/// single fixnum 0b1011.
#[test]
fn binary_with_underscore() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0b10_11")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0b1011)))];

    assert_eq!(lexed, expected);
    Ok(())
}
201+
202+
/// Several separators in one binary literal: `0b1_0_1_0` lexes as the
/// single fixnum 0b1010.
#[test]
fn binary_with_multiple_underscores() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0b1_0_1_0")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0b1010)))];

    assert_eq!(lexed, expected);
    Ok(())
}
208+
209+
/// Whitespace may follow the separator in a binary literal:
/// `0b1111_ 0000` lexes as the single fixnum 0b11110000.
#[test]
fn binary_with_underscore_and_whitespace() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0b1111_ 0000")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0b11110000)))];

    assert_eq!(lexed, expected);
    Ok(())
}
215+
216+
/// A block comment may sit between the separator and the next binary
/// digits: `0b10_ /* binary */ 11` lexes as the single fixnum 0b1011.
#[test]
fn binary_with_underscore_and_comment() -> Result<(), ParserError> {
    let lexed = read_all_tokens("0b10_ /* binary */ 11")?;
    let expected = [Token::Literal(Literal::Fixnum(Fixnum::build_with(0b1011)))];

    assert_eq!(lexed, expected);
    Ok(())
}
tests/scryer/cli/src_tests/digit_separators.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
args = ["-f", "--no-add-history", "src/tests/digit_separators.pl", "-f", "-g", "use_module(library(digit_separators_tests)), digit_separators_tests:main_quiet(digit_separators_tests)"]

0 commit comments

Comments
 (0)