From e3820315b009209965a4be5bf786013ab6e1cd3b Mon Sep 17 00:00:00 2001 From: Mario Perez Date: Mon, 19 Jul 2021 16:21:36 +0200 Subject: [PATCH 1/5] feat: allows braces on citekey --- lib/bibtex/lexer.rb | 11 +++++++---- test/bibtex/test_lexer.rb | 18 +++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/bibtex/lexer.rb b/lib/bibtex/lexer.rb index ff57337..f255aa9 100644 --- a/lib/bibtex/lexer.rb +++ b/lib/bibtex/lexer.rb @@ -58,8 +58,8 @@ class Lexer string: /string/io, comment: /comment\b/io, preamble: /preamble\b/io, - key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-]+,}io, - optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-]*,}io + key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-\{\}]+,}io, + optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-\{\}]*,}io } MODE = Hash.new(:meta).merge( @@ -120,7 +120,7 @@ def next_token @stack.shift end - # Returns true if the lexer is currenty parsing a BibTeX object. + # Returns true if the lexer is currently parsing a BibTeX object. def bibtex_mode? MODE[@mode] == :bibtex end @@ -308,7 +308,10 @@ def enter_object push([:LBRACE, '{']) @mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment) - push [:KEY, @scanner.matched.chop.strip] if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) + if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) + key = @scanner.matched.chop.strip.tr('{}', '') + push [:KEY, key] + end end else diff --git a/test/bibtex/test_lexer.rb b/test/bibtex/test_lexer.rb index 7dc100b..5904c91 100644 --- a/test/bibtex/test_lexer.rb +++ b/test/bibtex/test_lexer.rb @@ -7,33 +7,37 @@ class LexerTest < Minitest::Spec end it 'strips line breaks by default' do - Lexer.new.analyse(%(@string{ x = "foo\nbar" })).stack[-3].must_be :==, + _(Lexer.new.analyse(%(@string{ x = "foo\nbar" })).stack[-3]).must_be :==, [:STRING_LITERAL, 'foo bar'] end it 'strips whitespace after line breaks by default' do - Lexer.new.analyse(%(@string{ x = "foo\n bar" })).stack[-3].must_be :==, + _(Lexer.new.analyse(%(@string{ x = "foo\n bar" })).stack[-3]).must_be :==, [:STRING_LITERAL, 'foo bar'] end it 'matches KEY tokens' do - Lexer.new.analyse('@misc{foo, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{foo, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it 'matches KEY tokens with non-ascii characters' do - Lexer.new.analyse('@misc{löwe, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{löwe, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it 'matches KEY tokens after whitespace' do - Lexer.new.analyse('@misc{ foo, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{ foo, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + end + + it 'matches KEY tokens with braces' do + _(Lexer.new.analyse('@misc{foo:{123}, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it "doesn't start a comment for types starting with but not equal @comment" do - Lexer.new.analyse('@commentary{staudinger, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@commentary{staudinger, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it "doesn't start a preamble for types starting with but not equal @preamble" do - Lexer.new.analyse('@preamblestring{ preamble }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :NAME, :RBRACE, false] + _(Lexer.new.analyse('@preamblestring{ preamble }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :NAME, :RBRACE, false] end end end From caf2253ea50e1896124f5a949c46d5c6953ab3ce Mon Sep 17 00:00:00 2001 From: Mario Perez Date: Wed, 21 Jul 2021 10:14:34 +0200 Subject: [PATCH 2/5] refactor: extracts parse_key - do not remove braces - fixes test helper --- lib/bibtex/lexer.rb | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/bibtex/lexer.rb b/lib/bibtex/lexer.rb index f255aa9..2902485 100644 --- a/lib/bibtex/lexer.rb +++ b/lib/bibtex/lexer.rb @@ -302,16 +302,12 @@ def enter_object @mode = @active_object = :entry push [:NAME, @scanner.matched] - # TODO: DRY - try to parse key if @scanner.scan(Lexer.patterns[:lbrace]) @brace_level += 1 push([:LBRACE, '{']) @mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment) - if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) - key = @scanner.matched.chop.strip.tr('{}', '') - push [:KEY, key] - end + parse_key end else @@ -319,6 +315,13 @@ def enter_object end end + def parse_key + return unless @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) + + key = @scanner.matched.chop.strip + push [:KEY, key] + end + # Called when parser leaves a BibTeX object. def leave_object @mode = :meta From 6f5734b559b7c6c2f26a1c44e06e35a8f9522307 Mon Sep 17 00:00:00 2001 From: Mario Perez Date: Fri, 22 Nov 2024 21:27:47 +0100 Subject: [PATCH 3/5] refactor: switch placement of curly braces --- lib/bibtex/lexer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bibtex/lexer.rb b/lib/bibtex/lexer.rb index 2902485..e88fef7 100644 --- a/lib/bibtex/lexer.rb +++ b/lib/bibtex/lexer.rb @@ -58,8 +58,8 @@ class Lexer string: /string/io, comment: /comment\b/io, preamble: /preamble\b/io, - key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-\{\}]+,}io, - optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-\{\}]*,}io + key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"\{\}-]+,}io, + optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"\{\}-]*,}io } MODE = Hash.new(:meta).merge( From 6bac427e911437df43224cb8b59c64bc85b30cc5 Mon Sep 17 00:00:00 2001 From: Mario Perez Date: Sat, 19 Jul 2025 17:09:40 +0200 Subject: [PATCH 4/5] adds logger to silence warning --- Gemfile | 1 + Gemfile.lock | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Gemfile b/Gemfile index 2573d3f..054a91d 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,7 @@ source 'https://rubygems.org' gemspec gem 'json', '~>2.0', platforms: %i[mri_18 jruby] +gem 'logger' gem 'rdf', '~>3.0' gem 'rdf-vocab', '~>3.0' diff --git a/Gemfile.lock b/Gemfile.lock index 174f602..c8287cb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -53,6 +53,7 @@ GEM json (2.7.1-java) latex-decode (0.4.0) link_header (0.0.8) + logger (1.7.0) mini_mime (1.1.5) minitest (5.20.0) multi_test (1.1.0) @@ -99,6 +100,7 @@ DEPENDENCIES gnuplot iconv json (~> 2.0) + logger minitest rake rdf (~> 3.0) From d11047b2f3ddb9c3903c86bf64b5f62fd74df400 Mon Sep 17 00:00:00 2001 From: Mario Perez Date: Sat, 19 Jul 2025 17:37:56 +0200 Subject: [PATCH 5/5] fixes test --- lib/bibtex/lexer.rb | 4 ++-- test/test_bibtex.rb | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/bibtex/lexer.rb b/lib/bibtex/lexer.rb index e88fef7..60256ed 100644 --- a/lib/bibtex/lexer.rb +++ b/lib/bibtex/lexer.rb @@ -58,8 +58,8 @@ class Lexer string: /string/io, comment: /comment\b/io, preamble: /preamble\b/io, - key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"\{\}-]+,}io, - optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"\{\}-]*,}io + key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"{}-]+,}io, + optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"{}-]*,}io } MODE = Hash.new(:meta).merge( diff --git a/test/test_bibtex.rb b/test/test_bibtex.rb index 3b9ffee..8701c88 100644 --- a/test/test_bibtex.rb +++ b/test/test_bibtex.rb @@ -101,19 +101,22 @@ def test_logger_can_be_assigned end def test_missing_key - assert_raises(BibTeX::ParseError) do + error = assert_raises(BibTeX::ParseError) do BibTeX.parse(<