diff --git a/Gemfile b/Gemfile index 2573d3f..054a91d 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,7 @@ source 'https://rubygems.org' gemspec gem 'json', '~>2.0', platforms: %i[mri_18 jruby] +gem 'logger' gem 'rdf', '~>3.0' gem 'rdf-vocab', '~>3.0' diff --git a/Gemfile.lock b/Gemfile.lock index 174f602..c8287cb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -53,6 +53,7 @@ GEM json (2.7.1-java) latex-decode (0.4.0) link_header (0.0.8) + logger (1.7.0) mini_mime (1.1.5) minitest (5.20.0) multi_test (1.1.0) @@ -99,6 +100,7 @@ DEPENDENCIES gnuplot iconv json (~> 2.0) + logger minitest rake rdf (~> 3.0) diff --git a/lib/bibtex/lexer.rb b/lib/bibtex/lexer.rb index ff57337..60256ed 100644 --- a/lib/bibtex/lexer.rb +++ b/lib/bibtex/lexer.rb @@ -58,8 +58,8 @@ class Lexer string: /string/io, comment: /comment\b/io, preamble: /preamble\b/io, - key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-]+,}io, - optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"-]*,}io + key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"{}-]+,}io, + optional_key: %r{\s*[[:alpha:][:digit:] /:_!$\?\.%+;&\*'"{}-]*,}io } MODE = Hash.new(:meta).merge( @@ -120,7 +120,7 @@ def next_token @stack.shift end - # Returns true if the lexer is currenty parsing a BibTeX object. + # Returns true if the lexer is currently parsing a BibTeX object. def bibtex_mode? MODE[@mode] == :bibtex end @@ -302,13 +302,12 @@ def enter_object @mode = @active_object = :entry push [:NAME, @scanner.matched] - # TODO: DRY - try to parse key if @scanner.scan(Lexer.patterns[:lbrace]) @brace_level += 1 push([:LBRACE, '{']) @mode = :content if @brace_level > 1 || @brace_level == 1 && active?(:comment) - push [:KEY, @scanner.matched.chop.strip] if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) + parse_key end else @@ -316,6 +315,13 @@ def enter_object end end + def parse_key + return unless @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key]) + + key = @scanner.matched.chop.strip + push [:KEY, key] + end + # Called when parser leaves a BibTeX object. def leave_object @mode = :meta diff --git a/test/bibtex/test_lexer.rb b/test/bibtex/test_lexer.rb index 7dc100b..5904c91 100644 --- a/test/bibtex/test_lexer.rb +++ b/test/bibtex/test_lexer.rb @@ -7,33 +7,37 @@ class LexerTest < Minitest::Spec end it 'strips line breaks by default' do - Lexer.new.analyse(%(@string{ x = "foo\nbar" })).stack[-3].must_be :==, + _(Lexer.new.analyse(%(@string{ x = "foo\nbar" })).stack[-3]).must_be :==, [:STRING_LITERAL, 'foo bar'] end it 'strips whitespace after line breaks by default' do - Lexer.new.analyse(%(@string{ x = "foo\n bar" })).stack[-3].must_be :==, + _(Lexer.new.analyse(%(@string{ x = "foo\n bar" })).stack[-3]).must_be :==, [:STRING_LITERAL, 'foo bar'] end it 'matches KEY tokens' do - Lexer.new.analyse('@misc{foo, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{foo, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it 'matches KEY tokens with non-ascii characters' do - Lexer.new.analyse('@misc{löwe, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{löwe, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it 'matches KEY tokens after whitespace' do - Lexer.new.analyse('@misc{ foo, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@misc{ foo, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + end + + it 'matches KEY tokens with braces' do + _(Lexer.new.analyse('@misc{foo:{123}, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it "doesn't start a comment for types starting with but not equal @comment" do - Lexer.new.analyse('@commentary{staudinger, }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] + _(Lexer.new.analyse('@commentary{staudinger, }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :KEY, :RBRACE, false] end it "doesn't start a preamble for types starting with but not equal @preamble" do - Lexer.new.analyse('@preamblestring{ preamble }').symbols.must_be :==, [:AT, :NAME, :LBRACE, :NAME, :RBRACE, false] + _(Lexer.new.analyse('@preamblestring{ preamble }').symbols).must_be :==, [:AT, :NAME, :LBRACE, :NAME, :RBRACE, false] end end end diff --git a/test/test_bibtex.rb b/test/test_bibtex.rb index 3b9ffee..8701c88 100644 --- a/test/test_bibtex.rb +++ b/test/test_bibtex.rb @@ -101,19 +101,22 @@ def test_logger_can_be_assigned end def test_missing_key - assert_raises(BibTeX::ParseError) do + error = assert_raises(BibTeX::ParseError) do BibTeX.parse(<