From 459631224023bed71686c0015b976fc1896fa41a Mon Sep 17 00:00:00 2001
From: Chris Coetzee <chriscz@users.noreply.github.com>
Date: Tue, 31 Mar 2020 14:14:01 +0200
Subject: [PATCH 1/5] Add support for unhexing values generated by auditd

---
 lib/audit_log_parser.rb       | 25 ++++++++++++++++++++++---
 spec/audit_log_parser_spec.rb | 23 ++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/lib/audit_log_parser.rb b/lib/audit_log_parser.rb
index d486ed7..6ad1473 100644
--- a/lib/audit_log_parser.rb
+++ b/lib/audit_log_parser.rb
@@ -4,13 +4,16 @@
 class AuditLogParser
   class Error < StandardError; end
 
-  def self.parse(src, flatten: false)
+  # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
+  HEX_RE = /^[A-F0-9]{8,}$/
+
+  def self.parse(src, flatten: false, unhex: false)
     src.each_line.map do |line|
-      parse_line(line, flatten: flatten)
+      parse_line(line, flatten: flatten, unhex: unhex)
     end
   end
 
-  def self.parse_line(line, flatten: false)
+  def self.parse_line(line, flatten: false, unhex: false)
     line = line.strip
 
     if line !~ /type=\w+ msg=audit\([\d.:]+\): */
@@ -22,10 +25,26 @@ def self.parse_line(line, flatten: false)
     header.sub!(/: *\z/, '')
     header = parse_header(header)
     body = parse_body(body.strip)
+
+    if unhex
+      unhex_hash!(header)
+      unhex_hash!(body)
+    end
+
     result = {'header' => header, 'body' => body}
     flatten ? flatten_hash(result) : result
   end
 
+  def self.unhex_hash!(hash)
+    hash.each do |key, value|
+      if value.kind_of?(Hash)
+        unhex_hash!(value)
+      elsif (value.length % 2) == 0 && HEX_RE.match?(value)
+        value[0..-1] = [value].pack("H*")
+      end
+    end
+  end
+
   def self.parse_header(header)
     result = {}
 
diff --git a/spec/audit_log_parser_spec.rb b/spec/audit_log_parser_spec.rb
index 125943e..e38c8d1 100644
--- a/spec/audit_log_parser_spec.rb
+++ b/spec/audit_log_parser_spec.rb
@@ -1,7 +1,18 @@
 RSpec.describe AuditLogParser do
+  let(:unhex_audit_log) do
+    {
+      %q{type=PROCTITLE msg=audit(1585655101.154:27786): proctitle=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031} => 
+      {"header"=>{"type"=>"PROCTITLE", "msg"=>"audit(1585655101.154:27786)"},
+      "body"=>
+        {
+          "proctitle" => "/bin/sh\u0000-c\u0000command -v debian-sa1 > /dev/null && debian-sa1 1 1", 
+        }
+      }
+    }
+  end
   let(:audit_log) do
     {
-      %q{type=SYSCALL msg=audit(1364481363.243:24287): arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 ses=1 comm="cat" exe="/bin/cat" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="sshd_config"} =>
+      %q{type=SYSCALL msg=audit(1364481363.243:24287): arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500  tty=pts0 ses=1 comm="cat" exe="/bin/cat" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="sshd_config"} =>
       {"header"=>{"type"=>"SYSCALL", "msg"=>"audit(1364481363.243:24287)"},
       "body"=>
         {"arch"=>"c000003e",
@@ -95,6 +106,16 @@
       expect(AuditLogParser.parse(lines)).to eq audit_log.values
     end
 
+    specify '#parse correctly unhex proctitle' do
+      lines = unhex_audit_log.keys.join("\n")
+      expect(AuditLogParser.parse(lines, unhex: true)).to eq unhex_audit_log.values
+    end
+
+    specify '#parse unhex does not affect unhexable' do
+      lines = audit_log.keys.join("\n")
+      expect(AuditLogParser.parse(lines, unhex: true)).to eq audit_log.values
+    end
+
     context 'when flatten' do
       specify '#parse can be parsed flatly' do
         lines = audit_log.keys.join("\n")

From 6af04bb843a4e63b244fe4c1e1cbb9e142825a78 Mon Sep 17 00:00:00 2001
From: Chris Coetzee <chriscz@users.noreply.github.com>
Date: Tue, 31 Mar 2020 14:31:49 +0200
Subject: [PATCH 2/5] Use Regexp#match instead of match? for Ruby < 2.4 compatibility

---
 lib/audit_log_parser.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/audit_log_parser.rb b/lib/audit_log_parser.rb
index 6ad1473..61cbbae 100644
--- a/lib/audit_log_parser.rb
+++ b/lib/audit_log_parser.rb
@@ -39,7 +39,7 @@ def self.unhex_hash!(hash)
     hash.each do |key, value|
       if value.kind_of?(Hash)
         unhex_hash!(value)
-      elsif (value.length % 2) == 0 && HEX_RE.match?(value)
+      elsif (value.length % 2) == 0 && HEX_RE.match(value)
         value[0..-1] = [value].pack("H*")
       end
     end

From 180422fcfd602524de2402b1d921041a14676191 Mon Sep 17 00:00:00 2001
From: Chris Coetzee <chriscz@users.noreply.github.com>
Date: Tue, 31 Mar 2020 15:15:32 +0200
Subject: [PATCH 3/5] Add support for restricting unhex to specific keys

---
 lib/audit_log_parser.rb       | 18 ++++++------
 spec/audit_log_parser_spec.rb | 54 ++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/lib/audit_log_parser.rb b/lib/audit_log_parser.rb
index 61cbbae..d9c944d 100644
--- a/lib/audit_log_parser.rb
+++ b/lib/audit_log_parser.rb
@@ -7,13 +7,14 @@ class Error < StandardError; end
   # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
   HEX_RE = /^[A-F0-9]{8,}$/
 
-  def self.parse(src, flatten: false, unhex: false)
+  # @param unhex_keys [Array<String>] with * meaning all
+  def self.parse(src, flatten: false, unhex: false, unhex_keys: ['*'])
     src.each_line.map do |line|
-      parse_line(line, flatten: flatten, unhex: unhex)
+      parse_line(line, flatten: flatten, unhex: unhex, unhex_keys: unhex_keys)
     end
   end
 
-  def self.parse_line(line, flatten: false, unhex: false)
+  def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'])
     line = line.strip
 
     if line !~ /type=\w+ msg=audit\([\d.:]+\): */
@@ -27,19 +28,20 @@ def self.parse_line(line, flatten: false, unhex: false)
     body = parse_body(body.strip)
 
     if unhex
-      unhex_hash!(header)
-      unhex_hash!(body)
+      unhex_keys = unhex_keys.include?('*') ? :all : unhex_keys
+      unhex_hash!(header, unhex_keys)
+      unhex_hash!(body, unhex_keys)
     end
 
     result = {'header' => header, 'body' => body}
     flatten ? flatten_hash(result) : result
   end
 
-  def self.unhex_hash!(hash)
+  def self.unhex_hash!(hash, unhex_keys)
     hash.each do |key, value|
       if value.kind_of?(Hash)
-        unhex_hash!(value)
-      elsif (value.length % 2) == 0 && HEX_RE.match(value)
+        unhex_hash!(value, unhex_keys)
+      elsif (unhex_keys == :all || unhex_keys.include?(key)) && (value.length % 2) == 0 && HEX_RE.match(value)
         value[0..-1] = [value].pack("H*")
       end
     end
diff --git a/spec/audit_log_parser_spec.rb b/spec/audit_log_parser_spec.rb
index e38c8d1..0922daf 100644
--- a/spec/audit_log_parser_spec.rb
+++ b/spec/audit_log_parser_spec.rb
@@ -1,15 +1,4 @@
 RSpec.describe AuditLogParser do
-  let(:unhex_audit_log) do
-    {
-      %q{type=PROCTITLE msg=audit(1585655101.154:27786): proctitle=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031} => 
-      {"header"=>{"type"=>"PROCTITLE", "msg"=>"audit(1585655101.154:27786)"},
-      "body"=>
-        {
-          "proctitle" => "/bin/sh\u0000-c\u0000command -v debian-sa1 > /dev/null && debian-sa1 1 1", 
-        }
-      }
-    }
-  end
   let(:audit_log) do
     {
       %q{type=SYSCALL msg=audit(1364481363.243:24287): arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500  tty=pts0 ses=1 comm="cat" exe="/bin/cat" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="sshd_config"} =>
@@ -106,16 +95,12 @@
       expect(AuditLogParser.parse(lines)).to eq audit_log.values
     end
 
-    specify '#parse correctly unhex proctitle' do
-      lines = unhex_audit_log.keys.join("\n")
-      expect(AuditLogParser.parse(lines, unhex: true)).to eq unhex_audit_log.values
-    end
-
     specify '#parse unhex does not affect unhexable' do
       lines = audit_log.keys.join("\n")
       expect(AuditLogParser.parse(lines, unhex: true)).to eq audit_log.values
     end
 
+
     context 'when flatten' do
       specify '#parse can be parsed flatly' do
         lines = audit_log.keys.join("\n")
@@ -124,6 +109,43 @@
     end
   end
 
+  context 'when unhex log' do
+    let(:unhex_audit_log) do
+      {
+        %q{type=PROCTITLE msg=audit(1585655101.154:27786): proctitle=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031} => 
+        {"header"=>{"type"=>"PROCTITLE", "msg"=>"audit(1585655101.154:27786)"},
+        "body"=>
+          {
+            "proctitle" => "/bin/sh\u0000-c\u0000command -v debian-sa1 > /dev/null && debian-sa1 1 1", 
+          }
+        }
+      }
+    end
+
+    let(:unhex_specific_audit_log) do
+      {
+        %q{type=PROCTITLE msg=audit(1585655101.154:27786): proctitle=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031 proctitle2=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031 } => 
+        {"header"=>{"type"=>"PROCTITLE", "msg"=>"audit(1585655101.154:27786)"},
+        "body"=>
+          {
+            "proctitle" => "2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031", 
+            "proctitle2" => "/bin/sh\u0000-c\u0000command -v debian-sa1 > /dev/null && debian-sa1 1 1", 
+          }
+        }
+      }
+    end
+
+    specify '#parse correctly unhex proctitle' do
+      lines = unhex_audit_log.keys.join("\n")
+      expect(AuditLogParser.parse(lines, unhex: true)).to eq unhex_audit_log.values
+    end
+
+    specify '#parse correctly unhex specific keys' do
+      lines = unhex_specific_audit_log.keys.join("\n")
+      expect(AuditLogParser.parse(lines, unhex: true, unhex_keys: ['proctitle2'])).to eq unhex_specific_audit_log.values
+    end
+  end
+
   context 'when invalid log' do
     let(:invalid_log) do
       {

From dd15bbcad254d9b67ad45c2a5190c2ca64161d15 Mon Sep 17 00:00:00 2001
From: Chris Coetzee <chriscz@users.noreply.github.com>
Date: Tue, 31 Mar 2020 15:42:52 +0200
Subject: [PATCH 4/5] Make minimum length configurable for unhex

---
 lib/audit_log_parser.rb       | 20 ++++++++++----------
 spec/audit_log_parser_spec.rb | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/lib/audit_log_parser.rb b/lib/audit_log_parser.rb
index d9c944d..502eb11 100644
--- a/lib/audit_log_parser.rb
+++ b/lib/audit_log_parser.rb
@@ -4,17 +4,17 @@
 class AuditLogParser
   class Error < StandardError; end
 
-  # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
-  HEX_RE = /^[A-F0-9]{8,}$/
 
   # @param unhex_keys [Array<String>] with * meaning all
-  def self.parse(src, flatten: false, unhex: false, unhex_keys: ['*'])
+  def self.parse(src, flatten: false, unhex: false, unhex_keys: ['*'], unhex_min_length: 8)
+    # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
+    unhex_re = /^[A-F0-9]{#{unhex_min_length},}$/  
     src.each_line.map do |line|
-      parse_line(line, flatten: flatten, unhex: unhex, unhex_keys: unhex_keys)
+      parse_line(line, flatten: flatten, unhex: unhex, unhex_keys: unhex_keys, unhex_re: unhex_re)
     end
   end
 
-  def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'])
+  def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'], unhex_re: /^[A-F0-9]{8,}/)
     line = line.strip
 
     if line !~ /type=\w+ msg=audit\([\d.:]+\): */
@@ -29,19 +29,19 @@ def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'])
 
     if unhex
       unhex_keys = unhex_keys.include?('*') ? :all : unhex_keys
-      unhex_hash!(header, unhex_keys)
-      unhex_hash!(body, unhex_keys)
+      unhex_hash!(header, unhex_keys, unhex_re)
+      unhex_hash!(body, unhex_keys, unhex_re)
     end
 
     result = {'header' => header, 'body' => body}
     flatten ? flatten_hash(result) : result
   end
 
-  def self.unhex_hash!(hash, unhex_keys)
+  def self.unhex_hash!(hash, unhex_keys, unhex_re)
     hash.each do |key, value|
       if value.kind_of?(Hash)
-        unhex_hash!(value, unhex_keys)
-      elsif (unhex_keys == :all || unhex_keys.include?(key)) && (value.length % 2) == 0 && HEX_RE.match(value)
+        unhex_hash!(value, unhex_keys, unhex_re)
+      elsif (unhex_keys == :all || unhex_keys.include?(key)) && (value.length % 2) == 0 && value =~ unhex_re
         value[0..-1] = [value].pack("H*")
       end
     end
diff --git a/spec/audit_log_parser_spec.rb b/spec/audit_log_parser_spec.rb
index 0922daf..9d44547 100644
--- a/spec/audit_log_parser_spec.rb
+++ b/spec/audit_log_parser_spec.rb
@@ -135,6 +135,18 @@
       }
     end
 
+    let(:unhex_length_audit_log) do
+      {
+        %q{type=PROCTITLE msg=audit(1585655101.154:27786): proctitle=2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031 } => 
+        {"header"=>{"type"=>"PROCTITLE", "msg"=>"audit(1585655101.154:27786)"},
+        "body"=>
+          {
+            "proctitle" => "2F62696E2F7368002D6300636F6D6D616E64202D762064656269616E2D736131203E202F6465762F6E756C6C2026262064656269616E2D73613120312031", 
+          }
+        }
+      }
+    end
+
     specify '#parse correctly unhex proctitle' do
       lines = unhex_audit_log.keys.join("\n")
       expect(AuditLogParser.parse(lines, unhex: true)).to eq unhex_audit_log.values
@@ -144,6 +156,11 @@
       lines = unhex_specific_audit_log.keys.join("\n")
       expect(AuditLogParser.parse(lines, unhex: true, unhex_keys: ['proctitle2'])).to eq unhex_specific_audit_log.values
     end
+
+    specify '#parse does not unhex short keys' do
+      lines = unhex_length_audit_log.keys.join("\n")
+      expect(AuditLogParser.parse(lines, unhex: true, unhex_keys: ['proctitle'], unhex_min_length: 10000)).to eq unhex_length_audit_log.values
+    end
   end
 
   context 'when invalid log' do

From bdac65602d02492763c443dec1d973778d1b7136 Mon Sep 17 00:00:00 2001
From: Chris Coetzee <chriscz@users.noreply.github.com>
Date: Tue, 31 Mar 2020 15:59:50 +0200
Subject: [PATCH 5/5] Push down unhex_min_length as fluentd requires it in
 parse_line

---
 lib/audit_log_parser.rb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/audit_log_parser.rb b/lib/audit_log_parser.rb
index 502eb11..c0aa407 100644
--- a/lib/audit_log_parser.rb
+++ b/lib/audit_log_parser.rb
@@ -8,13 +8,13 @@ class Error < StandardError; end
   # @param unhex_keys [Array<String>] with * meaning all
   def self.parse(src, flatten: false, unhex: false, unhex_keys: ['*'], unhex_min_length: 8)
-    # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
-    unhex_re = /^[A-F0-9]{#{unhex_min_length},}$/  
     src.each_line.map do |line|
-      parse_line(line, flatten: flatten, unhex: unhex, unhex_keys: unhex_keys, unhex_re: unhex_re)
+      parse_line(line, flatten: flatten, unhex: unhex, unhex_keys: unhex_keys, unhex_min_length: unhex_min_length)
     end
   end
 
-  def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'], unhex_re: /^[A-F0-9]{8,}/)
+  def self.parse_line(line, flatten: false, unhex: false, unhex_keys: ['*'], unhex_min_length: 8)
+    # audit always uses uppercase hex digits. Fortunately addresses are generally lower-case.
+    unhex_re = /\A[A-F0-9]{#{unhex_min_length},}\z/
     line = line.strip
 
     if line !~ /type=\w+ msg=audit\([\d.:]+\): */