diff options
author | Jacob Helwig <jacob@technosorcery.net> | 2018-07-16 09:09:30 -0700 |
---|---|---|
committer | Jacob Helwig <jacob@technosorcery.net> | 2018-07-16 09:09:30 -0700 |
commit | be1d15019ddd7b51965fa204f6e837f83297e7c6 (patch) | |
tree | 09e4d10797933b2b5affbdb7ff5cde78f4af8c56 | |
parent | eec1c193d9043622bf27e162dfb8ffb248ae0caa (diff) | |
download | puppet-augeas_core-be1d15019ddd7b51965fa204f6e837f83297e7c6.tar.gz puppet-augeas_core-be1d15019ddd7b51965fa204f6e837f83297e7c6.tar.bz2 |
(maint) Move array parser logic into a util module
-rw-r--r-- | lib/puppet/provider/augeas/augeas.rb | 192 | ||||
-rw-r--r-- | lib/puppet_x/augeas/util/parser.rb | 215 | ||||
-rw-r--r-- | spec/unit/puppet_x/augeas/util/parser_spec.rb | 110 |
3 files changed, 331 insertions, 186 deletions
diff --git a/lib/puppet/provider/augeas/augeas.rb b/lib/puppet/provider/augeas/augeas.rb index b64b0b3..ee52ee6 100644 --- a/lib/puppet/provider/augeas/augeas.rb +++ b/lib/puppet/provider/augeas/augeas.rb @@ -18,11 +18,13 @@ require 'strscan' require 'puppet/util' require 'puppet/util/diff' require 'puppet/util/package' +require 'puppet_x/augeas/util/parser' Puppet::Type.type(:augeas).provide(:augeas) do include Puppet::Util include Puppet::Util::Diff include Puppet::Util::Package + include PuppetX::Augeas::Util::Parser confine feature: :augeas @@ -280,7 +282,7 @@ Puppet::Type.type(:augeas).provide(:augeas) do when '==' begin arg = clause_array.shift - new_array = to_array(arg) + new_array = parse_to_array(arg) return_value = (values == new_array) rescue fail(_('Invalid array in command: %{cmd}') % { cmd: cmd_array.join(' ') }) @@ -288,7 +290,7 @@ Puppet::Type.type(:augeas).provide(:augeas) do when '!=' begin arg = clause_array.shift - new_array = to_array(arg) + new_array = parse_to_array(arg) return_value = (values != new_array) rescue fail(_('Invalid array in command: %{cmd}') % { cmd: cmd_array.join(' ') }) @@ -336,7 +338,7 @@ Puppet::Type.type(:augeas).provide(:augeas) do when '==' begin arg = clause_array.shift - new_array = to_array(arg) + new_array = parse_to_array(arg) return_value = (result == new_array) rescue fail(_('Invalid array in command: %{cmd}') % { cmd: cmd_array.join(' ') }) @@ -344,7 +346,7 @@ Puppet::Type.type(:augeas).provide(:augeas) do when '!=' begin arg = clause_array.shift - new_array = to_array(arg) + new_array = parse_to_array(arg) return_value = (result != new_array) rescue fail(_('Invalid array in command: %{cmd}') % { cmd: cmd_array.join(' ') }) @@ -571,186 +573,4 @@ Puppet::Type.type(:augeas).provide(:augeas) do end end # rubocop:enable Style/GuardClause - - def to_array(string) - s = StringScanner.new(string) - match = array_open(s) - raise "Unable to parse array. Unexpected character at: #{s.rest}" if match.nil? 
- - array_content = array_values(s) - - match = array_close(s) - raise "Unable to parse array. Unexpected character at: #{s.rest}" if match.nil? - - array_content - end - private :to_array - - def array_open(scanner) - scanner.scan(%r{\s*\[\s*}) - end - private :array_open - - def array_close(scanner) - scanner.scan(%r{\s*\]\s*}) - end - private :array_close - - def array_separator(scanner) - scanner.scan(%r{\s*,\s*}) - end - private :array_separator - - def single_quote_unescaped_char(scanner) - scanner.scan(%r{[^'\\]}) - end - private :single_quote_unescaped_char - - def single_quote_escaped_char(scanner) - scanner.scan(%r{\\(['\\])}) && scanner[1] - end - private :single_quote_escaped_char - - def single_quote_char(scanner) - single_quote_escaped_char(scanner) || single_quote_unescaped_char(scanner) - end - private :single_quote_char - - def double_quote_unescaped_char(scanner) - scanner.scan(%r{[^"\\]}) - end - private :double_quote_unescaped_char - - # This handles the possible Ruby escape sequences in double-quoted strings, - # except for \M-x, \M-\C-x, \M-\cx, \c\M-x, \c?, and \C-?. The full list of - # escape sequences, and their meanings is taken from: - # https://github.com/ruby/ruby/blob/90fdfec11a4a42653722e2ce2a672d6e87a57b8e/doc/syntax/literals.rdoc#strings - def double_quote_escaped_char(scanner) - match = scanner.scan(%r{\\(["\\abtnvfres0-7xu])}) - return nil if match.nil? - - case scanner[1] - when '\\' then return '\\' - when '"' then return '"' - when 'a' then return "\a" - when 'b' then return "\b" - when 't' then return "\t" - when 'n' then return "\n" - when 'v' then return "\v" - when 'f' then return "\f" - when 'r' then return "\r" - when 'e' then return "\e" - when 's' then return "\s" - when %r{[0-7]} - octal_character = scanner[1] - other_digits = scanner.scan(%r{[0-7]{1,2}}) - octal_character << other_digits unless other_digits.nil? 
- - return octal_character.to_i(8).chr - when 'x' - hex_character = scanner.scan(%r{[0-9a-fA-F]{1,2}}) - return nil if hex_character.nil? - - hex_character.to_i(16).chr - when 'u' - return unicode_short_hex_character(scanner) || unicode_long_hex_characters(scanner) - else - # Not a valid escape sequence as far as we're concerned. - return nil - end - end - private :double_quote_escaped_char - - def unicode_short_hex_character(scanner) - unicode_character = scanner.scan(%r{[0-9a-fA-F]{4}}) - return nil if unicode_character.nil? - - [unicode_character.hex].pack 'U' - end - private :unicode_short_hex_character - - def unicode_long_hex_characters(scanner) - unicode_string = '' - return nil unless scanner.scan(%r|{|) - - loop do - char = scanner.scan(%r{[0-9a-fA-F]{1,6}}) - break if char.nil? - unicode_string << [char.hex].pack('U') - - separator = scanner.scan(%r{\s}) - break if separator.nil? - end - - return nil if scanner.scan(%r|}|).nil? || unicode_string.empty? - - unicode_string - end - private :unicode_long_hex_characters - - def single_quoted_string(scanner) - quoted_string = '' - - match = scanner.scan(%r{'}) - return nil if match.nil? - - loop do - match = single_quote_char(scanner) - break if match.nil? - - quoted_string << match - end - - match = scanner.scan(%r{'}) - return quoted_string if match - - nil - end - private :single_quoted_string - - def double_quote_char(scanner) - double_quote_escaped_char(scanner) || double_quote_unescaped_char(scanner) - end - private :double_quote_char - - def double_quoted_string(scanner) - quoted_string = '' - - match = scanner.scan(%r{"}) - return nil if match.nil? - - loop do - match = double_quote_char(scanner) - break if match.nil? 
- - quoted_string << match - end - - match = scanner.scan(%r{"}) - return quoted_string if match - - nil - end - private :double_quoted_string - - def quoted_string(scanner) - single_quoted_string(scanner) || double_quoted_string(scanner) - end - private :quoted_string - - def array_values(scanner) - values = [] - - loop do - match = quoted_string(scanner) - break if match.nil? - values << match - - match = array_separator(scanner) - break if match.nil? - end - - values - end - private :array_values end diff --git a/lib/puppet_x/augeas/util/parser.rb b/lib/puppet_x/augeas/util/parser.rb new file mode 100644 index 0000000..1abf42f --- /dev/null +++ b/lib/puppet_x/augeas/util/parser.rb @@ -0,0 +1,215 @@ +# rubocop:disable Style/Documentation +module PuppetX; end +module PuppetX::Augeas; end +module PuppetX::Augeas::Util; end +# rubocop:enable Style/Documentation + +# Container for helpers to parse user provided data contained in manifests. +module PuppetX::Augeas::Util::Parser + TOKEN_ARRAY_CLOSE = %r{\s*\]\s*} + TOKEN_ARRAY_OPEN = %r{\s*\[\s*} + TOKEN_ARRAY_SEPARATOR = %r{\s*,\s*} + TOKEN_CLOSE_CURLY = %r|}| + TOKEN_DOUBLE_QUOTE = %r{"} + TOKEN_DOUBLE_QUOTE_ESCAPED_CHAR = %r{\\(["\\abtnvfres0-7xu])} + TOKEN_DOUBLE_QUOTE_UNESCAPED_CHAR = %r{[^"\\]} + TOKEN_HEX_CHAR = %r{[0-9a-fA-F]{1,2}} + TOKEN_OCTAL_CHAR = %r{[0-7]{1,3}} + TOKEN_OPEN_CURLY = %r|{| + TOKEN_SINGLE_QUOTE = %r{'} + TOKEN_SINGLE_QUOTE_ESCAPED_CHAR = %r{\\(['\\])} + TOKEN_SINGLE_QUOTE_UNESCAPED_CHAR = %r{[^'\\]} + TOKEN_SPACE = %r{\s} + TOKEN_UNICODE_LONG_HEX_CHAR = %r{[0-9a-fA-F]{1,6}} + TOKEN_UNICODE_SHORT_HEX_CHAR = %r{[0-9a-fA-F]{4}} + + # Parse a string into the (nearly) equivalent Ruby array. This only handles + # arrays with string members (double-, or single-quoted), and does not + # support the full suite of escape sequences that Ruby allows in + # double-quoted strings. + # + # @param string [String] The string to be parsed. 
+ # @return [Array<String>] The parsed array elements, including handling any + # escape sequences. + def parse_to_array(string) + s = StringScanner.new(string) + match = array_open(s) + raise "Unexpected character in array at: #{s.rest}" if match.nil? + + array_content = array_values(s) + + match = array_close(s) + raise "Unexpected character in array at: #{s.rest}" if match.nil? || !s.empty? + + array_content + end + + def array_open(scanner) + scanner.scan(TOKEN_ARRAY_OPEN) + end + private :array_open + + def array_close(scanner) + scanner.scan(TOKEN_ARRAY_CLOSE) + end + private :array_close + + def array_separator(scanner) + scanner.scan(TOKEN_ARRAY_SEPARATOR) + end + private :array_separator + + def single_quote_unescaped_char(scanner) + scanner.scan(TOKEN_SINGLE_QUOTE_UNESCAPED_CHAR) + end + private :single_quote_unescaped_char + + def single_quote_escaped_char(scanner) + scanner.scan(TOKEN_SINGLE_QUOTE_ESCAPED_CHAR) && scanner[1] + end + private :single_quote_escaped_char + + def single_quote_char(scanner) + single_quote_escaped_char(scanner) || single_quote_unescaped_char(scanner) + end + private :single_quote_char + + def double_quote_unescaped_char(scanner) + scanner.scan(TOKEN_DOUBLE_QUOTE_UNESCAPED_CHAR) + end + private :double_quote_unescaped_char + + # This handles the possible Ruby escape sequences in double-quoted strings, + # except for \M-x, \M-\C-x, \M-\cx, \c\M-x, \c?, and \C-?. The full list of + # escape sequences, and their meanings is taken from: + # https://github.com/ruby/ruby/blob/90fdfec11a4a42653722e2ce2a672d6e87a57b8e/doc/syntax/literals.rdoc#strings + def double_quote_escaped_char(scanner) + match = scanner.scan(TOKEN_DOUBLE_QUOTE_ESCAPED_CHAR) + return nil if match.nil? 
+ + case scanner[1] + when '\\' then return '\\' + when '"' then return '"' + when 'a' then return "\a" + when 'b' then return "\b" + when 't' then return "\t" + when 'n' then return "\n" + when 'v' then return "\v" + when 'f' then return "\f" + when 'r' then return "\r" + when 'e' then return "\e" + when 's' then return "\s" + when %r{[0-7]} + # Back the scanner up by one byte so we can grab all of the potential + # octal digits at the same time. + scanner.pos = scanner.pos - 1 + octal_character = scanner.scan(TOKEN_OCTAL_CHAR) + + return octal_character.to_i(8).chr + when 'x' + hex_character = scanner.scan(TOKEN_HEX_CHAR) + return nil if hex_character.nil? + + hex_character.to_i(16).chr + when 'u' + return unicode_short_hex_character(scanner) || unicode_long_hex_characters(scanner) + else + # Not a valid escape sequence as far as we're concerned. + return nil + end + end + private :double_quote_escaped_char + + def unicode_short_hex_character(scanner) + unicode_character = scanner.scan(TOKEN_UNICODE_SHORT_HEX_CHAR) + return nil if unicode_character.nil? + + [unicode_character.hex].pack 'U' + end + private :unicode_short_hex_character + + def unicode_long_hex_characters(scanner) + unicode_string = '' + return nil unless scanner.scan(TOKEN_OPEN_CURLY) + + loop do + char = scanner.scan(TOKEN_UNICODE_LONG_HEX_CHAR) + break if char.nil? + unicode_string << [char.hex].pack('U') + + separator = scanner.scan(TOKEN_SPACE) + break if separator.nil? + end + + return nil if scanner.scan(TOKEN_CLOSE_CURLY).nil? || unicode_string.empty? + + unicode_string + end + private :unicode_long_hex_characters + + def single_quoted_string(scanner) + quoted_string = '' + + match = scanner.scan(TOKEN_SINGLE_QUOTE) + return nil if match.nil? + + loop do + match = single_quote_char(scanner) + break if match.nil? 
+ + quoted_string << match + end + + match = scanner.scan(TOKEN_SINGLE_QUOTE) + return quoted_string if match + + nil + end + private :single_quoted_string + + def double_quote_char(scanner) + double_quote_escaped_char(scanner) || double_quote_unescaped_char(scanner) + end + private :double_quote_char + + def double_quoted_string(scanner) + quoted_string = '' + + match = scanner.scan(TOKEN_DOUBLE_QUOTE) + return nil if match.nil? + + loop do + match = double_quote_char(scanner) + break if match.nil? + + quoted_string << match + end + + match = scanner.scan(TOKEN_DOUBLE_QUOTE) + return quoted_string if match + + nil + end + private :double_quoted_string + + def quoted_string(scanner) + single_quoted_string(scanner) || double_quoted_string(scanner) + end + private :quoted_string + + def array_values(scanner) + values = [] + + loop do + match = quoted_string(scanner) + break if match.nil? + values << match + + match = array_separator(scanner) + break if match.nil? + end + + values + end + private :array_values +end diff --git a/spec/unit/puppet_x/augeas/util/parser_spec.rb b/spec/unit/puppet_x/augeas/util/parser_spec.rb new file mode 100644 index 0000000..f8b5b2b --- /dev/null +++ b/spec/unit/puppet_x/augeas/util/parser_spec.rb @@ -0,0 +1,110 @@ +require 'spec_helper' +require 'puppet_x/augeas/util/parser' + +describe PuppetX::Augeas::Util::Parser do + include described_class + + it 'handles an empty array' do + expect(parse_to_array('[]')).to eq([]) + end + + it 'handles an array with a simple single-quoted entry' do + expect(parse_to_array("['entry']")).to eq(['entry']) + end + + it 'handles an array with a simple double-quoted entry' do + expect(parse_to_array('["entry"]')).to eq(['entry']) + end + + it 'handles an array with both single- and double-quoted entries' do + expect(parse_to_array(%q(['first', "second"]))).to eq(['first', 'second']) + end + + context 'inside single-quoted strings' do + it 'allows a literal backslash' do + 
expect(parse_to_array("['entry\\\\here']")).to eq(['entry\\here']) + end + + it 'allows an internal single-quote' do + expect(parse_to_array("['entry\\'here']")).to eq(['entry\'here']) + end + end + + context 'inside double-quoted strings' do + it 'allows a literal backslash' do + expect(parse_to_array('["entry\\\\here"]')).to eq(['entry\\here']) + end + + it 'allows an internal double-quote' do + expect(parse_to_array('["entry\\"here"]')).to eq(['entry"here']) + end + + it 'does not require escaping a single-quote' do + expect(parse_to_array('["entry\'here"]')).to eq(["entry'here"]) + end + + it 'allows a bell character escape' do + expect(parse_to_array('["entry\\ahere"]')).to eq(["entry\ahere"]) + end + + it 'allows a backspace character escape' do + expect(parse_to_array('["entry\\bhere"]')).to eq(["entry\bhere"]) + end + + it 'allows a horizontal tab character escape' do + expect(parse_to_array('["entry\\there"]')).to eq(["entry\there"]) + end + + it 'allows a line feed character escape' do + expect(parse_to_array('["entry\\nhere"]')).to eq(["entry\nhere"]) + end + + it 'allows a vertical tab character escape' do + expect(parse_to_array('["entry\\vhere"]')).to eq(["entry\vhere"]) + end + + it 'allows a form feed character escape' do + expect(parse_to_array('["entry\\fhere"]')).to eq(["entry\fhere"]) + end + + it 'allows a carriage return character escape' do + expect(parse_to_array('["entry\\rhere"]')).to eq(["entry\rhere"]) + end + + it 'allows an escape character escape' do + expect(parse_to_array('["entry\\ehere"]')).to eq(["entry\ehere"]) + end + + it 'allows a space character escape' do + expect(parse_to_array('["entry\\shere"]')).to eq(['entry here']) + end + + it 'allows octal character escapes' do + expect(parse_to_array('["\7", "\41", "\101", "\1411"]')).to eq(["\a", '!', 'A', 'a1']) + end + + it 'allows hexadecimal character escapes with \\x' do + expect(parse_to_array('["\x7", "\x21", "\x211"]')).to eq(["\a", '!', '!1']) + end + + it 'allows 
single-character unicode hexadecimal character escapes with \\u' do + expect(parse_to_array('["\u2015", "\u20222"]')).to eq(["\u2015", "\u2022" << '2']) + end + + it 'allows multi-character unicode hexadecimal character escapes with \\u{...}' do + expect(parse_to_array('["\u{7}", "\u{20}", "\u{100}", "\u{2026}", "\u{1F464}", "\u{100000}", "\u{53 74 72 69 6E 67}"]')).to eq(["\a", ' ', "\u{100}", "\u{2026}", "\u{1F464}", "\u{100000}", 'String']) + end + end + + it 'fails with garbage in front of the array' do + expect { parse_to_array("junk ['good', 'array', 'here']") }.to raise_error(RuntimeError, %r{^Unexpected character in array at: junk \['good}) + end + + it 'fails with garbage in the middle of the array' do + expect { parse_to_array("['got', 'some', junk 'here']") }.to raise_error(RuntimeError, %r{^Unexpected character in array at: junk 'here'}) + end + + it 'fails with garbage after the array' do + expect { parse_to_array("['good', 'array', 'here'] junk after") }.to raise_error(RuntimeError, %r{^Unexpected character in array at: junk after}) + end +end |