lib/puppet_x/augeas/util/parser.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217

# rubocop:disable Style/Documentation
module PuppetX; end

module PuppetX::Augeas; end

module PuppetX::Augeas::Util; end
# rubocop:enable Style/Documentation

# Container for helpers to parse user provided data contained in manifests.
module PuppetX::Augeas::Util::Parser
  TOKEN_ARRAY_CLOSE                 = %r{\s*\]\s*}.freeze
  TOKEN_ARRAY_OPEN                  = %r{\s*\[\s*}.freeze
  TOKEN_ARRAY_SEPARATOR             = %r{\s*,\s*}.freeze
  TOKEN_CLOSE_CURLY                 = %r|}|.freeze
  TOKEN_DOUBLE_QUOTE                = %r{"}.freeze
  TOKEN_DOUBLE_QUOTE_ESCAPED_CHAR   = %r{\\(["\\abtnvfres0-7xu])}.freeze
  TOKEN_DOUBLE_QUOTE_UNESCAPED_CHAR = %r{[^"\\]}.freeze
  TOKEN_HEX_CHAR                    = %r{[0-9a-fA-F]{1,2}}.freeze
  TOKEN_OCTAL_CHAR                  = %r{[0-7]{1,3}}.freeze
  TOKEN_OPEN_CURLY                  = %r|{|.freeze
  TOKEN_SINGLE_QUOTE                = %r{'}.freeze
  TOKEN_SINGLE_QUOTE_ESCAPED_CHAR   = %r{\\(['\\])}.freeze
  TOKEN_SINGLE_QUOTE_UNESCAPED_CHAR = %r{[^'\\]}.freeze
  TOKEN_SPACE                       = %r{\s}.freeze
  TOKEN_UNICODE_LONG_HEX_CHAR       = %r{[0-9a-fA-F]{1,6}}.freeze
  TOKEN_UNICODE_SHORT_HEX_CHAR      = %r{[0-9a-fA-F]{4}}.freeze

  # Parse a string into the (nearly) equivalent Ruby array. This only handles
  # arrays with string members (double-, or single-quoted), and does not
  # support the full quite of escape sequences that Ruby allows in
  # double-quoted strings.
  #
  # @param [String] The string to be parsed.
  # @return [Array<String>] The parsed array elements, including handling any
  #   escape sequences.
  def parse_to_array(string)
    s = StringScanner.new(string)
    match = array_open(s)
    raise "Unexpected character in array at: #{s.rest}" if match.nil?

    array_content = array_values(s)

    match = array_close(s)
    raise "Unexpected character in array at: #{s.rest}" if match.nil? || !s.empty?

    array_content
  end

  def array_open(scanner)
    scanner.scan(TOKEN_ARRAY_OPEN)
  end
  private :array_open

  def array_close(scanner)
    scanner.scan(TOKEN_ARRAY_CLOSE)
  end
  private :array_close

  def array_separator(scanner)
    scanner.scan(TOKEN_ARRAY_SEPARATOR)
  end
  private :array_separator

  def single_quote_unescaped_char(scanner)
    scanner.scan(TOKEN_SINGLE_QUOTE_UNESCAPED_CHAR)
  end
  private :single_quote_unescaped_char

  def single_quote_escaped_char(scanner)
    scanner.scan(TOKEN_SINGLE_QUOTE_ESCAPED_CHAR) && scanner[1]
  end
  private :single_quote_escaped_char

  def single_quote_char(scanner)
    single_quote_escaped_char(scanner) || single_quote_unescaped_char(scanner)
  end
  private :single_quote_char

  def double_quote_unescaped_char(scanner)
    scanner.scan(TOKEN_DOUBLE_QUOTE_UNESCAPED_CHAR)
  end
  private :double_quote_unescaped_char

  # This handles the possible Ruby escape sequences in double-quoted strings,
  # except for \M-x, \M-\C-x, \M-\cx, \c\M-x, \c?, and \C-?. The full list of
  # escape sequences, and their meanings is taken from:
  # https://github.com/ruby/ruby/blob/90fdfec11a4a42653722e2ce2a672d6e87a57b8e/doc/syntax/literals.rdoc#strings
  def double_quote_escaped_char(scanner)
    match = scanner.scan(TOKEN_DOUBLE_QUOTE_ESCAPED_CHAR)
    return nil if match.nil?

    case scanner[1]
    when '\\' then '\\'
    when '"'  then '"'
    when 'a'  then "\a"
    when 'b'  then "\b"
    when 't'  then "\t"
    when 'n'  then "\n"
    when 'v'  then "\v"
    when 'f'  then "\f"
    when 'r'  then "\r"
    when 'e'  then "\e"
    when 's'  then "\s"
    when %r{[0-7]}
      # Back the scanner up by one byte so we can grab all of the potential
      # octal digits at the same time.
      scanner.pos = scanner.pos - 1
      octal_character = scanner.scan(TOKEN_OCTAL_CHAR)

      octal_character.to_i(8).chr
    when 'x'
      hex_character = scanner.scan(TOKEN_HEX_CHAR)
      return nil if hex_character.nil?

      hex_character.to_i(16).chr
    when 'u'
      unicode_short_hex_character(scanner) || unicode_long_hex_characters(scanner)
    else
      # Not a valid escape sequence as far as we're concerned.
      nil
    end
  end
  private :double_quote_escaped_char

  def unicode_short_hex_character(scanner)
    unicode_character = scanner.scan(TOKEN_UNICODE_SHORT_HEX_CHAR)
    return nil if unicode_character.nil?

    [unicode_character.hex].pack 'U'
  end
  private :unicode_short_hex_character

  def unicode_long_hex_characters(scanner)
    unicode_string = ''
    return nil unless scanner.scan(TOKEN_OPEN_CURLY)

    loop do
      char = scanner.scan(TOKEN_UNICODE_LONG_HEX_CHAR)
      break if char.nil?
      unicode_string << [char.hex].pack('U')

      separator = scanner.scan(TOKEN_SPACE)
      break if separator.nil?
    end

    return nil if scanner.scan(TOKEN_CLOSE_CURLY).nil? || unicode_string.empty?

    unicode_string
  end
  private :unicode_long_hex_characters

  def single_quoted_string(scanner)
    quoted_string = ''

    match = scanner.scan(TOKEN_SINGLE_QUOTE)
    return nil if match.nil?

    loop do
      match = single_quote_char(scanner)
      break if match.nil?

      quoted_string << match
    end

    match = scanner.scan(TOKEN_SINGLE_QUOTE)
    return quoted_string if match

    nil
  end
  private :single_quoted_string

  def double_quote_char(scanner)
    double_quote_escaped_char(scanner) || double_quote_unescaped_char(scanner)
  end
  private :double_quote_char

  def double_quoted_string(scanner)
    quoted_string = ''

    match = scanner.scan(TOKEN_DOUBLE_QUOTE)
    return nil if match.nil?

    loop do
      match = double_quote_char(scanner)
      break if match.nil?

      quoted_string << match
    end

    match = scanner.scan(TOKEN_DOUBLE_QUOTE)
    return quoted_string if match

    nil
  end
  private :double_quoted_string

  def quoted_string(scanner)
    single_quoted_string(scanner) || double_quoted_string(scanner)
  end
  private :quoted_string

  def array_values(scanner)
    values = []

    loop do
      match = quoted_string(scanner)
      break if match.nil?
      values << match

      match = array_separator(scanner)
      break if match.nil?
    end

    values
  end
  private :array_values
end