>>From c688c2bd52563f9170912eac4148f438447dcd4d Mon Sep 17 00:00:00 2001 From: Michael Koziarski Date: Thu, 24 Jan 2013 09:33:37 +1300 Subject: [PATCH] Add an OkJson backend and remove the YAML backend Fixes CVE-2013-0333. The ActiveSupport::JSON::Backends::Yaml class is present but the functionality has been removed entirely. --- .../lib/active_support/json/backends/okjson.rb | 644 ++++++++++++++++++++ .../lib/active_support/json/backends/yaml.rb | 71 +--- activesupport/lib/active_support/json/decoding.rb | 2 +- activesupport/test/json/decoding_test.rb | 4 +- 4 files changed, 649 insertions(+), 72 deletions(-) create mode 100644 activesupport/lib/active_support/json/backends/okjson.rb diff --git a/activesupport/lib/active_support/json/backends/okjson.rb b/activesupport/lib/active_support/json/backends/okjson.rb new file mode 100644 index 0000000..f720a87 --- /dev/null +++ b/activesupport/lib/active_support/json/backends/okjson.rb @@ -0,0 +1,644 @@ +module ActiveSupport + # Include OkJson as a replacement for the Yaml backend + # encoding: UTF-8 + # + # Copyright 2011, 2012 Keith Rarick + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to deal + # in the Software without restriction, including without limitation the rights + # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + # copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in + # all copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + # THE SOFTWARE. + + # See https://github.com/kr/okjson for updates. + + require 'stringio' + + # Some parts adapted from + # http://golang.org/src/pkg/json/decode.go and + # http://golang.org/src/pkg/utf8/utf8.go + module OkJson + Upstream = 'LTD7LBKLZWFF7OZK' + extend self + + + # Decodes a json document in string s and + # returns the corresponding ruby value. + # String s must be valid UTF-8. If you have + # a string in some other encoding, convert + # it first. + # + # String values in the resulting structure + # will be UTF-8. + def decode(s) + ts = lex(s) + v, ts = textparse(ts) + if ts.length > 0 + raise Error, 'trailing garbage' + end + v + end + + + # Parses a "json text" in the sense of RFC 4627. + # Returns the parsed value and any trailing tokens. + # Note: this is almost the same as valparse, + # except that it does not accept atomic values. + def textparse(ts) + if ts.length < 0 + raise Error, 'empty' + end + + typ, _, val = ts[0] + case typ + when '{' then objparse(ts) + when '[' then arrparse(ts) + else + raise Error, "unexpected #{val.inspect}" + end + end + + + # Parses a "value" in the sense of RFC 4627. + # Returns the parsed value and any trailing tokens. + def valparse(ts) + if ts.length < 0 + raise Error, 'empty' + end + + typ, _, val = ts[0] + case typ + when '{' then objparse(ts) + when '[' then arrparse(ts) + when :val,:str then [val, ts[1..-1]] + else + raise Error, "unexpected #{val.inspect}" + end + end + + + # Parses an "object" in the sense of RFC 4627. + # Returns the parsed value and any trailing tokens. + def objparse(ts) + ts = eat('{', ts) + obj = {} + + if ts[0][0] == '}' + return obj, ts[1..-1] + end + + k, v, ts = pairparse(ts) + obj[k] = v + + if ts[0][0] == '}' + return obj, ts[1..-1] + end + + loop do + ts = eat(',', ts) + + k, v, ts = pairparse(ts) + obj[k] = v + + if ts[0][0] == '}' + return obj, ts[1..-1] + end + end + end + + + # Parses a "member" in the sense of RFC 4627. + # Returns the parsed values and any trailing tokens. + def pairparse(ts) + (typ, _, k), ts = ts[0], ts[1..-1] + if typ != :str + raise Error, "unexpected #{k.inspect}" + end + ts = eat(':', ts) + v, ts = valparse(ts) + [k, v, ts] + end + + + # Parses an "array" in the sense of RFC 4627. + # Returns the parsed value and any trailing tokens. + def arrparse(ts) + ts = eat('[', ts) + arr = [] + + if ts[0][0] == ']' + return arr, ts[1..-1] + end + + v, ts = valparse(ts) + arr << v + + if ts[0][0] == ']' + return arr, ts[1..-1] + end + + loop do + ts = eat(',', ts) + + v, ts = valparse(ts) + arr << v + + if ts[0][0] == ']' + return arr, ts[1..-1] + end + end + end + + + def eat(typ, ts) + if ts[0][0] != typ + raise Error, "expected #{typ} (got #{ts[0].inspect})" + end + ts[1..-1] + end + + + # Scans s and returns a list of json tokens, + # excluding white space (as defined in RFC 4627). + def lex(s) + ts = [] + while s.length > 0 + typ, lexeme, val = tok(s) + if typ == nil + raise Error, "invalid character at #{s[0,10].inspect}" + end + if typ != :space + ts << [typ, lexeme, val] + end + s = s[lexeme.length..-1] + end + ts + end + + + # Scans the first token in s and + # returns a 3-element list, or nil + # if s does not begin with a valid token. + # + # The first list element is one of + # '{', '}', ':', ',', '[', ']', + # :val, :str, and :space. + # + # The second element is the lexeme. + # + # The third element is the value of the + # token for :val and :str, otherwise + # it is the lexeme. + def tok(s) + case s[0] + when ?{ then ['{', s[0,1], s[0,1]] + when ?} then ['}', s[0,1], s[0,1]] + when ?: then [':', s[0,1], s[0,1]] + when ?, then [',', s[0,1], s[0,1]] + when ?[ then ['[', s[0,1], s[0,1]] + when ?] then [']', s[0,1], s[0,1]] + when ?n then nulltok(s) + when ?t then truetok(s) + when ?f then falsetok(s) + when ?" then strtok(s) + when Spc then [:space, s[0,1], s[0,1]] + when ?\t then [:space, s[0,1], s[0,1]] + when ?\n then [:space, s[0,1], s[0,1]] + when ?\r then [:space, s[0,1], s[0,1]] + else numtok(s) + end + end + + + def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end + def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end + def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end + + + def numtok(s) + m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s) + if m && m.begin(0) == 0 + if m[3] && !m[2] + [:val, m[0], Integer(m[1])*(10**Integer(m[3][1..-1]))] + elsif m[2] + [:val, m[0], Float(m[0])] + else + [:val, m[0], Integer(m[0])] + end + else + [] + end + end + + + def strtok(s) + m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s) + if ! m + raise Error, "invalid string literal at #{abbrev(s)}" + end + [:str, m[0], unquote(m[0])] + end + + + def abbrev(s) + t = s[0,10] + p = t['`'] + t = t[0,p] if p + t = t + '...' if t.length < s.length + '`' + t + '`' + end + + + # Converts a quoted json string literal q into a UTF-8-encoded string. + # The rules are different than for Ruby, so we cannot use eval. + # Unquote will raise an error if q contains control characters. + def unquote(q) + q = q[1...-1] + a = q.dup # allocate a big enough string + rubydoesenc = false + # In ruby >= 1.9, a[w] is a codepoint, not a byte. + if a.class.method_defined?(:force_encoding) + a.force_encoding('UTF-8') + rubydoesenc = true + end + r, w = 0, 0 + while r < q.length + c = q[r] + case true + when c == ?\\ + r += 1 + if r >= q.length + raise Error, "string literal ends with a \"\\\": \"#{q}\"" + end + + case q[r] + when ?",?\\,?/,?' + a[w] = q[r] + r += 1 + w += 1 + when ?b,?f,?n,?r,?t + a[w] = Unesc[q[r]] + r += 1 + w += 1 + when ?u + r += 1 + uchar = begin + hexdec4(q[r,4]) + rescue RuntimeError => e + raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}" + end + r += 4 + if surrogate? uchar + if q.length >= r+6 + uchar1 = hexdec4(q[r+2,4]) + uchar = subst(uchar, uchar1) + if uchar != Ucharerr + # A valid pair; consume. + r += 6 + end + end + end + if rubydoesenc + a[w] = '' << uchar + w += 1 + else + w += ucharenc(a, w, uchar) + end + else + raise Error, "invalid escape char #{q[r]} in \"#{q}\"" + end + when c == ?", c < Spc + raise Error, "invalid character in string literal \"#{q}\"" + else + # Copy anything else byte-for-byte. + # Valid UTF-8 will remain valid UTF-8. + # Invalid UTF-8 will remain invalid UTF-8. + # In ruby >= 1.9, c is a codepoint, not a byte, + # in which case this is still what we want. + a[w] = c + r += 1 + w += 1 + end + end + a[0,w] + end + + + # Encodes unicode character u as UTF-8 + # bytes in string a at position i. + # Returns the number of bytes written. + def ucharenc(a, i, u) + case true + when u <= Uchar1max + a[i] = (u & 0xff).chr + 1 + when u <= Uchar2max + a[i+0] = (Utag2 | ((u>>6)&0xff)).chr + a[i+1] = (Utagx | (u&Umaskx)).chr + 2 + when u <= Uchar3max + a[i+0] = (Utag3 | ((u>>12)&0xff)).chr + a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr + a[i+2] = (Utagx | (u&Umaskx)).chr + 3 + else + a[i+0] = (Utag4 | ((u>>18)&0xff)).chr + a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr + a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr + a[i+3] = (Utagx | (u&Umaskx)).chr + 4 + end + end + + + def hexdec4(s) + if s.length != 4 + raise Error, 'short' + end + (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3]) + end + + + def subst(u1, u2) + if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3 + return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself + end + return Ucharerr + end + + + def surrogate?(u) + Usurr1 <= u && u < Usurr3 + end + + + def nibble(c) + case true + when ?0 <= c && c <= ?9 then c.ord - ?0.ord + when ?a <= c && c <= ?z then c.ord - ?a.ord + 10 + when ?A <= c && c <= ?Z then c.ord - ?A.ord + 10 + else + raise Error, "invalid hex code #{c}" + end + end + + + # Encodes x into a json text. It may contain only + # Array, Hash, String, Numeric, true, false, nil. + # (Note, this list excludes Symbol.) + # X itself must be an Array or a Hash. + # No other value can be encoded, and an error will + # be raised if x contains any other value, such as + # Nan, Infinity, Symbol, and Proc, or if a Hash key + # is not a String. + # Strings contained in x must be valid UTF-8. + def encode(x) + case x + when Hash then objenc(x) + when Array then arrenc(x) + else + raise Error, 'root value must be an Array or a Hash' + end + end + + + def valenc(x) + case x + when Hash then objenc(x) + when Array then arrenc(x) + when String then strenc(x) + when Numeric then numenc(x) + when true then "true" + when false then "false" + when nil then "null" + else + raise Error, "cannot encode #{x.class}: #{x.inspect}" + end + end + + + def objenc(x) + '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}' + end + + + def arrenc(a) + '[' + a.map{|x| valenc(x)}.join(',') + ']' + end + + + def keyenc(k) + case k + when String then strenc(k) + else + raise Error, "Hash key is not a string: #{k.inspect}" + end + end + + + def strenc(s) + t = StringIO.new + t.putc(?") + r = 0 + + # In ruby >= 1.9, s[r] is a codepoint, not a byte. + rubydoesenc = s.class.method_defined?(:encoding) + + while r < s.length + case s[r] + when ?" then t.print('\\"') + when ?\\ then t.print('\\\\') + when ?\b then t.print('\\b') + when ?\f then t.print('\\f') + when ?\n then t.print('\\n') + when ?\r then t.print('\\r') + when ?\t then t.print('\\t') + else + c = s[r] + case true + when rubydoesenc + begin + c.ord # will raise an error if c is invalid UTF-8 + t.write(c) + rescue + t.write(Ustrerr) + end + when Spc <= c && c <= ?~ + t.putc(c) + else + n = ucharcopy(t, s, r) # ensure valid UTF-8 output + r += n - 1 # r is incremented below + end + end + r += 1 + end + t.putc(?") + t.string + end + + + def numenc(x) + if ((x.nan? || x.infinite?) rescue false) + raise Error, "Numeric cannot be represented: #{x}" + end + "#{x}" + end + + + # Copies the valid UTF-8 bytes of a single character + # from string s at position i to I/O object t, and + # returns the number of bytes copied. + # If no valid UTF-8 char exists at position i, + # ucharcopy writes Ustrerr and returns 1. + def ucharcopy(t, s, i) + n = s.length - i + raise Utf8Error if n < 1 + + c0 = s[i].ord + + # 1-byte, 7-bit sequence? + if c0 < Utagx + t.putc(c0) + return 1 + end + + raise Utf8Error if c0 < Utag2 # unexpected continuation byte? + + raise Utf8Error if n < 2 # need continuation byte + c1 = s[i+1].ord + raise Utf8Error if c1 < Utagx || Utag2 <= c1 + + # 2-byte, 11-bit sequence? + if c0 < Utag3 + raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max + t.putc(c0) + t.putc(c1) + return 2 + end + + # need second continuation byte + raise Utf8Error if n < 3 + + c2 = s[i+2].ord + raise Utf8Error if c2 < Utagx || Utag2 <= c2 + + # 3-byte, 16-bit sequence? + if c0 < Utag4 + u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx) + raise Utf8Error if u <= Uchar2max + t.putc(c0) + t.putc(c1) + t.putc(c2) + return 3 + end + + # need third continuation byte + raise Utf8Error if n < 4 + c3 = s[i+3].ord + raise Utf8Error if c3 < Utagx || Utag2 <= c3 + + # 4-byte, 21-bit sequence? + if c0 < Utag5 + u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx) + raise Utf8Error if u <= Uchar3max + t.putc(c0) + t.putc(c1) + t.putc(c2) + t.putc(c3) + return 4 + end + + raise Utf8Error + rescue Utf8Error + t.write(Ustrerr) + return 1 + end + + + class Utf8Error < ::StandardError + end + + + class Error < ::StandardError + end + + + Utagx = 0x80 # 1000 0000 + Utag2 = 0xc0 # 1100 0000 + Utag3 = 0xe0 # 1110 0000 + Utag4 = 0xf0 # 1111 0000 + Utag5 = 0xF8 # 1111 1000 + Umaskx = 0x3f # 0011 1111 + Umask2 = 0x1f # 0001 1111 + Umask3 = 0x0f # 0000 1111 + Umask4 = 0x07 # 0000 0111 + Uchar1max = (1<<7) - 1 + Uchar2max = (1<<11) - 1 + Uchar3max = (1<<16) - 1 + Ucharerr = 0xFFFD # unicode "replacement char" + Ustrerr = "\xef\xbf\xbd" # unicode "replacement char" + Usurrself = 0x10000 + Usurr1 = 0xd800 + Usurr2 = 0xdc00 + Usurr3 = 0xe000 + + Spc = ' '[0] + Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t} + end + + module JSON + module Backends + module OkJson + ParseError = ::ActiveSupport::OkJson::Error + extend self + + # Parses a JSON string or IO and convert it into an object + def decode(json) + if json.respond_to?(:read) + json = json.read + end + data = ActiveSupport::OkJson.decode(json) + if ActiveSupport.parse_json_times + convert_dates_from(data) + else + data + end + end + + private + def convert_dates_from(data) + case data + when nil + nil + when DATE_REGEX + begin + DateTime.parse(data) + rescue ArgumentError + data + end + when Array + data.map! { |d| convert_dates_from(d) } + when Hash + data.each do |key, value| + data[key] = convert_dates_from(value) + end + else + data + end + end + end + end + end +end diff --git a/activesupport/lib/active_support/json/backends/yaml.rb b/activesupport/lib/active_support/json/backends/yaml.rb index 4c41042..1926a3d 100644 --- a/activesupport/lib/active_support/json/backends/yaml.rb +++ b/activesupport/lib/active_support/json/backends/yaml.rb @@ -7,79 +7,12 @@ module ActiveSupport ParseError = ::StandardError extend self - # Converts a JSON string into a Ruby object. def decode(json) - YAML.load(convert_json_to_yaml(json)) - rescue ArgumentError => e - raise ParseError, "Invalid JSON string" + raise "The Yaml backend has been deprecated due to security risks, you should set ActiveSupport::JSON.backend = 'OkJson'" end protected - # Ensure that ":" and "," are always followed by a space - def convert_json_to_yaml(json) #:nodoc: - require 'strscan' unless defined? ::StringScanner - scanner, quoting, marks, pos, times = ::StringScanner.new(json), false, [], nil, [] - while scanner.scan_until(/(\\['"]|['":,\\]|\\.)/) - case char = scanner[1] - when '"', "'" - if !quoting - quoting = char - pos = scanner.pos - elsif quoting == char - if json[pos..scanner.pos-2] =~ DATE_REGEX - # found a date, track the exact positions of the quotes so we can - # overwrite them with spaces later. - times << pos << scanner.pos - end - quoting = false - end - when ":","," - marks << scanner.pos - 1 unless quoting - when "\\" - scanner.skip(/\\/) - end - end - - if marks.empty? - json.gsub(/\\([\\\/]|u[[:xdigit:]]{4})/) do - ustr = $1 - if ustr.start_with?('u') - [ustr[1..-1].to_i(16)].pack("U") - elsif ustr == '\\' - '\\\\' - else - ustr - end - end - else - left_pos = [-1].push(*marks) - right_pos = marks << scanner.pos + scanner.rest_size - output = [] - left_pos.each_with_index do |left, i| - scanner.pos = left.succ - chunk = scanner.peek(right_pos[i] - scanner.pos + 1) - # overwrite the quotes found around the dates with spaces - while times.size > 0 && times[0] <= right_pos[i] - chunk[times.shift - scanner.pos - 1] = ' ' - end - chunk.gsub!(/\\([\\\/]|u[[:xdigit:]]{4})/) do - ustr = $1 - if ustr.start_with?('u') - [ustr[1..-1].to_i(16)].pack("U") - elsif ustr == '\\' - '\\\\' - else - ustr - end - end - output << chunk - end - output = output * " " - - output.gsub!(/\\\//, '/') - output - end - end + end end end diff --git a/activesupport/lib/active_support/json/decoding.rb b/activesupport/lib/active_support/json/decoding.rb index 2182b90..806f4c5 100644 --- a/activesupport/lib/active_support/json/decoding.rb +++ b/activesupport/lib/active_support/json/decoding.rb @@ -6,7 +6,7 @@ module ActiveSupport module JSON # Listed in order of preference. - DECODERS = %w(Yajl Yaml) + DECODERS = %w(Yajl OkJson) class << self attr_reader :parse_error diff --git a/activesupport/test/json/decoding_test.rb b/activesupport/test/json/decoding_test.rb index e45851e..a7f7b46 100644 --- a/activesupport/test/json/decoding_test.rb +++ b/activesupport/test/json/decoding_test.rb @@ -42,9 +42,9 @@ class TestJSONDecoding < ActiveSupport::TestCase } # load the default JSON backend - ActiveSupport::JSON.backend = 'Yaml' + ActiveSupport::JSON.backend = 'OkJson' - backends = %w(Yaml) + backends = %w(OkJson) backends << "JSONGem" if defined?(::JSON) backends << "Yajl" if defined?(::Yajl) -- 1.7.2