// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // JSON (JavaScript Object Notation) parser. // See http://www.json.org/ // The json package implements a simple parser and // representation for JSON (JavaScript Object Notation), // as defined at http://www.json.org/. package json import ( "bytes"; "strconv"; "utf8"; ) // Strings // // Double quoted with escapes: \" \\ \/ \b \f \n \r \t \uXXXX. // No literal control characters, supposedly. // Have also seen \' and embedded newlines. func _UnHex(p string, r, l int) (v int, ok bool) { v = 0; for i := r; i < l; i++ { if i >= len(p) { return 0, false } v *= 16; switch { case '0' <= p[i] && p[i] <= '9': v += int(p[i] - '0') case 'a' <= p[i] && p[i] <= 'f': v += int(p[i] - 'a' + 10) case 'A' <= p[i] && p[i] <= 'F': v += int(p[i] - 'A' + 10) default: return 0, false } } return v, true; } func _ToHex(b []byte, rune int) { const hexDigits = "0123456789abcdef"; b[0] = hexDigits[rune>>12&0xf]; b[1] = hexDigits[rune>>8&0xf]; b[2] = hexDigits[rune>>4&0xf]; b[3] = hexDigits[rune&0xf]; } // Unquote unquotes the JSON-quoted string s, // returning a raw string t. If s is not a valid // JSON-quoted string, Unquote returns with ok set to false. func Unquote(s string) (t string, ok bool) { if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { return } b := make([]byte, len(s)); w := 0; for r := 1; r < len(s)-1; { switch { case s[r] == '\\': r++; if r >= len(s)-1 { return } switch s[r] { default: return case '"', '\\', '/', '\'': b[w] = s[r]; r++; w++; case 'b': b[w] = '\b'; r++; w++; case 'f': b[w] = '\f'; r++; w++; case 'n': b[w] = '\n'; r++; w++; case 'r': b[w] = '\r'; r++; w++; case 't': b[w] = '\t'; r++; w++; case 'u': r++; rune, ok := _UnHex(s, r, r+4); if !ok { return } r += 4; w += utf8.EncodeRune(rune, b[w:]); } // Control characters are invalid, but we've seen raw \n. case s[r] < ' ' && s[r] != '\n': if s[r] == '\n' { b[w] = '\n'; r++; w++; break; } return; // ASCII case s[r] < utf8.RuneSelf: b[w] = s[r]; r++; w++; // Coerce to well-formed UTF-8. default: rune, size := utf8.DecodeRuneInString(s[r:]); r += size; w += utf8.EncodeRune(rune, b[w:]); } } return string(b[0:w]), true; } // Quote quotes the raw string s using JSON syntax, // so that Unquote(Quote(s)) = s, true. func Quote(s string) string { chr := make([]byte, 6); chr0 := chr[0:1]; b := new(bytes.Buffer); chr[0] = '"'; b.Write(chr0); for _, rune := range s { switch { case rune == '"' || rune == '\\': chr[0] = '\\'; chr[1] = byte(rune); b.Write(chr[0:2]); case rune == '\b': chr[0] = '\\'; chr[1] = 'b'; b.Write(chr[0:2]); case rune == '\f': chr[0] = '\\'; chr[1] = 'f'; b.Write(chr[0:2]); case rune == '\n': chr[0] = '\\'; chr[1] = 'n'; b.Write(chr[0:2]); case rune == '\r': chr[0] = '\\'; chr[1] = 'r'; b.Write(chr[0:2]); case rune == '\t': chr[0] = '\\'; chr[1] = 't'; b.Write(chr[0:2]); case 0x20 <= rune && rune < utf8.RuneSelf: chr[0] = byte(rune); b.Write(chr0); default: chr[0] = '\\'; chr[1] = 'u'; _ToHex(chr[2:6], rune); b.Write(chr); } } chr[0] = '"'; b.Write(chr0); return b.String(); } // _Lexer type _Lexer struct { s string; i int; kind int; token string; } func punct(c byte) bool { return c == '"' || c == '[' || c == ']' || c == ':' || c == '{' || c == '}' || c == ',' } func white(c byte) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' } func skipwhite(p string, i int) int { for i < len(p) && white(p[i]) { i++ } return i; } func skiptoken(p string, i int) int { for i < len(p) && !punct(p[i]) && !white(p[i]) { i++ } return i; } func skipstring(p string, i int) int { for i++; i < len(p) && p[i] != '"'; i++ { if p[i] == '\\' { i++ } } if i >= len(p) { return i } return i + 1; } func (t *_Lexer) Next() { i, s := t.i, t.s; i = skipwhite(s, i); if i >= len(s) { t.kind = 0; t.token = ""; t.i = len(s); return; } c := s[i]; switch { case c == '-' || '0' <= c && c <= '9': j := skiptoken(s, i); t.kind = '1'; t.token = s[i:j]; i = j; case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': j := skiptoken(s, i); t.kind = 'a'; t.token = s[i:j]; i = j; case c == '"': j := skipstring(s, i); t.kind = '"'; t.token = s[i:j]; i = j; case c == '[', c == ']', c == ':', c == '{', c == '}', c == ',': t.kind = int(c); t.token = s[i : i+1]; i++; default: t.kind = '?'; t.token = s[i : i+1]; } t.i = i; } // Parser // // Implements parsing but not the actions. Those are // carried out by the implementation of the Builder interface. // A Builder represents the object being created. // Calling a method like Int64(i) sets that object to i. // Calling a method like Elem(i) or Key(s) creates a // new builder for a subpiece of the object (logically, // an array element or a map key). // // There are two Builders, in other files. // The JsonBuilder builds a generic Json structure // in which maps are maps. // The StructBuilder copies data into a possibly // nested data structure, using the "map keys" // as struct field names. type _Value interface{} // BUG(rsc): The json Builder interface needs to be // reconciled with the xml Builder interface. // A Builder is an interface implemented by clients and passed // to the JSON parser. It gives clients full control over the // eventual representation returned by the parser. type Builder interface { // Set value Int64(i int64); Uint64(i uint64); Float64(f float64); String(s string); Bool(b bool); Null(); Array(); Map(); // Create sub-Builders Elem(i int) Builder; Key(s string) Builder; // Flush changes to parent Builder if necessary. Flush(); } func parse(lex *_Lexer, build Builder) bool { ok := false; Switch: switch lex.kind { case 0: break case '1': // If the number is exactly an integer, use that. if i, err := strconv.Atoi64(lex.token); err == nil { build.Int64(i); ok = true; } else if i, err := strconv.Atoui64(lex.token); err == nil { build.Uint64(i); ok = true; } else // Fall back to floating point. if f, err := strconv.Atof64(lex.token); err == nil { build.Float64(f); ok = true; } case 'a': switch lex.token { case "true": build.Bool(true); ok = true; case "false": build.Bool(false); ok = true; case "null": build.Null(); ok = true; } case '"': if str, ok1 := Unquote(lex.token); ok1 { build.String(str); ok = true; } case '[': // array build.Array(); lex.Next(); n := 0; for lex.kind != ']' { if n > 0 { if lex.kind != ',' { break Switch } lex.Next(); } if !parse(lex, build.Elem(n)) { break Switch } n++; } ok = true; case '{': // map lex.Next(); build.Map(); n := 0; for lex.kind != '}' { if n > 0 { if lex.kind != ',' { break Switch } lex.Next(); } if lex.kind != '"' { break Switch } key, ok := Unquote(lex.token); if !ok { break Switch } lex.Next(); if lex.kind != ':' { break Switch } lex.Next(); if !parse(lex, build.Key(key)) { break Switch } n++; } ok = true; } if ok { lex.Next() } build.Flush(); return ok; } // Parse parses the JSON syntax string s and makes calls to // the builder to construct a parsed representation. // On success, it returns with ok set to true. // On error, it returns with ok set to false, errindx set // to the byte index in s where a syntax error occurred, // and errtok set to the offending token. func Parse(s string, builder Builder) (ok bool, errindx int, errtok string) { lex := new(_Lexer); lex.s = s; lex.Next(); if parse(lex, builder) { if lex.kind == 0 { // EOF return true, 0, "" } } return false, lex.i, lex.token; }