// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package datafmt import ( "container/vector"; "go/scanner"; "go/token"; "os"; "strconv"; "strings"; ) // ---------------------------------------------------------------------------- // Parsing type parser struct { scanner.ErrorVector; scanner scanner.Scanner; pos token.Position; // token position tok token.Token; // one token look-ahead lit []byte; // token literal packs map[string]string; // PackageName -> ImportPath rules map[string]expr; // RuleName -> Expression } func (p *parser) next() { p.pos, p.tok, p.lit = p.scanner.Scan(); switch p.tok { case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT: // Go keywords for composite types are type names // returned by reflect. Accept them as identifiers. p.tok = token.IDENT // p.lit is already set correctly } } func (p *parser) init(filename string, src []byte) { p.ErrorVector.Reset(); p.scanner.Init(filename, src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message p.next(); // initializes pos, tok, lit p.packs = make(map[string]string); p.rules = make(map[string]expr); } func (p *parser) errorExpected(pos token.Position, msg string) { msg = "expected " + msg; if pos.Offset == p.pos.Offset { // the error happened at the current position; // make the error message more specific msg += ", found '" + p.tok.String() + "'"; if p.tok.IsLiteral() { msg += " " + string(p.lit) } } p.Error(pos, msg); } func (p *parser) expect(tok token.Token) token.Position { pos := p.pos; if p.tok != tok { p.errorExpected(pos, "'"+tok.String()+"'") } p.next(); // make progress in any case return pos; } func (p *parser) parseIdentifier() string { name := string(p.lit); p.expect(token.IDENT); return name; } func (p *parser) parseTypeName() (string, bool) { pos := p.pos; name, isIdent := p.parseIdentifier(), true; if p.tok == token.PERIOD { // got a package name, lookup package if importPath, found := p.packs[name]; found { name = importPath } else { p.Error(pos, "package not declared: "+name) } p.next(); name, isIdent = name+"."+p.parseIdentifier(), false; } return name, isIdent; } // Parses a rule name and returns it. If the rule name is // a package-qualified type name, the package name is resolved. // The 2nd result value is true iff the rule name consists of a // single identifier only (and thus could be a package name). // func (p *parser) parseRuleName() (string, bool) { name, isIdent := "", false; switch p.tok { case token.IDENT: name, isIdent = p.parseTypeName() case token.DEFAULT: name = "default"; p.next(); case token.QUO: name = "/"; p.next(); default: p.errorExpected(p.pos, "rule name"); p.next(); // make progress in any case } return name, isIdent; } func (p *parser) parseString() string { s := ""; if p.tok == token.STRING { s, _ = strconv.Unquote(string(p.lit)); // Unquote may fail with an error, but only if the scanner found // an illegal string in the first place. In this case the error // has already been reported. p.next(); return s; } else { p.expect(token.STRING) } return s; } func (p *parser) parseLiteral() literal { s := strings.Bytes(p.parseString()); // A string literal may contain %-format specifiers. To simplify // and speed up printing of the literal, split it into segments // that start with "%" possibly followed by a last segment that // starts with some other character. var list vector.Vector; i0 := 0; for i := 0; i < len(s); i++ { if s[i] == '%' && i+1 < len(s) { // the next segment starts with a % format if i0 < i { // the current segment is not empty, split it off list.Push(s[i0:i]); i0 = i; } i++; // skip %; let loop skip over char after % } } // the final segment may start with any character // (it is empty iff the string is empty) list.Push(s[i0:]); // convert list into a literal lit := make(literal, list.Len()); for i := 0; i < list.Len(); i++ { lit[i] = list.At(i).([]byte) } return lit; } func (p *parser) parseField() expr { var fname string; switch p.tok { case token.ILLEGAL: if string(p.lit) != "@" { return nil } fname = "@"; p.next(); case token.MUL: fname = "*"; p.next(); case token.IDENT: fname = p.parseIdentifier() default: return nil } var ruleName string; if p.tok == token.COLON { p.next(); ruleName, _ = p.parseRuleName(); } return &field{fname, ruleName}; } func (p *parser) parseOperand() (x expr) { switch p.tok { case token.STRING: x = p.parseLiteral() case token.LPAREN: p.next(); x = p.parseExpression(); if p.tok == token.SHR { p.next(); x = &group{x, p.parseExpression()}; } p.expect(token.RPAREN); case token.LBRACK: p.next(); x = &option{p.parseExpression()}; p.expect(token.RBRACK); case token.LBRACE: p.next(); x = p.parseExpression(); var div expr; if p.tok == token.QUO { p.next(); div = p.parseExpression(); } x = &repetition{x, div}; p.expect(token.RBRACE); default: x = p.parseField() // may be nil } return x; } func (p *parser) parseSequence() expr { var list vector.Vector; for x := p.parseOperand(); x != nil; x = p.parseOperand() { list.Push(x) } // no need for a sequence if list.Len() < 2 switch list.Len() { case 0: return nil case 1: return list.At(0).(expr) } // convert list into a sequence seq := make(sequence, list.Len()); for i := 0; i < list.Len(); i++ { seq[i] = list.At(i).(expr) } return seq; } func (p *parser) parseExpression() expr { var list vector.Vector; for { x := p.parseSequence(); if x != nil { list.Push(x) } if p.tok != token.OR { break } p.next(); } // no need for an alternatives if list.Len() < 2 switch list.Len() { case 0: return nil case 1: return list.At(0).(expr) } // convert list into a alternatives alt := make(alternatives, list.Len()); for i := 0; i < list.Len(); i++ { alt[i] = list.At(i).(expr) } return alt; } func (p *parser) parseFormat() { for p.tok != token.EOF { pos := p.pos; name, isIdent := p.parseRuleName(); switch p.tok { case token.STRING: // package declaration importPath := p.parseString(); // add package declaration if !isIdent { p.Error(pos, "illegal package name: "+name) } else if _, found := p.packs[name]; !found { p.packs[name] = importPath } else { p.Error(pos, "package already declared: "+name) } case token.ASSIGN: // format rule p.next(); x := p.parseExpression(); // add rule if _, found := p.rules[name]; !found { p.rules[name] = x } else { p.Error(pos, "format rule already declared: "+name) } default: p.errorExpected(p.pos, "package declaration or format rule"); p.next(); // make progress in any case } if p.tok == token.SEMICOLON { p.next() } else { break } } p.expect(token.EOF); } func remap(p *parser, name string) string { i := strings.Index(name, "."); if i >= 0 { packageName, suffix := name[0:i], name[i:]; // lookup package if importPath, found := p.packs[packageName]; found { name = importPath + suffix } else { var invalidPos token.Position; p.Error(invalidPos, "package not declared: "+packageName); } } return name; } // Parse parses a set of format productions from source src. Custom // formatters may be provided via a map of formatter functions. If // there are no errors, the result is a Format and the error is nil. // Otherwise the format is nil and a non-empty ErrorList is returned. // func Parse(filename string, src []byte, fmap FormatterMap) (Format, os.Error) { // parse source var p parser; p.init(filename, src); p.parseFormat(); // add custom formatters, if any for name, form := range fmap { name = remap(&p, name); if _, found := p.rules[name]; !found { p.rules[name] = &custom{name, form} } else { var invalidPos token.Position; p.Error(invalidPos, "formatter already declared: "+name); } } return p.rules, p.GetError(scanner.NoMultiples); }