// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The doc package extracts source code documentation from a Go AST.
package doc

import (
	"container/vector";
	"go/ast";
	"go/token";
	"regexp";
	"sort";
)


// ----------------------------------------------------------------------------

// typeDoc collects what the docReader has associated with one named type:
// the type's declaration plus the value declarations, factory functions,
// and methods attributed to it.
//
// NOTE: lookupTypeDoc builds typeDoc values with a positional composite
// literal, so the order of the fields below must not be changed without
// updating that literal.
//
type typeDoc struct {
	// len(decl.Specs) == 1, and the element type is *ast.TypeSpec
	// if the type declaration hasn't been seen yet, decl is nil
	decl	*ast.GenDecl;
	// values, factory functions, and methods associated with the type
	values		*vector.Vector;	// list of *ast.GenDecl (consts and vars)
	factories	map[string]*ast.FuncDecl;	// keyed by function name
	methods		map[string]*ast.FuncDecl;	// keyed by method name
}


// docReader accumulates documentation for a single package.
// It modifies the AST: Comments (declaration documentation)
// that have been collected by the DocReader are set to nil
// in the respective AST nodes so that they are not printed
// twice (once when printing the documentation and once when
// printing the corresponding AST node).
// type docReader struct { doc *ast.CommentGroup; // package documentation, if any pkgName string; values *vector.Vector; // list of *ast.GenDecl (consts and vars) types map[string]*typeDoc; funcs map[string]*ast.FuncDecl; bugs *vector.Vector; // list of *ast.CommentGroup } func (doc *docReader) init(pkgName string) { doc.pkgName = pkgName; doc.values = new(vector.Vector); doc.types = make(map[string]*typeDoc); doc.funcs = make(map[string]*ast.FuncDecl); doc.bugs = new(vector.Vector); } func (doc *docReader) addType(decl *ast.GenDecl) { spec := decl.Specs[0].(*ast.TypeSpec); typ := doc.lookupTypeDoc(spec.Name.Value); // typ should always be != nil since declared types // are always named - be conservative and check if typ != nil { // a type should be added at most once, so typ.decl // should be nil - if it isn't, simply overwrite it typ.decl = decl } } func (doc *docReader) lookupTypeDoc(name string) *typeDoc { if name == "" { return nil // no type docs for anonymous types } if tdoc, found := doc.types[name]; found { return tdoc } // type wasn't found - add one without declaration tdoc := &typeDoc{nil, new(vector.Vector), make(map[string]*ast.FuncDecl), make(map[string]*ast.FuncDecl)}; doc.types[name] = tdoc; return tdoc; } func baseTypeName(typ ast.Expr) string { switch t := typ.(type) { case *ast.Ident: // if the type is not exported, the effect to // a client is as if there were no type name if t.IsExported() { return string(t.Value) } case *ast.StarExpr: return baseTypeName(t.X) } return ""; } func (doc *docReader) addValue(decl *ast.GenDecl) { // determine if decl should be associated with a type // Heuristic: For each typed entry, determine the type name, if any. // If there is exactly one type name that is sufficiently // frequent, associate the decl with the respective type. 
domName := ""; domFreq := 0; prev := ""; for _, s := range decl.Specs { if v, ok := s.(*ast.ValueSpec); ok { name := ""; switch { case v.Type != nil: // a type is present; determine it's name name = baseTypeName(v.Type) case decl.Tok == token.CONST: // no type is present but we have a constant declaration; // use the previous type name (w/o more type information // we cannot handle the case of unnamed variables with // initializer expressions except for some trivial cases) name = prev } if name != "" { // entry has a named type if domName != "" && domName != name { // more than one type name - do not associate // with any type domName = ""; break; } domName = name; domFreq++; } prev = name; } } // determine values list const threshold = 0.75; values := doc.values; if domName != "" && domFreq >= int(float(len(decl.Specs))*threshold) { // typed entries are sufficiently frequent typ := doc.lookupTypeDoc(domName); if typ != nil { values = typ.values // associate with that type } } values.Push(decl); } func (doc *docReader) addFunc(fun *ast.FuncDecl) { name := fun.Name.Value; // determine if it should be associated with a type if fun.Recv != nil { // method typ := doc.lookupTypeDoc(baseTypeName(fun.Recv.Type)); if typ != nil { // exported receiver type typ.methods[name] = fun } // otherwise don't show the method // TODO(gri): There may be exported methods of non-exported types // that can be called because of exported values (consts, vars, or // function results) of that type. Could determine if that is the // case and then show those methods in an appropriate section. 
return; } // perhaps a factory function // determine result type, if any if len(fun.Type.Results) >= 1 { res := fun.Type.Results[0]; if len(res.Names) <= 1 { // exactly one (named or anonymous) result associated // with the first type in result signature (there may // be more than one result) tname := baseTypeName(res.Type); typ := doc.lookupTypeDoc(tname); if typ != nil { // named and exported result type // Work-around for failure of heuristic: In package os // too many functions are considered factory functions // for the Error type. Eliminate manually for now as // this appears to be the only important case in the // current library where the heuristic fails. if doc.pkgName == "os" && tname == "Error" && name != "NewError" && name != "NewSyscallError" { // not a factory function for os.Error doc.funcs[name] = fun; // treat as ordinary function return; } typ.factories[name] = fun; return; } } } // ordinary function doc.funcs[name] = fun; } func (doc *docReader) addDecl(decl ast.Decl) { switch d := decl.(type) { case *ast.GenDecl: if len(d.Specs) > 0 { switch d.Tok { case token.CONST, token.VAR: // constants and variables are always handled as a group doc.addValue(d) case token.TYPE: // types are handled individually var noPos token.Position; for _, spec := range d.Specs { // make a (fake) GenDecl node for this TypeSpec // (we need to do this here - as opposed to just // for printing - so we don't lose the GenDecl // documentation) // // TODO(gri): Consider just collecting the TypeSpec // node (and copy in the GenDecl.doc if there is no // doc in the TypeSpec - this is currently done in // makeTypeDocs below). Simpler data structures, but // would lose GenDecl documentation if the TypeSpec // has documentation as well. doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, noPos, []ast.Spec{spec}, noPos}) // A new GenDecl node is created, no need to nil out d.Doc. 
} } } case *ast.FuncDecl: doc.addFunc(d) } } func copyCommentList(list []*ast.Comment) []*ast.Comment { copy := make([]*ast.Comment, len(list)); for i, c := range list { copy[i] = c } return copy; } var ( bug_markers = regexp.MustCompile("^/[/*][ \t]*BUG\\(.*\\):[ \t]*"); // BUG(uid): bug_content = regexp.MustCompile("[^ \n\r\t]+"); // at least one non-whitespace char ) // addFile adds the AST for a source file to the docReader. // Adding the same AST multiple times is a no-op. // func (doc *docReader) addFile(src *ast.File) { // add package documentation if src.Doc != nil { // TODO(gri) This won't do the right thing if there is more // than one file with package comments. Consider // using ast.MergePackageFiles which handles these // comments correctly (but currently looses BUG(...) // comments). doc.doc = src.Doc; src.Doc = nil; // doc consumed - remove from ast.File node } // add all declarations for _, decl := range src.Decls { doc.addDecl(decl) } // collect BUG(...) comments for c := src.Comments; c != nil; c = c.Next { text := c.List[0].Text; cstr := string(text); if m := bug_markers.ExecuteString(cstr); len(m) > 0 { // found a BUG comment; maybe empty if bstr := cstr[m[1]:]; bug_content.MatchString(bstr) { // non-empty BUG comment; collect comment without BUG prefix list := copyCommentList(c.List); list[0].Text = text[m[1]:]; doc.bugs.Push(&ast.CommentGroup{list, nil}); } } } src.Comments = nil; // consumed unassociated comments - remove from ast.File node } func NewFileDoc(file *ast.File) *PackageDoc { var r docReader; r.init(file.Name.Value); r.addFile(file); return r.newDoc("", "", nil); } func NewPackageDoc(pkg *ast.Package, importpath string) *PackageDoc { var r docReader; r.init(pkg.Name); filenames := make([]string, len(pkg.Files)); i := 0; for filename, f := range pkg.Files { r.addFile(f); filenames[i] = filename; i++; } return r.newDoc(importpath, pkg.Path, filenames); } // ---------------------------------------------------------------------------- 
// Conversion to external representation // ValueDoc is the documentation for a group of declared // values, either vars or consts. // type ValueDoc struct { Doc string; Decl *ast.GenDecl; order int; } type sortValueDoc []*ValueDoc func (p sortValueDoc) Len() int { return len(p) } func (p sortValueDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func declName(d *ast.GenDecl) string { if len(d.Specs) != 1 { return "" } switch v := d.Specs[0].(type) { case *ast.ValueSpec: return v.Names[0].Value case *ast.TypeSpec: return v.Name.Value } return ""; } func (p sortValueDoc) Less(i, j int) bool { // sort by name // pull blocks (name = "") up to top // in original order if ni, nj := declName(p[i].Decl), declName(p[j].Decl); ni != nj { return ni < nj } return p[i].order < p[j].order; } func makeValueDocs(v *vector.Vector, tok token.Token) []*ValueDoc { d := make([]*ValueDoc, v.Len()); // big enough in any case n := 0; for i := range d { decl := v.At(i).(*ast.GenDecl); if decl.Tok == tok { d[n] = &ValueDoc{CommentText(decl.Doc), decl, i}; n++; decl.Doc = nil; // doc consumed - removed from AST } } d = d[0:n]; sort.Sort(sortValueDoc(d)); return d; } // FuncDoc is the documentation for a func declaration, // either a top-level function or a method function. 
// type FuncDoc struct { Doc string; Recv ast.Expr; // TODO(rsc): Would like string here Name string; Decl *ast.FuncDecl; } type sortFuncDoc []*FuncDoc func (p sortFuncDoc) Len() int { return len(p) } func (p sortFuncDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p sortFuncDoc) Less(i, j int) bool { return p[i].Name < p[j].Name } func makeFuncDocs(m map[string]*ast.FuncDecl) []*FuncDoc { d := make([]*FuncDoc, len(m)); i := 0; for _, f := range m { doc := new(FuncDoc); doc.Doc = CommentText(f.Doc); f.Doc = nil; // doc consumed - remove from ast.FuncDecl node if f.Recv != nil { doc.Recv = f.Recv.Type } doc.Name = f.Name.Value; doc.Decl = f; d[i] = doc; i++; } sort.Sort(sortFuncDoc(d)); return d; } // TypeDoc is the documentation for a declared type. // Consts and Vars are sorted lists of constants and variables of (mostly) that type. // Factories is a sorted list of factory functions that return that type. // Methods is a sorted list of method functions on that type. type TypeDoc struct { Doc string; Type *ast.TypeSpec; Consts []*ValueDoc; Vars []*ValueDoc; Factories []*FuncDoc; Methods []*FuncDoc; Decl *ast.GenDecl; order int; } type sortTypeDoc []*TypeDoc func (p sortTypeDoc) Len() int { return len(p) } func (p sortTypeDoc) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p sortTypeDoc) Less(i, j int) bool { // sort by name // pull blocks (name = "") up to top // in original order if ni, nj := p[i].Type.Name.Value, p[j].Type.Name.Value; ni != nj { return ni < nj } return p[i].order < p[j].order; } // NOTE(rsc): This would appear not to be correct for type ( ) // blocks, but the doc extractor above has split them into // individual declarations. 
func (doc *docReader) makeTypeDocs(m map[string]*typeDoc) []*TypeDoc { d := make([]*TypeDoc, len(m)); i := 0; for _, old := range m { // all typeDocs should have a declaration associated with // them after processing an entire package - be conservative // and check if decl := old.decl; decl != nil { typespec := decl.Specs[0].(*ast.TypeSpec); t := new(TypeDoc); doc := typespec.Doc; typespec.Doc = nil; // doc consumed - remove from ast.TypeSpec node if doc == nil { // no doc associated with the spec, use the declaration doc, if any doc = decl.Doc } decl.Doc = nil; // doc consumed - remove from ast.Decl node t.Doc = CommentText(doc); t.Type = typespec; t.Consts = makeValueDocs(old.values, token.CONST); t.Vars = makeValueDocs(old.values, token.VAR); t.Factories = makeFuncDocs(old.factories); t.Methods = makeFuncDocs(old.methods); t.Decl = old.decl; t.order = i; d[i] = t; i++; } else { // no corresponding type declaration found - move any associated // values, factory functions, and methods back to the top-level // so that they are not lost (this should only happen if a package // file containing the explicit type declaration is missing or if // an unqualified type name was used after a "." import) // 1) move values doc.values.AppendVector(old.values); // 2) move factory functions for name, f := range old.factories { doc.funcs[name] = f } // 3) move methods for name, f := range old.methods { // don't overwrite functions with the same name if _, found := doc.funcs[name]; !found { doc.funcs[name] = f } } } } d = d[0:i]; // some types may have been ignored sort.Sort(sortTypeDoc(d)); return d; } func makeBugDocs(v *vector.Vector) []string { d := make([]string, v.Len()); for i := 0; i < v.Len(); i++ { d[i] = CommentText(v.At(i).(*ast.CommentGroup)) } return d; } // PackageDoc is the documentation for an entire package. 
// type PackageDoc struct { PackageName string; ImportPath string; FilePath string; Filenames []string; Doc string; Consts []*ValueDoc; Types []*TypeDoc; Vars []*ValueDoc; Funcs []*FuncDoc; Bugs []string; } // newDoc returns the accumulated documentation for the package. // func (doc *docReader) newDoc(importpath, filepath string, filenames []string) *PackageDoc { p := new(PackageDoc); p.PackageName = doc.pkgName; p.ImportPath = importpath; p.FilePath = filepath; sort.SortStrings(filenames); p.Filenames = filenames; p.Doc = CommentText(doc.doc); // makeTypeDocs may extend the list of doc.values and // doc.funcs and thus must be called before any other // function consuming those lists p.Types = doc.makeTypeDocs(doc.types); p.Consts = makeValueDocs(doc.values, token.CONST); p.Vars = makeValueDocs(doc.values, token.VAR); p.Funcs = makeFuncDocs(doc.funcs); p.Bugs = makeBugDocs(doc.bugs); return p; } // ---------------------------------------------------------------------------- // Filtering by name // Does s look like a regular expression? 
func isRegexp(s string) bool { metachars := ".(|)*+?^$[]"; for _, c := range s { for _, m := range metachars { if c == m { return true } } } return false; } func match(s string, names []string) bool { for _, t := range names { if isRegexp(t) { if matched, _ := regexp.MatchString(t, s); matched { return true } } if s == t { return true } } return false; } func matchDecl(d *ast.GenDecl, names []string) bool { for _, d := range d.Specs { switch v := d.(type) { case *ast.ValueSpec: for _, name := range v.Names { if match(name.Value, names) { return true } } case *ast.TypeSpec: if match(v.Name.Value, names) { return true } } } return false; } func filterValueDocs(a []*ValueDoc, names []string) []*ValueDoc { w := 0; for _, vd := range a { if matchDecl(vd.Decl, names) { a[w] = vd; w++; } } return a[0:w]; } func filterFuncDocs(a []*FuncDoc, names []string) []*FuncDoc { w := 0; for _, fd := range a { if match(fd.Name, names) { a[w] = fd; w++; } } return a[0:w]; } func filterTypeDocs(a []*TypeDoc, names []string) []*TypeDoc { w := 0; for _, td := range a { n := 0; // number of matches if matchDecl(td.Decl, names) { n = 1 } else { // type name doesn't match, but we may have matching consts, vars, factories or methods td.Consts = filterValueDocs(td.Consts, names); td.Vars = filterValueDocs(td.Vars, names); td.Factories = filterFuncDocs(td.Factories, names); td.Methods = filterFuncDocs(td.Methods, names); n += len(td.Consts) + len(td.Vars) + len(td.Factories) + len(td.Methods); } if n > 0 { a[w] = td; w++; } } return a[0:w]; } // Filter eliminates information from d that is not // about one of the given names. // TODO: Recognize "Type.Method" as a name. // TODO(r): maybe precompile the regexps. // func (p *PackageDoc) Filter(names []string) { p.Consts = filterValueDocs(p.Consts, names); p.Vars = filterValueDocs(p.Vars, names); p.Types = filterTypeDocs(p.Types, names); p.Funcs = filterFuncDocs(p.Funcs, names); p.Doc = ""; // don't show top-level package doc }