// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package patch implements parsing and execution of the textual and
// binary patch descriptions used by version control tools such as
// CVS, Git, Mercurial, and Subversion.
package patch

import (
	"bytes";
	"os";
	"path";
	"strings";
)

// A Set represents a set of patches to be applied as a single atomic unit.
// Patch sets are often preceded by a descriptive header.
type Set struct {
	Header	string;	// free-form text that precedes the first file section
	File	[]*File;	// one entry per file section, in the order found in the patch
}

// A File represents a collection of changes to be made to a single file.
type File struct {
	Verb	Verb;	// the action to perform; Parse defaults this to Edit
	// Src is the source for Verb == Copy, Verb == Rename.
	// Parse also sets it to the deleted file's name for Verb == Delete
	// and to the same value as Dst for Verb == Edit.
	Src	string;
	Dst	string;	// destination file name; Parse leaves it empty for Verb == Delete
	OldMode, NewMode	int;	// 0 indicates not used
	// Diff is embedded, so its Apply method is promoted to File.
	Diff;	// changes to data; == NoDiff if operation does not edit file
}

// A Verb is an action performed on a file.
type Verb string

// The actions that a File can describe.
const (
	Add	Verb	= "add";
	Copy	Verb	= "copy";
	Delete	Verb	= "delete";
	Edit	Verb	= "edit";
	Rename	Verb	= "rename";
)

// A Diff is any object that describes changes to transform
// an old byte stream to a new one.
type Diff interface {
	// Apply applies the changes listed in the diff
	// to the byte slice old, returning the new version of the data.
	// Note that old need not hold text.
	Apply(old []byte) (new []byte, err os.Error);
}

// NoDiff is a no-op Diff implementation: it passes the
// old data through unchanged.
var NoDiff Diff = noDiffType(0)

// noDiffType is the type behind NoDiff; it carries no data.
type noDiffType int

// Apply returns old itself (not a copy) and a nil error.
func (noDiffType) Apply(old []byte) ([]byte, os.Error)	{ return old, nil }

// A SyntaxError represents a syntax error encountered while parsing a patch.
type SyntaxError string

// String returns the error text. Parse returns SyntaxError values
// as os.Error, and this is the only method SyntaxError defines.
func (e SyntaxError) String() string	{ return string(e) }

// newline is the line separator used when scanning patch text.
var newline = []byte{'\n'}

// Parse parses the patch text to create a patch Set.
// The patch text typically comprises a textual header and a sequence
// of file patches, as would be generated by CVS, Subversion,
// Mercurial, or Git.
func Parse(text []byte) (*Set, os.Error) { // Split text into files. // CVS and Subversion begin new files with // Index: file name. // ================== // diff -u blah blah // // Mercurial and Git use // diff [--git] a/file/path b/file/path. // // First look for Index: lines. If none, fall back on diff lines. text, files := sections(text, "Index: "); if len(files) == 0 { text, files = sections(text, "diff ") } set := &Set{string(text), make([]*File, len(files))}; // Parse file header and then // parse files into patch chunks. // Each chunk begins with @@. for i, raw := range files { p := new(File); set.File[i] = p; // First line of hdr is the Index: that // begins the section. After that is the file name. s, raw, _ := getLine(raw, 1); if hasPrefix(s, "Index: ") { p.Dst = string(bytes.TrimSpace(s[7:])); goto HaveName; } else if hasPrefix(s, "diff ") { str := string(bytes.TrimSpace(s)); i := strings.LastIndex(str, " b/"); if i >= 0 { p.Dst = str[i+3:]; goto HaveName; } } return nil, SyntaxError("unexpected patch header line: " + string(s)); HaveName: p.Dst = path.Clean(p.Dst); if strings.HasPrefix(p.Dst, "../") || strings.HasPrefix(p.Dst, "/") { return nil, SyntaxError("invalid path: " + p.Dst) } // Parse header lines giving file information: // new file mode %o - file created // deleted file mode %o - file deleted // old file mode %o - file mode changed // new file mode %o - file mode changed // rename from %s - file renamed from other file // rename to %s // copy from %s - file copied from other file // copy to %s p.Verb = Edit; for len(raw) > 0 { oldraw := raw; var l []byte; l, raw, _ = getLine(raw, 1); l = bytes.TrimSpace(l); if m, s, ok := atoi(l, "new file mode ", 8); ok && len(s) == 0 { p.NewMode = m; p.Verb = Add; continue; } if m, s, ok := atoi(l, "deleted file mode ", 8); ok && len(s) == 0 { p.OldMode = m; p.Verb = Delete; p.Src = p.Dst; p.Dst = ""; continue; } if m, s, ok := atoi(l, "old file mode ", 8); ok && len(s) == 0 { // usually implies p.Verb = 
"rename" or "copy" // but we'll get that from the rename or copy line. p.OldMode = m; continue; } if m, s, ok := atoi(l, "old mode ", 8); ok && len(s) == 0 { p.OldMode = m; continue; } if m, s, ok := atoi(l, "new mode ", 8); ok && len(s) == 0 { p.NewMode = m; continue; } if s, ok := skip(l, "rename from "); ok && len(s) > 0 { p.Src = string(s); p.Verb = Rename; continue; } if s, ok := skip(l, "rename to "); ok && len(s) > 0 { p.Verb = Rename; continue; } if s, ok := skip(l, "copy from "); ok && len(s) > 0 { p.Src = string(s); p.Verb = Copy; continue; } if s, ok := skip(l, "copy to "); ok && len(s) > 0 { p.Verb = Copy; continue; } if s, ok := skip(l, "Binary file "); ok && len(s) > 0 { // Hg prints // Binary file foo has changed // when deleting a binary file. continue } if s, ok := skip(l, "RCS file: "); ok && len(s) > 0 { // CVS prints // RCS file: /cvs/plan9/bin/yesterday,v // retrieving revision 1.1 // for each file. continue } if s, ok := skip(l, "retrieving revision "); ok && len(s) > 0 { // CVS prints // RCS file: /cvs/plan9/bin/yesterday,v // retrieving revision 1.1 // for each file. continue } if hasPrefix(l, "===") || hasPrefix(l, "---") || hasPrefix(l, "+++") || hasPrefix(l, "diff ") { continue } if hasPrefix(l, "@@ -") { diff, err := ParseTextDiff(oldraw); if err != nil { return nil, err } p.Diff = diff; break; } if hasPrefix(l, "index ") || hasPrefix(l, "GIT binary patch") { diff, err := ParseGitBinary(oldraw); if err != nil { return nil, err } p.Diff = diff; break; } return nil, SyntaxError("unexpected patch header line: " + string(l)); } if p.Diff == nil { p.Diff = NoDiff } if p.Verb == Edit { p.Src = p.Dst } } return set, nil; } // getLine returns the first n lines of data and the remainder. 
// If data has no newline, getLine returns data, nil, false.
// More generally, if data holds fewer than n newline-terminated lines,
// first is all of data, rest is nil, and ok is false.
func getLine(data []byte, n int) (first []byte, rest []byte, ok bool) {
	rest = data
	for ; n > 0; n-- {
		nl := bytes.IndexByte(rest, '\n')
		if nl < 0 {
			return data, nil, false
		}
		rest = rest[nl+1:]
	}
	// first is whatever was consumed from the front of data.
	return data[:len(data)-len(rest)], rest, true
}

// sections returns a collection of file sections,
// each of which begins with a line satisfying prefix.
// Text before the first instance of such a line is
// returned separately. All returned slices alias text.
func sections(text []byte, prefix string) ([]byte, [][]byte) {
	// First pass: count the lines that begin with prefix.
	n := 0
	for b := text; ; {
		if hasPrefix(b, prefix) {
			n++
		}
		nl := bytes.IndexByte(b, '\n')
		if nl < 0 {
			break
		}
		b = b[nl+1:]
	}

	// Second pass: cut text at each such line.
	// sect[0] is the leading text; sect[1:] are the sections.
	sect := make([][]byte, n+1)
	n = 0
	for b := text; ; {
		if hasPrefix(b, prefix) {
			sect[n] = text[:len(text)-len(b)]
			n++
			text = b
		}
		nl := bytes.IndexByte(b, '\n')
		if nl < 0 {
			// Whatever is left belongs to the last piece.
			sect[n] = text
			break
		}
		b = b[nl+1:]
	}
	return sect[0], sect[1:]
}

// If s begins with the prefix t, skip returns
// s with that prefix removed and ok == true.
// Otherwise it returns nil, false.
func skip(s []byte, t string) (ss []byte, ok bool) {
	if len(s) < len(t) || string(s[:len(t)]) != t {
		return nil, false
	}
	return s[len(t):], true
}

// If s begins with the prefix t followed by one or more digits in the
// given base, atoi returns the number represented by the digits, s with
// the prefix and the digits removed, and ok == true.
// If the prefix is missing or no digit follows it, atoi returns
// 0, nil, false.
// Only the digits '0' through '0'+base-1 are recognized, so base
// must be at most 10. Overflow is not detected.
func atoi(s []byte, t string, base int) (n int, ss []byte, ok bool) {
	s, ok = skip(s, t)
	if !ok {
		return 0, nil, false
	}
	i := 0
	for ; i < len(s) && '0' <= s[i] && s[i] <= byte('0'+base-1); i++ {
		n = n*base + int(s[i]-'0')
	}
	if i == 0 {
		// The prefix matched but no digits followed. Report failure
		// explicitly: ok is still true from the skip call above.
		return 0, nil, false
	}
	return n, s[i:], true
}

// hasPrefix returns true if s begins with t.
func hasPrefix(s []byte, t string) bool {
	_, ok := skip(s, t)
	return ok
}

// splitLines returns the result of splitting s into lines.
// The \n on each line is preserved. The final element is the text after
// the last newline, which is empty when s ends in a newline.
func splitLines(s []byte) [][]byte {
	return bytes.SplitAfter(s, []byte{'\n'})
}