master
  1package gitdiff
  2
  3import (
  4	"fmt"
  5	"io"
  6	"os"
  7	"strconv"
  8	"strings"
  9	"time"
 10)
 11
 12const (
 13	devNull = "/dev/null"
 14)
 15
 16// ParseNextFileHeader finds and parses the next file header in the stream. If
 17// a header is found, it returns a file and all input before the header. It
 18// returns nil if no headers are found before the end of the input.
 19func (p *parser) ParseNextFileHeader() (*File, string, error) {
 20	var preamble strings.Builder
 21	var file *File
 22	for {
 23		// check for disconnected fragment headers (corrupt patch)
 24		frag, err := p.ParseTextFragmentHeader()
 25		if err != nil {
 26			// not a valid header, nothing to worry about
 27			goto NextLine
 28		}
 29		if frag != nil {
 30			return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
 31		}
 32
 33		// check for a git-generated patch
 34		file, err = p.ParseGitFileHeader()
 35		if err != nil {
 36			return nil, "", err
 37		}
 38		if file != nil {
 39			return file, preamble.String(), nil
 40		}
 41
 42		// check for a "traditional" patch
 43		file, err = p.ParseTraditionalFileHeader()
 44		if err != nil {
 45			return nil, "", err
 46		}
 47		if file != nil {
 48			return file, preamble.String(), nil
 49		}
 50
 51	NextLine:
 52		preamble.WriteString(p.Line(0))
 53		if err := p.Next(); err != nil {
 54			if err == io.EOF {
 55				break
 56			}
 57			return nil, "", err
 58		}
 59	}
 60	return nil, preamble.String(), nil
 61}
 62
 63func (p *parser) ParseGitFileHeader() (*File, error) {
 64	const prefix = "diff --git "
 65
 66	if !strings.HasPrefix(p.Line(0), prefix) {
 67		return nil, nil
 68	}
 69	header := p.Line(0)[len(prefix):]
 70
 71	defaultName, err := parseGitHeaderName(header)
 72	if err != nil {
 73		return nil, p.Errorf(0, "git file header: %v", err)
 74	}
 75
 76	f := &File{}
 77	for {
 78		end, err := parseGitHeaderData(f, p.Line(1), defaultName)
 79		if err != nil {
 80			return nil, p.Errorf(1, "git file header: %v", err)
 81		}
 82
 83		if err := p.Next(); err != nil {
 84			if err == io.EOF {
 85				break
 86			}
 87			return nil, err
 88		}
 89
 90		if end {
 91			break
 92		}
 93	}
 94
 95	if f.OldName == "" && f.NewName == "" {
 96		if defaultName == "" {
 97			return nil, p.Errorf(0, "git file header: missing filename information")
 98		}
 99		f.OldName = defaultName
100		f.NewName = defaultName
101	}
102
103	if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
104		return nil, p.Errorf(0, "git file header: missing filename information")
105	}
106
107	return f, nil
108}
109
110func (p *parser) ParseTraditionalFileHeader() (*File, error) {
111	const shortestValidFragHeader = "@@ -1 +1 @@\n"
112	const (
113		oldPrefix = "--- "
114		newPrefix = "+++ "
115	)
116
117	oldLine, newLine := p.Line(0), p.Line(1)
118
119	if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
120		return nil, nil
121	}
122	// heuristic: only a file header if followed by a (probable) fragment header
123	if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
124		return nil, nil
125	}
126
127	// advance past the first two lines so parser is after the header
128	// no EOF check needed because we know there are >=3 valid lines
129	if err := p.Next(); err != nil {
130		return nil, err
131	}
132	if err := p.Next(); err != nil {
133		return nil, err
134	}
135
136	oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
137	if err != nil {
138		return nil, p.Errorf(0, "file header: %v", err)
139	}
140
141	newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
142	if err != nil {
143		return nil, p.Errorf(1, "file header: %v", err)
144	}
145
146	f := &File{}
147	switch {
148	case oldName == devNull || hasEpochTimestamp(oldLine):
149		f.IsNew = true
150		f.NewName = newName
151	case newName == devNull || hasEpochTimestamp(newLine):
152		f.IsDelete = true
153		f.OldName = oldName
154	default:
155		// if old name is a prefix of new name, use that instead
156		// this avoids picking variants like "file.bak" or "file~"
157		if strings.HasPrefix(newName, oldName) {
158			f.OldName = oldName
159			f.NewName = oldName
160		} else {
161			f.OldName = newName
162			f.NewName = newName
163		}
164	}
165
166	return f, nil
167}
168
169// parseGitHeaderName extracts a default file name from the Git file header
170// line. This is required for mode-only changes and creation/deletion of empty
171// files. Other types of patch include the file name(s) in the header data.
172// If the names in the header do not match because the patch is a rename,
173// return an empty default name.
174func parseGitHeaderName(header string) (string, error) {
175	header = strings.TrimSuffix(header, "\n")
176	if len(header) == 0 {
177		return "", nil
178	}
179
180	var err error
181	var first, second string
182
183	// there are 4 cases to account for:
184	//
185	//   1) unquoted unquoted
186	//   2) unquoted "quoted"
187	//   3) "quoted" unquoted
188	//   4) "quoted" "quoted"
189	//
190	quote := strings.IndexByte(header, '"')
191	switch {
192	case quote < 0:
193		// case 1
194		first = header
195
196	case quote > 0:
197		// case 2
198		first = header[:quote-1]
199		if !isSpace(header[quote-1]) {
200			return "", fmt.Errorf("missing separator")
201		}
202
203		second, _, err = parseQuotedName(header[quote:])
204		if err != nil {
205			return "", err
206		}
207
208	case quote == 0:
209		// case 3 or case 4
210		var n int
211		first, n, err = parseQuotedName(header)
212		if err != nil {
213			return "", err
214		}
215
216		// git accepts multiple spaces after a quoted name, but not after an
217		// unquoted name, since the name might end with one or more spaces
218		for n < len(header) && isSpace(header[n]) {
219			n++
220		}
221		if n == len(header) {
222			return "", nil
223		}
224
225		if header[n] == '"' {
226			second, _, err = parseQuotedName(header[n:])
227			if err != nil {
228				return "", err
229			}
230		} else {
231			second = header[n:]
232		}
233	}
234
235	first = trimTreePrefix(first, 1)
236	if second != "" {
237		if first == trimTreePrefix(second, 1) {
238			return first, nil
239		}
240		return "", nil
241	}
242
243	// at this point, both names are unquoted (case 1)
244	// since names may contain spaces, we can't use a known separator
245	// instead, look for a split that produces two equal names
246
247	for i := 0; i < len(first)-1; i++ {
248		if !isSpace(first[i]) {
249			continue
250		}
251		second = trimTreePrefix(first[i+1:], 1)
252		if name := first[:i]; name == second {
253			return name, nil
254		}
255	}
256	return "", nil
257}
258
259// parseGitHeaderData parses a single line of metadata from a Git file header.
260// It returns true when header parsing is complete; in that case, line was the
261// first line of non-header content.
262func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
263	if len(line) > 0 && line[len(line)-1] == '\n' {
264		line = line[:len(line)-1]
265	}
266
267	for _, hdr := range []struct {
268		prefix string
269		end    bool
270		parse  func(*File, string, string) error
271	}{
272		{"@@ -", true, nil},
273		{"--- ", false, parseGitHeaderOldName},
274		{"+++ ", false, parseGitHeaderNewName},
275		{"old mode ", false, parseGitHeaderOldMode},
276		{"new mode ", false, parseGitHeaderNewMode},
277		{"deleted file mode ", false, parseGitHeaderDeletedMode},
278		{"new file mode ", false, parseGitHeaderCreatedMode},
279		{"copy from ", false, parseGitHeaderCopyFrom},
280		{"copy to ", false, parseGitHeaderCopyTo},
281		{"rename old ", false, parseGitHeaderRenameFrom},
282		{"rename new ", false, parseGitHeaderRenameTo},
283		{"rename from ", false, parseGitHeaderRenameFrom},
284		{"rename to ", false, parseGitHeaderRenameTo},
285		{"similarity index ", false, parseGitHeaderScore},
286		{"dissimilarity index ", false, parseGitHeaderScore},
287		{"index ", false, parseGitHeaderIndex},
288	} {
289		if strings.HasPrefix(line, hdr.prefix) {
290			if hdr.parse != nil {
291				err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
292			}
293			return hdr.end, err
294		}
295	}
296
297	// unknown line indicates the end of the header
298	// this usually happens if the diff is empty
299	return true, nil
300}
301
302func parseGitHeaderOldName(f *File, line, defaultName string) error {
303	name, _, err := parseName(line, '\t', 1)
304	if err != nil {
305		return err
306	}
307	if f.OldName == "" && !f.IsNew {
308		f.OldName = name
309		return nil
310	}
311	return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
312}
313
314func parseGitHeaderNewName(f *File, line, defaultName string) error {
315	name, _, err := parseName(line, '\t', 1)
316	if err != nil {
317		return err
318	}
319	if f.NewName == "" && !f.IsDelete {
320		f.NewName = name
321		return nil
322	}
323	return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
324}
325
326func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
327	f.OldMode, err = parseMode(strings.TrimSpace(line))
328	return
329}
330
331func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
332	f.NewMode, err = parseMode(strings.TrimSpace(line))
333	return
334}
335
336func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
337	f.IsDelete = true
338	f.OldName = defaultName
339	return parseGitHeaderOldMode(f, line, defaultName)
340}
341
342func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
343	f.IsNew = true
344	f.NewName = defaultName
345	return parseGitHeaderNewMode(f, line, defaultName)
346}
347
348func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
349	f.IsCopy = true
350	f.OldName, _, err = parseName(line, 0, 0)
351	return
352}
353
354func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
355	f.IsCopy = true
356	f.NewName, _, err = parseName(line, 0, 0)
357	return
358}
359
360func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
361	f.IsRename = true
362	f.OldName, _, err = parseName(line, 0, 0)
363	return
364}
365
366func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
367	f.IsRename = true
368	f.NewName, _, err = parseName(line, 0, 0)
369	return
370}
371
372func parseGitHeaderScore(f *File, line, defaultName string) error {
373	score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
374	if err != nil {
375		nerr := err.(*strconv.NumError)
376		return fmt.Errorf("invalid score line: %v", nerr.Err)
377	}
378	if score <= 100 {
379		f.Score = int(score)
380	}
381	return nil
382}
383
384func parseGitHeaderIndex(f *File, line, defaultName string) error {
385	const sep = ".."
386
387	// note that git stops parsing if the OIDs are too long to be valid
388	// checking this requires knowing if the repository uses SHA1 or SHA256
389	// hashes, which we don't know, so we just skip that check
390
391	parts := strings.SplitN(line, " ", 2)
392	oids := strings.SplitN(parts[0], sep, 2)
393
394	if len(oids) < 2 {
395		return fmt.Errorf("invalid index line: missing %q", sep)
396	}
397	f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
398
399	if len(parts) > 1 {
400		return parseGitHeaderOldMode(f, parts[1], defaultName)
401	}
402	return nil
403}
404
// parseMode converts s, an octal number that fits in 32 bits, to an
// os.FileMode. On failure it returns a zero mode and an error that carries
// only the underlying cause reported by strconv.
func parseMode(s string) (os.FileMode, error) {
	bits, err := strconv.ParseInt(s, 8, 32)
	if err == nil {
		return os.FileMode(bits), nil
	}
	cause := err.(*strconv.NumError).Err
	return os.FileMode(0), fmt.Errorf("invalid mode line: %v", cause)
}
413
414// parseName extracts a file name from the start of a string and returns the
415// name and the index of the first character after the name. If the name is
416// unquoted and term is non-zero, parsing stops at the first occurrence of
417// term.
418//
419// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
420// if dropPrefix is greater than zero, that number of prefix components
421// separated by forward slashes are dropped from the name and any duplicate
422// slashes are collapsed.
423func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
424	if len(s) > 0 && s[0] == '"' {
425		name, n, err = parseQuotedName(s)
426	} else {
427		name, n, err = parseUnquotedName(s, term)
428	}
429	if err != nil {
430		return "", 0, err
431	}
432	if name == devNull {
433		return name, n, nil
434	}
435	return cleanName(name, dropPrefix), n, nil
436}
437
// parseQuotedName extracts a double-quoted file name from the start of s,
// which is expected to begin with the opening quote. It returns the unquoted
// name and the index of the first character after the closing quote.
//
// The closing quote is the first double quote that is not escaped. Escapes
// are tracked while scanning, so a name that ends in an escaped backslash
// (for example "dir\\") is terminated at the correct quote. An empty input
// or an empty quoted name ("") yields a "missing name" error; any other
// malformed input yields the error from strconv.Unquote.
func parseQuotedName(s string) (name string, n int, err error) {
	if len(s) == 0 {
		return "", 0, fmt.Errorf("missing name")
	}

	// Scan for the closing quote. The loop's post statement runs after the
	// quote is seen, which leaves n one past the closing quote. If no closing
	// quote exists, n ends at len(s) and Unquote rejects the input below.
	escaped, closed := false, false
	for n = 1; n < len(s) && !closed; n++ {
		switch {
		case escaped:
			// this byte is consumed by the preceding backslash
			escaped = false
		case s[n] == '\\':
			escaped = true
		case s[n] == '"':
			closed = true
		}
	}
	if n == 2 {
		return "", 0, fmt.Errorf("missing name")
	}
	if name, err = strconv.Unquote(s[:n]); err != nil {
		return "", 0, err
	}
	return name, n, nil
}
453
// parseUnquotedName extracts an unquoted file name from the start of s. The
// name ends at the first newline, at the first occurrence of term when term
// is non-zero, or at the end of s, whichever comes first. It returns the name
// and its length, or a "missing name" error when the name would be empty.
func parseUnquotedName(s string, term byte) (name string, n int, err error) {
	n = len(s)
	for i := 0; i < len(s); i++ {
		if c := s[i]; c == '\n' || (term != 0 && c == term) {
			n = i
			break
		}
	}
	if n == 0 {
		return "", 0, fmt.Errorf("missing name")
	}
	return s[:n], n, nil
}
468
469// verifyGitHeaderName checks a parsed name against state set by previous lines
470func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
471	if existing != "" {
472		if isNull {
473			return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
474		}
475		if existing != parsed {
476			return fmt.Errorf("inconsistent %s filename", side)
477		}
478	}
479	if isNull && parsed != devNull {
480		return fmt.Errorf("expected %s", devNull)
481	}
482	return nil
483}
484
// cleanName collapses each run of consecutive slashes in name into a single
// slash and drops the first drop slash-terminated prefix segments. Dropping
// a segment discards everything accumulated so far, including the slash
// itself.
func cleanName(name string, drop int) string {
	var out strings.Builder
	last := len(name) - 1
	for i := 0; i <= last; i++ {
		c := name[i]
		switch {
		case c != '/':
			out.WriteByte(c)
		case i < last && name[i+1] == '/':
			// not the final slash of a run; skip it
		case drop > 0:
			// end of a prefix segment that must be dropped
			drop--
			out.Reset()
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
503
// trimTreePrefix removes the first n slash-terminated components from the
// start of name and returns the remainder. If name contains fewer than n
// slashes, the whole name is consumed and the result is empty. A
// non-positive n returns name unchanged.
func trimTreePrefix(name string, n int) string {
	for n > 0 && name != "" {
		c := name[0]
		name = name[1:]
		if c == '/' {
			n--
		}
	}
	return name
}
514
// hasEpochTimestamp reports whether the text after the first tab in s is a
// POSIX-formatted timestamp equal to the UNIX epoch. A single trailing
// newline is ignored and the zone offset may be written with or without a
// colon. According to git, GNU diff uses this timestamp to mark creations
// and deletions.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	tab := strings.IndexByte(s, '\t')
	if tab < 0 {
		return false
	}
	stamp := strings.TrimSuffix(s[tab+1:], "\n")

	// The zone may be written "+hh:mm"; drop the colon so that one layout
	// covers both spellings.
	if colon := len(stamp) - 3; colon >= 0 && stamp[colon] == ':' {
		stamp = stamp[:colon] + stamp[colon+1:]
	}

	parsed, err := time.Parse(posixTimeLayout, stamp)
	return err == nil && parsed.Equal(time.Unix(0, 0))
}
543
// isSpace reports whether c is a space, a tab, or a newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n':
		return true
	}
	return false
}