patch_identity.go

  1package gitdiff
  2
  3import (
  4	"fmt"
  5	"strings"
  6)
  7
  8// PatchIdentity identifies a person who authored or committed a patch.
  9type PatchIdentity struct {
 10	Name  string
 11	Email string
 12}
 13
 14func (i PatchIdentity) String() string {
 15	name := i.Name
 16	if name == "" {
 17		name = `""`
 18	}
 19	return fmt.Sprintf("%s <%s>", name, i.Email)
 20}
 21
 22// ParsePatchIdentity parses a patch identity string. A patch identity contains
 23// an email address and an optional name in [RFC 5322] format. This is either a
 24// plain email adddress or a name followed by an address in angle brackets:
 25//
 26//	author@example.com
 27//	Author Name <author@example.com>
 28//
 29// If the input is not one of these formats, ParsePatchIdentity applies a
 30// heuristic to separate the name and email portions. If both the name and
 31// email are missing or empty, ParsePatchIdentity returns an error. It
 32// otherwise does not validate the result.
 33//
 34// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
 35func ParsePatchIdentity(s string) (PatchIdentity, error) {
 36	s = normalizeSpace(s)
 37	s = unquotePairs(s)
 38
 39	var name, email string
 40	if at := strings.IndexByte(s, '@'); at >= 0 {
 41		start, end := at, at
 42		for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
 43			start--
 44		}
 45		for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
 46			end++
 47		}
 48		email = s[start+1 : end]
 49
 50		// Adjust the boundaries so that we drop angle brackets, but keep
 51		// spaces when removing the email to form the name.
 52		if start < 0 || s[start] != '<' {
 53			start++
 54		}
 55		if end >= len(s) || s[end] != '>' {
 56			end--
 57		}
 58		name = s[:start] + s[end+1:]
 59	} else {
 60		start, end := 0, 0
 61		for i := 0; i < len(s); i++ {
 62			if s[i] == '<' && start == 0 {
 63				start = i + 1
 64			}
 65			if s[i] == '>' && start > 0 {
 66				end = i
 67				break
 68			}
 69		}
 70		if start > 0 && end >= start {
 71			email = strings.TrimSpace(s[start:end])
 72			name = s[:start-1]
 73		}
 74	}
 75
 76	// After extracting the email, the name might contain extra whitespace
 77	// again and may be surrounded by comment characters. The git source gives
 78	// these examples of when this can happen:
 79	//
 80	//   "Name <email@domain>"
 81	//   "email@domain (Name)"
 82	//   "Name <email@domain> (Comment)"
 83	//
 84	name = normalizeSpace(name)
 85	if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
 86		name = name[1 : len(name)-1]
 87	}
 88	name = strings.TrimSpace(name)
 89
 90	// If the name is empty or contains email-like characters, use the email
 91	// instead (assuming one exists)
 92	if name == "" || strings.ContainsAny(name, "@<>") {
 93		name = email
 94	}
 95
 96	if name == "" && email == "" {
 97		return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
 98	}
 99	return PatchIdentity{Name: name, Email: email}, nil
100}
101
// unquotePairs processes the RFC 5322 tokens "quoted-string" and "comment",
// removing any "quoted-pairs" (backslash-escaped characters) found inside
// them: the backslash is dropped and the byte after it is kept verbatim. The
// double quotes around quoted strings are removed as well, while the
// parentheses that delimit comments are left in place. Backslashes outside of
// quoted strings and comments, and double quotes inside comments, are copied
// through unchanged.
func unquotePairs(s string) string {
	var (
		out      strings.Builder
		inQuote  bool // inside a quoted-string
		depth    int  // nesting level of comments
		takeNext bool // previous byte was a dropped backslash
	)

	for _, c := range []byte(s) {
		if takeNext {
			// The escaped byte is always emitted and never interpreted.
			takeNext = false
			out.WriteByte(c)
			continue
		}

		switch {
		case c == '\\' && (inQuote || depth > 0):
			// quoted-pair is only recognized in quoted-string/comment
			takeNext = true
			continue
		case c == '"' && depth == 0:
			inQuote = !inQuote
			continue
		case c == '(' && !inQuote:
			depth++
		case c == ')' && depth > 0:
			depth--
		}
		out.WriteByte(c)
	}
	return out.String()
}
143
144// normalizeSpace trims leading and trailing whitespace from s and converts
145// inner sequences of one or more whitespace characters to single spaces.
146func normalizeSpace(s string) string {
147	var sb strings.Builder
148	for i := 0; i < len(s); i++ {
149		c := s[i]
150		if !isRFC5332Space(c) {
151			if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
152				sb.WriteByte(' ')
153			}
154			sb.WriteByte(c)
155		}
156	}
157	return sb.String()
158}
159
// isRFC5332Space reports whether c is a tab, line feed, carriage return, or
// space byte. Note that the digits in the name are transposed relative to
// RFC 5322, the specification referenced by ParsePatchIdentity.
func isRFC5332Space(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}