package addressparsing

import "strings"

// IsWS reports whether c is an ASCII horizontal tab, line feed, carriage
// return or space.
func IsWS(c byte) bool {
	switch c {
	case '\t', '\n', '\r', ' ':
		return true
	default:
		return false
	}
}

// TrimWS returns s without its leading and trailing runs of whitespace, where
// whitespace is exactly what [IsWS] accepts.
func TrimWS(s string) string {
	trimmable := func(r rune) bool {
		// Runes above '~' are rejected before the narrowing conversion so
		// that a wide rune whose low byte happens to be a whitespace byte
		// is never trimmed.
		if r > '~' {
			return false
		}
		return IsWS(byte(r))
	}
	return strings.TrimFunc(s, trimmable)
}

// DropExtraneousWhitespace removes redundant unquoted whitespace from s.
// Quoted sections are respected; comments are not.
//
// A run of unquoted whitespace is removed when it
//   - is at the very start or the very end of s,
//   - sits between two quoted sections, or
//   - is directly next to a '<', '>', '.' or '@' on either side.
//
// All other whitespace is copied through unchanged.
//
// DropExtraneousWhitespace is idempotent.
func DropExtraneousWhitespace(s string) string {
	// special reports whether c is one of the characters next to which
	// whitespace gets dropped.
	special := func(c byte) bool {
		switch c {
		case '<', '>', '.', '@':
			return true
		}
		return false
	}
	endsSpecial := func(atom string) bool {
		if atom == "" {
			panic("unreachable")
		}
		return special(atom[len(atom)-1])
	}
	startsSpecial := func(atom string) bool {
		if atom == "" {
			panic("unreachable")
		}
		return special(atom[0])
	}

	// window is a look-behind buffer of at most three atoms which have been
	// read but not yet written: left neighbour, candidate, right neighbour.
	window := make([]string, 0, 3)

	// popFront removes and returns the oldest atom of the window, or the
	// empty string if the window holds nothing.
	popFront := func() string {
		if len(window) == 0 {
			return ""
		}
		head := window[0]
		window = window[:copy(window, window[1:])]
		return head
	}

	// centreIsDroppable decides, for a full window, whether the middle atom
	// is whitespace that has to go. The order of the checks is significant
	// and must not be rearranged.
	centreIsDroppable := func() bool {
		left, centre, right := window[0], window[1], window[2]
		// whitespace between two quoted sections
		if left[0] == '"' && IsWS(centre[0]) && right[0] == '"' {
			return true
		}
		// whitespace directly after a special character
		if endsSpecial(left) && IsWS(centre[0]) {
			return true
		}
		// whitespace directly before a special character
		return IsWS(centre[0]) && startsSpecial(right)
	}

	var out strings.Builder

	// NOTE(review): the callback indexes the first byte of every atom, so it
	// presumably relies on FmapSplitByWhitespaceQuotedText handing over only
	// non-empty atoms (whitespace runs, quoted sections, other text) -- confirm
	// against that function.
	FmapSplitByWhitespaceQuotedText(s, func(atom string) {
		if len(window) < 3 {
			window = append(window, atom)
		}

		// As long as nothing has been emitted, whitespace at the front of
		// the window is leading whitespace: discard it.
		for out.Len() == 0 && len(window) > 0 && IsWS(window[0][0]) {
			popFront()
		}

		if len(window) != 3 {
			return
		}
		if centreIsDroppable() {
			// Close the gap: the right neighbour takes the centre's place.
			window[1] = window[2]
			window = window[:2]
			return
		}
		out.WriteString(popFront())
	})

	// The only whitespace which can be dropped at this point is trailing
	// whitespace; whitespace between special characters is no longer possible.
	switch {
	case len(window) > 2:
		panic("unreachable")
	case len(window) == 2 && IsWS(window[0][0]) && IsWS(window[1][0]):
		panic("unreachable")
	}

	// Cut trailing whitespace off the window, then emit what remains.
	end := len(window)
	for end > 0 && IsWS(window[end-1][0]) {
		end--
	}
	for _, atom := range window[:end] {
		out.WriteString(atom)
	}
	return out.String()

	// NOTE(jfrech): 2024-06-13: Old and buggy implementation carcase, before [FmapSplitByWhitespaceQuotedText].
	/*
		buf := new(bytes.Buffer)
		ws := new(bytes.Buffer)
		var quoted bool

		is := func(c byte) bool {
			return c == '<' || c == '>' || c == '.' || c == '@'
		}

		// left == -1 means start of string
		// right == -1 means end of string
		// else, these are bytes
		flush := func(left, right int) {
			if !(left == -1 || left >= 0 && left < 256) {
				panic("unreachable")
			}
			if !(right == -1 || right >= 0 && right < 256) {
				panic("unreachable")
			}

			defer ws.Reset()

			switch {
			case ws.Len() <= 0:
				// nothing to do

			// leave quoted whitespace pristine
			case quoted:
				fallthrough
			// by default, leave whitespace pristine
			default:
				buf.Write(ws.Bytes())

			// ignore leading and trailing whitespace
			case left == -1 || right == -1:

			// drop whitespace between quoted sections
			case byte(left) == '"' && byte(right) == '"':

			case is(byte(left)) || is(byte(right)):
			}
		}

		e := func(c byte) {
			if IsWS(c) {
				panic("unreachable")
			}

			left := -1
			if buf.Len() > 0 {
				left = int(buf.Bytes()[buf.Len()-1])
			}
			right := int(c)
			flush(left, right)
			buf.WriteByte(c)
		}

		for len(s) > 0 {
			c := s[0]
			s = s[1:]

			switch {
			case c == '"':
				quoted = !quoted
				e(c)
			case c == '\\' && quoted:
				e(c)
				if len(s) > 0 {
					c2 := s[0]
					s = s[1:]
					e(c2)
				}
			case IsWS(c) && !quoted:
				ws.WriteByte(c)
			default:
				e(c)
			}
		}

		// ignore trailing whitespace
		func() {
			left := -1
			if buf.Len() > 0 {
				left = int(buf.Bytes()[buf.Len()-1])
			}
			flush(left, -1)
		}()

		return buf.String()
	*/
}