package mailx

import (
    "bytes"
    "fmt"
    "regexp"
    "strings"
    "unicode/utf8"

    "pkg.jfrech.com/brief/internal/crumbs"
    "pkg.jfrech.com/brief/text"
)

// encodeAngleAddrPattern matches text that contains a '<', later an '@' and
// later a '>' (the rough shape of an angle-bracketed e-mail address). It is
// compiled once here instead of on every check performed by Encode.
var encodeAngleAddrPattern = regexp.MustCompile(`<.*@.*>`)

// Encode encodes a Field in a way to maximise both decodability of the exact
// (including neighbouring whitespace) Field.Body by most mail agents as well
// as maximise MUA passthrough.
//
// For a valid fieldname (nonempty, consisting of c>' '&&c!=':'&&c<='~') and
// a valid fieldbody (properly UTF-8-encoded), Encode is injective.
//
// Encoded field may be folded. Encoded field includes a trailing CRLF.
// A field with an empty body is encoded as the fixed name directly followed
// by ":" and CRLF.
//
// The empty fieldname is encoded as "-". Invalid fieldname characters are
// replaced by '-'. Invalid fieldbody bytes are encoded as utf8.RuneError
// (there is no non-character-semantics IANA charset so encoding true data in
// mail field bodies is not possible in any context).
//
// Overlong atoms (> 76 octets) or overlong lines (> 80 octets, including CRLF)
// may get emitted for whitespace-enclosed (counting left and right ends of
// string as implicit whitespace) strings of the form /<[!-~]*@[!-~]*>/ to aid
// in non-q-atom-compliant mail transfer agent's understanding of the message.
//
// Guaranteed to not emit lines longer than 1000 bytes (including CRLF) if
// len(field.Name) <= 972 (equivalently, len(field.Name) + len(": ") +
// len("=?UTF-8?Q?=FF=FF=FF=FF?=") + len("\r\n") <= 1000).
//
// NOTE: When decoding mixed qatom/atom streams, whitespace is discarded
// iff it is present between two qatoms.
// [2023-11-13, jfrech] NOTE: An empty Q-encoded atom (e.g. "=?UTF-8?Q??=")
//     is invalid by RFC specifications.
// [2023-11-22, jfrech] NOTE: Whitespace is kept between two atoms iff they
//     are not both q-atoms:
//         decode("a b") = "a b",
//         decode("a =?UTF-8?Q?b?=") = "a b",
//         decode("=?UTF-8?Q?a?= b") = "a b",
//         decode("=?UTF-8?Q?a?= =?UTF-8?Q?b?=") = "ab"
func (field Field) Encode() []byte {
    // TODO maybe fusing is not wise: keeping atoms apart may aid in semantics-sensitive contexts
    const fuseQatoms = true

    if field.Body == "" {
        return []byte(field.fixedName() + ":" + CRLF)
    }

    // isPlainAscii reports whether s can be emitted as-is: nonempty, free of
    // the qatom delimiters "=?" and "?=", not starting or ending in
    // whitespace and consisting only of tab, space and '!' through '~'.
    isPlainAscii := func(s string) bool {
        if strings.Contains(s, "=?") || strings.Contains(s, "?=") {
            return false
        }
        if len(s) == 0 || text.IsHTSP(s[0]) || text.IsHTSP(s[len(s)-1]) {
            return false
        }

        for _, c := range []byte(s) {
            if c == '\t' || c == ' ' {
                continue
            }
            if c < ' ' || c > '~' {
                return false
            }
        }
        return true
    }

    // looks like an e-mail address, if possible do not q-encode just to not
    // hit 80 char max
    encodingDiscouraged := func(s string) bool {
        return isPlainAscii(s) && encodeAngleAddrPattern.MatchString(s)
    }

    hasWhitespacePrefix := func(s string) bool {
        return len(s) > 0 && text.IsHTSP(s[0])
    }
    hasWhitespaceSuffix := func(s string) bool {
        return len(s) > 0 && text.IsHTSP(s[len(s)-1])
    }

    // forceValidUTF8 re-encodes s rune by rune; ranging over a string yields
    // utf8.RuneError for every invalid byte, which is then written out as a
    // properly encoded rune.
    forceValidUTF8 := func(s string) string {
        b := new(strings.Builder)
        for _, r := range s {
            // [2023-11-13, jfrech] NOTE: We can here even allow r=='\r' or
            // r=='\n' as they will correctly be q-encoded.
            // Thus, even r=='\x00' is permitted here.
            b.WriteRune(r)
        }
        return b.String()
    }

    // qesc encodes a single rune as qatom innards
    qesc := func(r rune) []byte {
        switch {
        // [2023-11-12, jfrech] Encoding ' ' as '_' is no longer
        // deemed too obscure.
        case r == ' ':
            return []byte{'_'}

        // only verbatim-encode i) ASCII characters which ii) have no
        // qatom-special meaning and iii) do not confuse primitive e-mail
        // search queries
        case (r > ' ' && r <= '~') && (r != '=' && r != '?' && r != '_') && (r != '<' && r != '@' && r != '>'):
            return []byte{byte(r)}

        // everything else becomes one "=XX" triple per UTF-8 byte
        default:
            buf := new(bytes.Buffer)
            var utf8buf [utf8.UTFMax]byte
            for _, c := range utf8buf[:utf8.EncodeRune(utf8buf[:], r)] {
                fmt.Fprintf(buf, "=%02X", int(c))
            }
            return buf.Bytes()
        }
    }

    // Part is a slice of the field body together with the decision whether
    // it is emitted as a qatom (NeedsEncoding) or verbatim.
    type Part struct {
        Text string
        NeedsEncoding bool
    }

    // Split the body into alternating whitespace and non-whitespace runs.
    var parts []Part
    for fb := forceValidUTF8(field.Body); len(fb) > 0; {
        switch {
        case text.IsHTSP(fb[0]):
            k := strings.IndexFunc(fb, func(r rune) bool {
                return !strings.ContainsRune(text.HTSP, r)
            })
            if k == -1 {
                k = len(fb)
            }

            part := fb[:k]
            fb = fb[k:]

            // At least three consecutive whitespaces allow for a qatom
            // surrounded by whitespace:
            //     "x   y" becomes "x =?UTF-8?Q?_?= y"
            if len(part) >= 3 {
                parts = append(parts, Part{Text: string(part[0])})
                parts = append(parts, Part{Text: part[1:len(part)-1], NeedsEncoding: true})
                parts = append(parts, Part{Text: string(part[len(part)-1])})
            } else {
                parts = append(parts, Part{Text: part})
            }
        default:
            k := strings.IndexAny(fb, text.HTSP)
            if k == -1 {
                k = len(fb)
            }
            part := fb[:k]
            fb = fb[k:]

            parts = append(parts, Part{
                Text: part,
                NeedsEncoding: !isPlainAscii(part),
            })
        }
    }

    // leading whitespace: always encoded; if it is longer than one byte, its
    // last byte is handed over to the following part
    if len(parts) > 0 && hasWhitespacePrefix(parts[0].Text) {
        parts[0].NeedsEncoding = true
        if len(parts) > 1 && len(parts[0].Text) > 1 {
            c := parts[0].Text[len(parts[0].Text)-1]
            parts[0].Text = parts[0].Text[:len(parts[0].Text)-1]
            parts[1].Text = string(c) + parts[1].Text
        }
    }
    // trailing whitespace: always encoded; if it is longer than one byte, its
    // first byte is handed over to the preceding part
    if len(parts) > 0 && hasWhitespaceSuffix(parts[len(parts)-1].Text) {
        parts[len(parts)-1].NeedsEncoding = true
        if len(parts) > 1 && len(parts[len(parts)-1].Text) > 1 {
            c := parts[len(parts)-1].Text[0]
            parts[len(parts)-1].Text = parts[len(parts)-1].Text[1:]
            parts[len(parts)-2].Text = parts[len(parts)-2].Text + string(c)
        }
    }

    for j := range parts {
        if parts[j].Text == "" {
            panic("unreachable: empty part")
        }
    }

    // avoid Part{"  ",false} (exactly two consecutive whitespace): one of the
    // two bytes is moved into a neighbouring part, which is (or becomes)
    // encoded, leaving a single plain whitespace behind
    for j := 1; j < len(parts)-1; j++ {
        if !parts[j].NeedsEncoding && len(parts[j].Text) == 2 && crumbs.All(text.IsHTSP, []byte(parts[j].Text)) {
            switch {
            case parts[j-1].NeedsEncoding:
                parts[j-1].Text = parts[j-1].Text + string(parts[j].Text[0])
                parts[j].Text = parts[j].Text[1:]
            case parts[j+1].NeedsEncoding:
                parts[j+1].Text = string(parts[j].Text[1]) + parts[j+1].Text
                parts[j].Text = parts[j].Text[:len(parts[j].Text)-1]

            // "<wakka@wuk.de>  <woergl.wukk.com>" should never occur, but the
            // behaviour is modeled after "Wukk <wukk@w.wo>", where the right
            // part is kept pristine.
            default:
                fallthrough
            case encodingDiscouraged(parts[j+1].Text):
                parts[j-1].Text = parts[j-1].Text + string(parts[j].Text[0])
                parts[j-1].NeedsEncoding = true
                parts[j].Text = parts[j].Text[1:]

            case encodingDiscouraged(parts[j-1].Text):
                parts[j+1].Text = string(parts[j].Text[1]) + parts[j+1].Text
                parts[j+1].NeedsEncoding = true
                parts[j].Text = parts[j].Text[:len(parts[j].Text)-1]
            }
        }
    }

    // force encoding: a part directly following an encoded part must start
    // with whitespace, and a part directly preceding an encoded part must end
    // with whitespace; otherwise it is encoded as well. Both rules are applied
    // twice per boundary so that a flag set by the second rule is seen by the
    // first.
    for j := 1; j < len(parts); j++ {
        for i := 0; i < 2; i++ { // TODO ugly
            if parts[j-1].NeedsEncoding && !hasWhitespacePrefix(parts[j].Text) {
                parts[j].NeedsEncoding = true
            }

            if !hasWhitespaceSuffix(parts[j-1].Text) && parts[j].NeedsEncoding {
                parts[j-1].NeedsEncoding = true
            }
        }
    }

    // limits: parts which would not fit verbatim are encoded so that they can
    // be split across lines; address-like parts are exempt from the atom
    // length limit but not from the line overlength limit
    if len(parts) > 0 && len(field.fixedName()) + len(": ") + len(parts[0].Text) + len(CRLF) > MaxLineOverlength {
        parts[0].NeedsEncoding = true
    }
    for j := range parts {
        if len(parts[j].Text) > MaxAtomLength && !encodingDiscouraged(parts[j].Text) {
            parts[j].NeedsEncoding = true
        }
        if len(" ") + len(parts[j].Text) + len(CRLF) > MaxLineOverlength {
            parts[j].NeedsEncoding = true
        }
    }

    // save squeezed-in full whitespace
    // (e.g. "...?= =?..." loses its whitespace whilst e.g. "...?= f =?..."
    // keeps both left and right)
    for j := 1; j < len(parts)-1; j++ {
        if parts[j-1].NeedsEncoding && crumbs.All(text.IsHTSP, []byte(parts[j].Text)) && parts[j+1].NeedsEncoding {
            parts[j].NeedsEncoding = true
        }
    }

    if fuseQatoms {
        // fuse .NeedsEncoding: merge each run of adjacent encoded parts into
        // its first member
        for j := 1; j < len(parts); j++ {
            if parts[j-1].NeedsEncoding && parts[j].NeedsEncoding {
                parts[j-1].Text += parts[j].Text
                parts[j] = Part{}
                parts = append(parts[:j], parts[j+1:]...)
                // revisit the same index, which now holds the next part
                j--
            }
        }
    }

    for j := range parts {
        if parts[j].Text == "" {
            panic("unreachable: empty part")
        }
    }

    // buf collects finished lines, line holds the line under construction.
    buf := new(bytes.Buffer)

    line := new(bytes.Buffer)
    line.Grow(MaxLineLength+1)
    line.WriteString(field.fixedName())
    line.WriteString(": ")

    // fold finishes the current line, which has to end in whitespace: that
    // whitespace byte is cut off, the remainder is flushed to buf with a CRLF
    // and the whitespace byte starts the next line. Nothing happens while the
    // line still only holds the field name and ": ".
    fold := func() {
        if line.Len() < 1 || !text.IsHTSP(line.Bytes()[line.Len()-1]) {
            panic("unreachable")
        }

        if buf.Len() == 0 && line.Len() == len(field.fixedName()) + len(": ") {
            return
        }

        c := line.Bytes()[line.Len()-1]
        line.Truncate(line.Len()-1)
        if line.Len() > 0 {
            line.WriteString(CRLF)
            buf.Write(line.Bytes())
        }
        line.Reset()
        line.WriteByte(c)
    }

    for _, part := range parts {
        switch {
        // verbatim part which is short enough (address-like parts may use the
        // larger overlength limit)
        case !part.NeedsEncoding && (len(part.Text) <= MaxLineLength-len(" ")-len(CRLF) || (encodingDiscouraged(part.Text) && len(part.Text) <= MaxLineOverlength-len(" ")-len(CRLF))):
            if line.Len() + len(part.Text) + len(CRLF) <= MaxLineLength {
                line.WriteString(part.Text)
                break
            }

            // does not fit onto the current line: fold at the whitespace
            // between the line and the part
            txt := part.Text
            if len(txt) <= 0 || line.Len() <= 0 {
                panic("unreachable")
            }
            if text.IsHTSP(txt[0]) {
                if text.IsHTSP(line.Bytes()[line.Len()-1]) {
                    panic("unreachable")
                }
                line.WriteByte(txt[0])
                txt = txt[1:]
            }

            fold()
            line.WriteString(txt)

        // everything else is emitted as one or more qatoms
        default:
            atominnards := new(bytes.Buffer)
            for txt := part.Text; len(txt) > 0; {
                // consecutive qatoms are separated by a space
                if bytes.HasSuffix(line.Bytes(), []byte("?=")) {
                    line.WriteString(" ")
                }
                if line.Len() < 1 || !text.IsHTSP(line.Bytes()[line.Len()-1]) {
                    panic("unreachable")
                }

                // fold if not even a qatom holding the next rune fits
                r0, _ := utf8.DecodeRuneInString(txt)
                if line.Len() + (len("=?UTF-8?Q?") + len(qesc(r0)) + len("?=")) + len(CRLF) > MaxLineLength {
                    fold()
                }

                // fill the qatom with as many runes as fit onto the line, but
                // with at least one
                atominnards.Reset()
                for len(txt) > 0 {
                    r, n := utf8.DecodeRuneInString(txt)
                    esc := qesc(r)
                    if atominnards.Len() > 0 && line.Len() + (len("=?UTF-8?Q?") + atominnards.Len() + len(esc) + len("?=")) + len(CRLF) > MaxLineLength {
                        break
                    }
                    txt = txt[n:]
                    // NOTE: Even if always bytes.Equal(qesc(r),[]byte(byte(r))) holds, qencoding is still necessary to not invent whitespace around an atom.
                    atominnards.Write(esc)
                }

                line.WriteString("=?UTF-8?Q?")
                line.Write(atominnards.Bytes())
                atominnards.Reset()
                line.WriteString("?=")
                // only the very first line may be overlong
                if buf.Len() > 0 && line.Len() + len(CRLF) > MaxLineLength {
                    panic("unreachable")
                }
            }
        }
    }
    if line.Len() > 0 {
        line.WriteString(CRLF)
        buf.Write(line.Bytes())
        line.Reset()
    }

    return buf.Bytes()
}