1 files changed, 194 insertions, 0 deletions
diff --git a/pkg/names/fio.go b/pkg/names/fio.go
new file mode 100644
index 0000000..4ecca7e
--- /dev/null
+++ b/pkg/names/fio.go
@@ -0,0 +1,194 @@
+package names
+
+import (
+	"errors"
+	"regexp"
+	"strings"
+	"unicode"
+)
+
+// Parts holds the components of a personal name as classified by
+// ParseLatinName.
+type Parts struct {
+	// First is the given name and Last is the family name.
+	First, Last string
+	// Patronymic is optional: "" when absent, otherwise an initial such as
+	// "F" or a full patronymic.
+	Patronymic string
+}
+
+// ParseLatinName parses 2–3 tokens containing First/Last and optional patronymic (1–2 letters).
+// Tokens may be in any order, e.g. "PETROVSKAYA KARINA" or "RUSLAN F EVSEEV".
+func ParseLatinName(s string) (Parts, error) {
+	toks := tokenizeLatin(s) // keeps letters, apostrophes, hyphens, optional trailing dot
+	if len(toks) < 2 || len(toks) > 3 {
+		return Parts{}, errors.New("expecting 2 or 3 name parts")
+	}
+
+	type part struct {
+		raw string
+		lo  string
+	}
+	ps := make([]part, 0, len(toks))
+	for _, t := range toks {
+		lo := strings.ToLower(strings.TrimSuffix(t, "."))
+		ps = append(ps, part{raw: t, lo: lo})
+	}
+
+	// 1) Patronymic: 1–2 letters (optionally with a trailing dot), or RU-style patronymic suffix
+	pIdx := -1
+	for i, p := range ps {
+		if isInitial(p.raw) || isPatronymicLatin(p.lo) {
+			pIdx = i
+			break
+		}
+	}
+
+	// 2) Surname: look for common last-name suffixes among remaining tokens
+	lIdx := -1
+	for i, p := range ps {
+		if i == pIdx {
+			continue
+		}
+		if looksLikeSurnameLatin(p.lo) {
+			lIdx = i
+			break
+		}
+	}
+
+	// 3) Assign the rest to first name; tie-break if needed
+	rem := make([]int, 0, 2)
+	for i := range ps {
+		if i != pIdx && i != lIdx {
+			rem = append(rem, i)
+		}
+	}
+
+	// If surname not obvious and we have 2 leftovers, pick the longer one as surname
+	if lIdx == -1 && len(rem) == 2 {
+		if runeLen(ps[rem[0]].raw) >= runeLen(ps[rem[1]].raw) {
+			lIdx = rem[0]
+			rem = rem[1:]
+		} else {
+			lIdx = rem[1]
+			rem = rem[:1]
+		}
+	}
+
+	out := Parts{}
+	if pIdx != -1 {
+		out.Patronymic = strings.TrimSuffix(ps[pIdx].raw, ".")
+	}
+	if lIdx != -1 {
+		out.Last = ps[lIdx].raw
+	}
+
+	// Remaining becomes first name; if still empty (2 tokens), pick the non-surname/non-patronymic as first
+	if len(rem) == 1 {
+		out.First = ps[rem[0]].raw
+	} else if len(ps) == 2 {
+		for i := range ps {
+			if i != pIdx && i != lIdx {
+				out.First = ps[i].raw
+			}
+		}
+	}
+
+	// Normalize to Title Case (capitalize first letter, lowercase rest)
+	out.First = capWord(out.First)
+	out.Last = capWord(out.Last)
+	out.Patronymic = strings.ToUpper(out.Patronymic) // keep initials uppercase
+
+	// Sanity
+	if out.First == "" || out.Last == "" {
+		return out, errors.New("unable to classify parts")
+	}
+	return out, nil
+}
+
+func tokenizeLatin(s string) []string {
+	// keep letters, apostrophes, hyphens; allow an optional trailing dot for initials
+	re := regexp.MustCompile(`(?i)[a-z]+(?:['-][a-z]+)*\.?`)
+	return re.FindAllString(s, -1)
+}
+
+// isInitial reports whether x, ignoring one optional trailing dot, consists of
+// exactly one or two ASCII letters (e.g. "F", "f.", "Yu").
+func isInitial(x string) bool {
+	letters := []rune(strings.TrimSuffix(x, "."))
+	if n := len(letters); n < 1 || n > 2 {
+		return false
+	}
+	return allASCIIAlpha(letters)
+}
+
+func isPatronymicLatin(lo string) bool {
+	// Latin transliterations of RU patronymics (very rough)
+	sufs := []string{"ovich", "evich", "ich", "ovna", "evna", "ichna", "ogly", "kyzy"}
+	for _, s := range sufs {
+		if strings.HasSuffix(lo, s) && len(lo) >= len(s)+2 {
+			return true
+		}
+	}
+	return false
+}
+
+func looksLikeSurnameLatin(lo string) bool {
+	// Common Slavic surname endings (male & female forms)
+	sufs := []string{
+		"ov", "ev", "in", "ina", "ova", "eva",
+		"sky", "skiy", "skii", "skaya", "ska",
+		"enko", "ienko",
+		"uk", "yk", "chuk", "czuk",
+		"yan", "ian",
+		"dze", "dze", "shvili",
+	}
+	for _, s := range sufs {
+		if strings.HasSuffix(lo, s) {
+			return true
+		}
+	}
+	// If token contains an apostrophe mid-word (e.g., emel'yanova), still may be a surname
+	if strings.Contains(lo, "'") {
+		// feminine -'yanova/-'eva etc.
+		if strings.HasSuffix(lo, "yanova") || strings.HasSuffix(lo, "yanov") || strings.HasSuffix(lo, "eva") || strings.HasSuffix(lo, "ova") {
+			return true
+		}
+	}
+	return false
+}
+
+func capWord(s string) string {
+	if s == "" {
+		return s
+	}
+	// keep internal hyphens/apostrophes, title-case each segment
+	sep := func(r rune) bool { return r == '-' || r == '\'' }
+	parts := strings.FieldsFunc(strings.ToLower(s), sep)
+	i := 0
+	builder := strings.Builder{}
+	for _, r := range s {
+		if r == '-' || r == '\'' {
+			builder.WriteRune(r)
+			continue
+		}
+		// find which sub-part this rune belongs to by counting letters consumed
+		if len(parts) == 0 {
+			builder.WriteRune(unicode.ToUpper(r))
+			continue
+		}
+		if i == 0 {
+			builder.WriteRune(unicode.ToUpper(r))
+		} else {
+			builder.WriteRune(unicode.ToLower(r))
+		}
+		i++
+		// crude reset at separators handled above
+	}
+	// Simpler/robust alternative:
+	// return strings.Title(strings.ToLower(s)) // deprecated but OK for ASCII; avoided here.
+	return strings.ToUpper(string([]rune(s)[0])) + strings.ToLower(s[1:])
+}
+
+// allASCIIAlpha reports whether every rune in r is an ASCII letter (A–Z or
+// a–z). It returns true for an empty slice.
+func allASCIIAlpha(r []rune) bool {
+	for i := 0; i < len(r); i++ {
+		c := r[i]
+		upper := 'A' <= c && c <= 'Z'
+		lower := 'a' <= c && c <= 'z'
+		if !upper && !lower {
+			return false
+		}
+	}
+	return true
+}
+
+// runeLen returns the number of runes (not bytes) in s.
+func runeLen(s string) int {
+	count := 0
+	for range s {
+		count++
+	}
+	return count
+}