diff options
Diffstat (limited to 'pkg/names/gender.go')
| -rw-r--r-- | pkg/names/gender.go | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/pkg/names/gender.go b/pkg/names/gender.go new file mode 100644 index 0000000..bdd16e1 --- /dev/null +++ b/pkg/names/gender.go @@ -0,0 +1,47 @@ +package names + +import ( + "airlines/pkg/model" + "strings" + "unicode" +) + +func normalizeTitle(x string) string { + x = strings.ToLower(x) + // strip common punctuation + x = strings.ReplaceAll(x, ".", "") + x = strings.ReplaceAll(x, "'", "") + x = strings.ReplaceAll(x, "’", "") + x = strings.ReplaceAll(x, "-", "") + return x +} + +func GenderFromTitle(s string) model.Sex { + if s == "" { + return model.SexUnknown + } + // only first token (before space/comma/slash/etc.) + cut := func(r rune) bool { return unicode.IsSpace(r) || r == ',' || r == '/' || r == '&' } + first := strings.FieldsFunc(s, cut) + if len(first) == 0 { + return model.SexUnknown + } + t := normalizeTitle(first[0]) + + // male honorifics + switch t { + case "mr", "sir", "lord", "monsieur", "m", "don", "senor", "sr": // "sr" may collide with "senior"; context needed + return model.SexMale + } + + // female honorifics + switch t { + case "mrs", "miss", "ms", "madam", "madame", "mademoiselle", "mlle", + "lady", "dame", "senora", "sra", "señora", "srta", "srita", "dona": + return model.SexFemale + } + + // neutral/ambiguous titles (return Unknown) + // e.g., "mx", "dr", "prof", "rev", "coach", "officer", etc. + return model.SexUnknown +} |
