diff options
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/adapters/json/json.go | 4 | ||||
| -rw-r--r-- | pkg/adapters/json/model.go | 8 | ||||
| -rw-r--r-- | pkg/adapters/xlsx/model.go | 138 | ||||
| -rw-r--r-- | pkg/adapters/xlsx/registry.go | 69 | ||||
| -rw-r--r-- | pkg/adapters/xlsx/xlsx.go | 90 | ||||
| -rw-r--r-- | pkg/names/fio.go | 194 | ||||
| -rw-r--r-- | pkg/names/gender.go | 47 | ||||
| -rw-r--r-- | pkg/store/db.go | 1 |
8 files changed, 545 insertions, 6 deletions
diff --git a/pkg/adapters/json/json.go b/pkg/adapters/json/json.go index c0ea4e4..47a563e 100644 --- a/pkg/adapters/json/json.go +++ b/pkg/adapters/json/json.go @@ -100,7 +100,7 @@ type JsonCard struct { func (r *JsonRoot) DumpToDb(ctx context.Context, s *store.Store) { var err error for _, user := range r.ForumProfiles { - dbUser := user.ToUser() + dbUser, _ := user.ToUser() dbUser, err = s.CreateOrGetUser(ctx, dbUser) if err != nil { panic(err) @@ -120,7 +120,7 @@ func (r *JsonRoot) DumpToDb(ctx context.Context, s *store.Store) { } for _, flight := range user.RegisteredFlights { - dbFlight := flight.ToFlight() + dbFlight, _ := flight.ToFlight() _, err = s.AddFlightToUser(ctx, dbUser.ID, dbFlight) if err != nil { fmt.Println(err) diff --git a/pkg/adapters/json/model.go b/pkg/adapters/json/model.go index 2a91f21..2cc5d8e 100644 --- a/pkg/adapters/json/model.go +++ b/pkg/adapters/json/model.go @@ -32,7 +32,7 @@ func onlyDigits(s string) string { return string(out) } -func (jp JsonProfile) ToUser() *model.User { +func (jp JsonProfile) ToUser() (*model.User, error) { return &model.User{ Name: sOrEmpty(jp.RealName.FirstName), Surname: sOrEmpty(jp.RealName.LastName), @@ -40,10 +40,10 @@ func (jp JsonProfile) ToUser() *model.User { Fathersname: "", Sex: jp.Sex, Birthday: model.SentinelBirthday(), - } + }, nil } -func (jf JsonFlight) ToFlight() *model.Flight { +func (jf JsonFlight) ToFlight() (*model.Flight, error) { return &model.Flight{ Number: jf.Flight, From: jf.Departure.Airport, @@ -53,7 +53,7 @@ func (jf JsonFlight) ToFlight() *model.Flight { ToCity: jf.Arrival.City, ToCountry: jf.Arrival.Country, Date: jf.Date.ToDateUTC(), - } + }, nil } func (jc JsonCard) ToCard() (*model.Card, error) { diff --git a/pkg/adapters/xlsx/model.go b/pkg/adapters/xlsx/model.go new file mode 100644 index 0000000..d8c5194 --- /dev/null +++ b/pkg/adapters/xlsx/model.go @@ -0,0 +1,138 @@ +package xlsx + +import ( + "errors" + "regexp" + "strconv" + "strings" + "time" + + "github.com/leonm1/airports-go" +) + +type Ticket struct { + Sheet string + Passenger string + Title string + FlightNumber string + FromCity string + ToCity string + FromAirport string + ToAirport string + FlightDate string // (raw, expected YYYY-MM-DD; Excel text may start with ') + FlightTime string // (raw, expected HH-MM or HH:MM; Excel text may start with ') + PNR string + Card string + TicketNumber string // (may have a leading ' in Excel) +} + +func (t Ticket) DateTime() (time.Time, *time.Location, error) { + loc := t.inferLocationFromAirports() + date := strings.TrimLeft(strings.TrimSpace(t.FlightDate), "'") + hm := strings.TrimLeft(strings.TrimSpace(t.FlightTime), "'") + hm = strings.ReplaceAll(hm, "-", ":") + + if date == "" || hm == "" { + return time.Time{}, loc, errors.New("missing FlightDate or FlightTime") + } + ts, err := time.ParseInLocation("2006-01-02 15:04", date+" "+hm, loc) + return ts, loc, err +} + +func (t Ticket) inferLocationFromAirports() *time.Location { + if loc := iataToLocation(t.FromAirport); loc != nil { + return loc + } + if loc := iataToLocation(t.ToAirport); loc != nil { + return loc + } + return time.Local +} + +func iataToLocation(code string) *time.Location { + iata := strings.ToUpper(strings.TrimSpace(code)) + if len(iata) != 3 { + return nil + } + ap, err := airports.LookupIATA(iata) + if err != nil { + return nil + } + // Prefer IANA tz name + if tz := strings.TrimSpace(ap.Tz); tz != "" && tz != `\N` { + if loc, err := time.LoadLocation(tz); err == nil { + return loc + } + } + // Fallback: fixed offset (no DST) + if ap.Timezone != 0 { + sec := int(ap.Timezone * 3600.0) + return time.FixedZone("UTC"+offsetLabel(sec), sec) + } + return nil +} + +func offsetLabel(sec int) string { + sign := "+" + if sec < 0 { + sign = "-" + sec = -sec + } + h := sec / 3600 + m := (sec % 3600) / 60 + return sign + two(h) + ":" + two(m) +} +func two(x int) string { + if x < 10 { + return "0" + strconv.Itoa(x) + } + return strconv.Itoa(x) +} + +func parseCardLine(s string) (prefix string, number uint64, bonus string) { + raw := strings.TrimSpace(s) + if raw == "" { + return "", 0, "" + } + // number = last run of digits + if m := regexp.MustCompile(`(\d{3,})\D*$`).FindStringSubmatch(raw); len(m) == 2 { + if n, err := strconv.ParseUint(m[1], 10, 64); err == nil { + number = n + } + } + + // tokens (letters with '-', '/', apostrophes) + tokRe := regexp.MustCompile(`[A-Za-z][A-Za-z'/-]*`) + toks := tokRe.FindAllString(s, -1) + + // prefix = first 2–3 letter all-caps-ish token + for _, t := range toks { + u := strings.ToUpper(t) + if len(u) >= 2 && len(u) <= 3 && regexp.MustCompile(`^[A-Z]{2,3}$`).MatchString(u) { + prefix = u + break + } + } + // bonus = all tokens except prefix + words := []string{} + for _, t := range toks { + if strings.ToUpper(t) == prefix { + continue + } + words = append(words, t) + } + if len(words) > 0 { + bonus = strings.Join(words, " ") + } + if bonus == "" && prefix != "" { + bonus = prefix + } + return +} + +func firstNonEmpty(a, b string) string { + if strings.TrimSpace(a) != "" { + return a + } + return b +} diff --git a/pkg/adapters/xlsx/registry.go b/pkg/adapters/xlsx/registry.go new file mode 100644 index 0000000..46c395e --- /dev/null +++ b/pkg/adapters/xlsx/registry.go @@ -0,0 +1,69 @@ +package xlsx + +import ( + "fmt" + "strings" + + "airlines/pkg/model" + "airlines/pkg/names" + + "github.com/leonm1/airports-go" +) + +func (t Ticket) ToUser() (model.User, error) { + fio, err := names.ParseLatinName(t.Passenger) + if err != nil { + return model.User{}, fmt.Errorf("%v %s", t.Sheet, err.Error()) + } + sex := names.GenderFromTitle(t.Title) + + u := model.User{ + Nick: "", + Name: fio.First, + Surname: fio.Last, + Fathersname: fio.Patronymic, + Sex: sex, + } + return u, nil +} + +func (t Ticket) ToCard() (model.Card, error) { + prefix, number, bonus := parseCardLine(t.Card) + if number == 0 && prefix == "" && bonus == "" { + return model.Card{}, nil + } + return model.Card{ + Prefix: prefix, + Number: number, + Bonusprogramm: "", + }, nil +} + +func (t Ticket) ToFlight() (model.Flight, error) { + // Resolve IATA records + fromIATA := strings.ToUpper(strings.TrimSpace(t.FromAirport)) + toIATA := strings.ToUpper(strings.TrimSpace(t.ToAirport)) + + fromRec, _ := airports.LookupIATA(fromIATA) + toRec, _ := airports.LookupIATA(toIATA) + + fromCity := firstNonEmpty(strings.TrimSpace(t.FromCity), fromRec.City) + toCity := firstNonEmpty(strings.TrimSpace(t.ToCity), toRec.City) + + fromCountry := fromRec.Country + toCountry := toRec.Country + departUTC, _, err := t.DateTime() + if err != nil { + return model.Flight{}, err + } + return model.Flight{ + Number: strings.TrimSpace(t.FlightNumber), + From: fromIATA, + FromCity: fromCity, + FromCountry: fromCountry, + To: toIATA, + ToCity: toCity, + ToCountry: toCountry, + Date: departUTC, + }, nil +} diff --git a/pkg/adapters/xlsx/xlsx.go b/pkg/adapters/xlsx/xlsx.go new file mode 100644 index 0000000..6ef9baa --- /dev/null +++ b/pkg/adapters/xlsx/xlsx.go @@ -0,0 +1,90 @@ +package xlsx + +import ( + "fmt" + "strings" + + "github.com/xuri/excelize/v2" +) + +func UnmarshallXlsxFile(fname string) ([]Ticket, error) { + var err error + f, err := excelize.OpenFile(fname) + if err != nil { + return nil, err + } + defer func() { + if err = f.Close(); err != nil { + fmt.Println(err) + } + }() + + get := func(sheet, cell string) (string, error) { + v, err := f.GetCellValue(sheet, cell) + if err != nil { + return "", fmt.Errorf("%s %s: %w", sheet, cell, err) + } + v = strings.Trim(v, " `'\"") + return v, nil + } + + sheetMap := f.GetSheetMap() + tickets := make([]Ticket, 0, len(sheetMap)) + + for _, sheet := range sheetMap { + t := Ticket{} + t.Sheet = sheet + + t.Passenger, err = get(sheet, "B3") + if err != nil { + return nil, err + } + t.Title, err = get(sheet, "A3") + if err != nil { + return nil, err + } + t.FlightNumber, err = get(sheet, "A5") + if err != nil { + return nil, err + } + t.FromCity, err = get(sheet, "D5") + if err != nil { + return nil, err + } + t.ToCity, err = get(sheet, "H5") + if err != nil { + return nil, err + } + t.FromAirport, err = get(sheet, "D7") + if err != nil { + return nil, err + } + t.ToAirport, err = get(sheet, "H7") + if err != nil { + return nil, err + } + t.FlightDate, err = get(sheet, "A9") + if err != nil { + return nil, err + } + t.FlightTime, err = get(sheet, "C9") + if err != nil { + return nil, err + } + t.PNR, err = get(sheet, "B13") + if err != nil { + return nil, err + } + t.Card, err = get(sheet, "F3") + if err != nil { + return nil, err + } + t.TicketNumber, err = get(sheet, "E13") + if err != nil { + return nil, err + } + + tickets = append(tickets, t) + } + return tickets, nil +} diff --git a/pkg/names/fio.go b/pkg/names/fio.go new file mode 100644 index 0000000..4ecca7e --- /dev/null +++ b/pkg/names/fio.go @@ -0,0 +1,194 @@ +package names + +import ( + "errors" + "regexp" + "strings" + "unicode" +) + +type Parts struct { + First string + Last string + Patronymic string // may be "" or an initial like "F" +} + +// ParseLatinName parses 2–3 tokens containing First/Last and optional patronymic (1–2 letters). +// Tokens may be in any order, e.g. "PETROVSKAYA KARINA" or "RUSLAN F EVSEEV". +func ParseLatinName(s string) (Parts, error) { + toks := tokenizeLatin(s) // keeps letters, apostrophes, hyphens, optional trailing dot + if len(toks) < 2 || len(toks) > 3 { + return Parts{}, errors.New("expecting 2 or 3 name parts") + } + + type part struct { + raw string + lo string + } + ps := make([]part, 0, len(toks)) + for _, t := range toks { + lo := strings.ToLower(strings.TrimSuffix(t, ".")) + ps = append(ps, part{raw: t, lo: lo}) + } + + // 1) Patronymic: 1–2 letters (optionally with a trailing dot), or RU-style patronymic suffix + pIdx := -1 + for i, p := range ps { + if isInitial(p.raw) || isPatronymicLatin(p.lo) { + pIdx = i + break + } + } + + // 2) Surname: look for common last-name suffixes among remaining tokens + lIdx := -1 + for i, p := range ps { + if i == pIdx { + continue + } + if looksLikeSurnameLatin(p.lo) { + lIdx = i + break + } + } + + // 3) Assign the rest to first name; tie-break if needed + rem := make([]int, 0, 2) + for i := range ps { + if i != pIdx && i != lIdx { + rem = append(rem, i) + } + } + + // If surname not obvious and we have 2 leftovers, pick the longer one as surname + if lIdx == -1 && len(rem) == 2 { + if runeLen(ps[rem[0]].raw) >= runeLen(ps[rem[1]].raw) { + lIdx = rem[0] + rem = rem[1:] + } else { + lIdx = rem[1] + rem = rem[:1] + } + } + + out := Parts{} + if pIdx != -1 { + out.Patronymic = strings.TrimSuffix(ps[pIdx].raw, ".") + } + if lIdx != -1 { + out.Last = ps[lIdx].raw + } + + // Remaining becomes first name; if still empty (2 tokens), pick the non-surname/non-patronymic as first + if len(rem) == 1 { + out.First = ps[rem[0]].raw + } else if len(ps) == 2 { + for i := range ps { + if i != pIdx && i != lIdx { + out.First = ps[i].raw + } + } + } + + // Normalize to Title Case (capitalize first letter, lowercase rest) + out.First = capWord(out.First) + out.Last = capWord(out.Last) + out.Patronymic = strings.ToUpper(out.Patronymic) // keep initials uppercase + + // Sanity + if out.First == "" || out.Last == "" { + return out, errors.New("unable to classify parts") + } + return out, nil +} + +func tokenizeLatin(s string) []string { + // keep letters, apostrophes, hyphens; allow an optional trailing dot for initials + re := regexp.MustCompile(`(?i)[a-z]+(?:['-][a-z]+)*\.?`) + return re.FindAllString(s, -1) +} + +func isInitial(x string) bool { + x = strings.TrimSuffix(x, ".") + r := []rune(x) + return len(r) >= 1 && len(r) <= 2 && allASCIIAlpha(r) +} + +func isPatronymicLatin(lo string) bool { + // Latin transliterations of RU patronymics (very rough) + sufs := []string{"ovich", "evich", "ich", "ovna", "evna", "ichna", "ogly", "kyzy"} + for _, s := range sufs { + if strings.HasSuffix(lo, s) && len(lo) >= len(s)+2 { + return true + } + } + return false +} + +func looksLikeSurnameLatin(lo string) bool { + // Common Slavic surname endings (male & female forms) + sufs := []string{ + "ov", "ev", "in", "ina", "ova", "eva", + "sky", "skiy", "skii", "skaya", "ska", + "enko", "ienko", + "uk", "yk", "chuk", "czuk", + "yan", "ian", + "dze", "dze", "shvili", + } + for _, s := range sufs { + if strings.HasSuffix(lo, s) { + return true + } + } + // If token contains an apostrophe mid-word (e.g., emel'yanova), still may be a surname + if strings.Contains(lo, "'") { + // feminine -'yanova/-'eva etc. + if strings.HasSuffix(lo, "yanova") || strings.HasSuffix(lo, "yanov") || strings.HasSuffix(lo, "eva") || strings.HasSuffix(lo, "ova") { + return true + } + } + return false +} + +func capWord(s string) string { + if s == "" { + return s + } + // keep internal hyphens/apostrophes, title-case each segment + sep := func(r rune) bool { return r == '-' || r == '\'' } + parts := strings.FieldsFunc(strings.ToLower(s), sep) + i := 0 + builder := strings.Builder{} + for _, r := range s { + if r == '-' || r == '\'' { + builder.WriteRune(r) + continue + } + // find which sub-part this rune belongs to by counting letters consumed + if len(parts) == 0 { + builder.WriteRune(unicode.ToUpper(r)) + continue + } + if i == 0 { + builder.WriteRune(unicode.ToUpper(r)) + } else { + builder.WriteRune(unicode.ToLower(r)) + } + i++ + // crude reset at separators handled above + } + // Simpler/robust alternative: + // return strings.Title(strings.ToLower(s)) // deprecated but OK for ASCII; avoided here. + return strings.ToUpper(string([]rune(s)[0])) + strings.ToLower(s[1:]) +} + +func allASCIIAlpha(r []rune) bool { + for _, ch := range r { + if ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z' { + return false + } + } + return true +} + +func runeLen(s string) int { return len([]rune(s)) } diff --git a/pkg/names/gender.go b/pkg/names/gender.go new file mode 100644 index 0000000..bdd16e1 --- /dev/null +++ b/pkg/names/gender.go @@ -0,0 +1,47 @@ +package names + +import ( + "airlines/pkg/model" + "strings" + "unicode" +) + +func normalizeTitle(x string) string { + x = strings.ToLower(x) + // strip common punctuation + x = strings.ReplaceAll(x, ".", "") + x = strings.ReplaceAll(x, "'", "") + x = strings.ReplaceAll(x, "’", "") + x = strings.ReplaceAll(x, "-", "") + return x +} + +func GenderFromTitle(s string) model.Sex { + if s == "" { + return model.SexUnknown + } + // only first token (before space/comma/slash/etc.) + cut := func(r rune) bool { return unicode.IsSpace(r) || r == ',' || r == '/' || r == '&' } + first := strings.FieldsFunc(s, cut) + if len(first) == 0 { + return model.SexUnknown + } + t := normalizeTitle(first[0]) + + // male honorifics + switch t { + case "mr", "sir", "lord", "monsieur", "m", "don", "senor", "sr": // "sr" may collide with "senior"; context needed + return model.SexMale + } + + // female honorifics + switch t { + case "mrs", "miss", "ms", "madam", "madame", "mademoiselle", "mlle", + "lady", "dame", "senora", "sra", "señora", "srta", "srita", "dona": + return model.SexFemale + } + + // neutral/ambiguous titles (return Unknown) + // e.g., "mx", "dr", "prof", "rev", "coach", "officer", etc. + return model.SexUnknown +} diff --git a/pkg/store/db.go b/pkg/store/db.go index 802b4ec..648bca8 100644 --- a/pkg/store/db.go +++ b/pkg/store/db.go @@ -10,6 +10,7 @@ import ( "airlines/pkg/model" "gorm.io/driver/postgres" + _ "gorm.io/driver/sqlite" "gorm.io/gorm" "gorm.io/gorm/clause" "gorm.io/gorm/logger" |
