From bb833561aa74f02970aee13cdc75973b29716491 Mon Sep 17 00:00:00 2001 From: leshe4ka46 Date: Mon, 27 Oct 2025 20:36:28 +0300 Subject: # This is a combination of 2 commits. # This is the 1st commit message: unmarshal all formats, merge them in the single table, users are truly unique # This is the commit message #2: i --- pkg/localstore/import.go | 484 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 484 insertions(+) create mode 100644 pkg/localstore/import.go (limited to 'pkg/localstore/import.go') diff --git a/pkg/localstore/import.go b/pkg/localstore/import.go new file mode 100644 index 0000000..eb008ba --- /dev/null +++ b/pkg/localstore/import.go @@ -0,0 +1,484 @@ +package localstore + +import ( + "encoding/csv" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "airlines/pkg/model" + + "github.com/schollz/progressbar/v3" +) + +func (s *Store) ImportAllCSVs(dir string) error { + if dir == "" { + return errors.New("empty directory path") + } + if !strings.HasSuffix(dir, string(filepath.Separator)) { + dir += string(filepath.Separator) + } + + // lock for writes while rebuilding everything + s.mu.Lock() + defer s.mu.Unlock() + + // reset containers + s.users = nil + s.cards = nil + s.flights = nil + if s.userFlights == nil { + s.userFlights = make(map[uint64]map[uint64]struct{}) + } else { + for k := range s.userFlights { + delete(s.userFlights, k) + } + } + if s.cardFlights == nil { + s.cardFlights = make(map[uint64]map[uint64]struct{}) + } else { + for k := range s.cardFlights { + delete(s.cardFlights, k) + } + } + // (Re)build helper indices when possible + if s.cardsByUser == nil { + s.cardsByUser = make(map[uint64]map[uint64]struct{}) + } else { + for k := range s.cardsByUser { + delete(s.cardsByUser, k) + } + } + // We cannot reconstruct codesByUser / countriesByUser from CSVs here; leave as-is or empty. + // Initialize if nil so your code using them won't panic. + if s.codesByUser == nil { + s.codesByUser = make(map[uint64]map[string]struct{}) + } + if s.countriesByUser == nil { + s.countriesByUser = make(map[uint64]map[string]struct{}) + } + + // 1) users.csv + if err := s.loadUsersCSV(dir + "users.csv"); err != nil { + return fmt.Errorf("load users.csv: %w", err) + } + fmt.Println("loaed users") + + // 2) cards.csv + if err := s.loadCardsCSV(dir + "cards.csv"); err != nil { + return fmt.Errorf("load cards.csv: %w", err) + } + + // 3) flights.csv + if err := s.loadFlightsCSV(dir + "flights.csv"); err != nil { + return fmt.Errorf("load flights.csv: %w", err) + } + + // 4) user_flights.csv + if err := s.loadUserFlightsCSV(dir + "user_flights.csv"); err != nil { + return fmt.Errorf("load user_flights.csv: %w", err) + } + + // 5) card_flights.csv + if err := s.loadCardFlightsCSV(dir + "card_flights.csv"); err != nil { + return fmt.Errorf("load card_flights.csv: %w", err) + } + + return nil +} + +func (s *Store) loadUsersCSV(path string) error { + r, closer, err := openCSV(path) + if err != nil { + return err + } + defer closer.Close() + + // header + if _, err := r.Read(); err != nil { + return fmt.Errorf("users header: %w", err) + } + + bar := progressbar.Default(int64(150000), "reading users") + + for { + bar.Add(1) + rec, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("users read: %w", err) + } + // columns: id, nick, name, surname, fathersname, sex, birthday, total_flights, total_codes, total_countries, total_cards + if len(rec) < 11 { + return fmt.Errorf("users row has %d columns, expected >=11", len(rec)) + } + + id, err := parseUint(rec[0]) + if err != nil { + return fmt.Errorf("user id: %w", err) + } + sexInt, err := parseInt(rec[5]) + if err != nil { + return fmt.Errorf("user sex: %w", err) + } + + var bday time.Time + if strings.TrimSpace(rec[6]) != "" { + // "2006-01-02" in UTC + t, err := time.Parse("2006-01-02", rec[6]) + if err != nil { + return fmt.Errorf("user birthday: %w", err) + } + bday = t.UTC() + } else { + // keep zero time; or: + // bday = model.SentinelBirthday() // <-- if you prefer sentinel + } + + u := &model.User{ + ID: id, + Nick: rec[1], + Name: rec[2], + Surname: rec[3], + Fathersname: strings.TrimSpace(rec[4]), + Sex: model.Sex(sexInt), // adjust if your type differs + Birthday: bday, + } + + s.putUser(u) + } + return nil +} + +func (s *Store) loadCardsCSV(path string) error { + r, closer, err := openCSV(path) + if err != nil { + return err + } + defer closer.Close() + + // header + if _, err := r.Read(); err != nil { + return fmt.Errorf("cards header: %w", err) + } + + bar := progressbar.Default(int64(177000), "reading cards") + for { + bar.Add(1) + rec, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("cards read: %w", err) + } + // columns: id, prefix, number, bonusprogramm, user_id + if len(rec) < 5 { + return fmt.Errorf("cards row has %d columns, expected >=5", len(rec)) + } + + id, err := parseUint(rec[0]) + if err != nil { + return fmt.Errorf("card id: %w", err) + } + num, err := parseUint(rec[2]) + if err != nil { + return fmt.Errorf("card number: %w", err) + } + uid, err := parseUint(rec[4]) + if err != nil { + return fmt.Errorf("card user_id: %w", err) + } + + c := &model.Card{ + ID: id, + Prefix: rec[1], + Number: num, + Bonusprogramm: rec[3], + UserID: uid, + } + s.putCard(c) + + // index: cardsByUser + if _, ok := s.cardsByUser[uid]; !ok { + s.cardsByUser[uid] = make(map[uint64]struct{}) + } + s.cardsByUser[uid][id] = struct{}{} + } + return nil +} + +func (s *Store) loadFlightsCSV(path string) error { + r, closer, err := openCSV(path) + if err != nil { + return err + } + defer closer.Close() + + // header + if _, err := r.Read(); err != nil { + return fmt.Errorf("flights header: %w", err) + } + + bar := progressbar.Default(int64(2000000), "reading flights") + for { + bar.Add(1) + rec, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("flights read: %w", err) + } + // columns: + // id, number, from, to, fromlat, fromlon, tolat, tolon, dep_date, has_time, dep_time, dep_iso + if len(rec) < 12 { + return fmt.Errorf("flights row has %d columns, expected >=12", len(rec)) + } + + id, err := parseUint(rec[0]) + if err != nil { + return fmt.Errorf("flight id: %w", err) + } + + fromLat, err := parseFloat(rec[4]) + if err != nil { + return fmt.Errorf("fromlat: %w", err) + } + fromLon, err := parseFloat(rec[5]) + if err != nil { + return fmt.Errorf("fromlon: %w", err) + } + toLat, err := parseFloat(rec[6]) + if err != nil { + return fmt.Errorf("tolat: %w", err) + } + toLon, err := parseFloat(rec[7]) + if err != nil { + return fmt.Errorf("tolon: %w", err) + } + + depDateStr := strings.TrimSpace(rec[8]) // "2006-01-02" + hasTime, err := strconv.ParseBool(rec[9]) + if err != nil { + return fmt.Errorf("has_time: %w", err) + } + + var dep time.Time + if hasTime { + // When exported with time present, dep_iso is RFC3339; prefer it for full fidelity. + depISO := strings.TrimSpace(rec[11]) + if depISO != "" { + t, err := time.Parse(time.RFC3339, depISO) + if err != nil { + return fmt.Errorf("dep_iso: %w", err) + } + dep = t + } else { + // Fallback: combine dep_date + dep_time in local (treat as UTC if not specified) + depTime := strings.TrimSpace(rec[10]) // "15:04:05" + t, err := time.Parse("2006-01-02 15:04:05", depDateStr+" "+depTime) + if err != nil { + return fmt.Errorf("dep_date+dep_time: %w", err) + } + dep = t.UTC() + } + } else { + // Date only → set at UTC midnight of that date + if depDateStr == "" { + return fmt.Errorf("dep_date is empty while has_time=false") + } + t, err := time.Parse("2006-01-02", depDateStr) + if err != nil { + return fmt.Errorf("dep_date: %w", err) + } + dep = t.UTC() + } + + f := &model.Flight{ + ID: id, + Number: rec[1], + From: rec[2], + To: rec[3], + FromCoords: model.LatLong{ + Lat: fromLat, + Long: fromLon, + }, + ToCoords: model.LatLong{ + Lat: toLat, + Long: toLon, + }, + Date: dep, + HasTime: hasTime, + } + + s.putFlight(f) + } + return nil +} + +func (s *Store) loadUserFlightsCSV(path string) error { + r, closer, err := openCSV(path) + if err != nil { + return err + } + defer closer.Close() + + // header + if _, err := r.Read(); err != nil { + return fmt.Errorf("user_flights header: %w", err) + } + + bar := progressbar.Default(int64(3200000), "reading u-flights") + for { + bar.Add(1) + rec, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("user_flights read: %w", err) + } + // columns: user_id, flight_id + if len(rec) < 2 { + return fmt.Errorf("user_flights row has %d columns, expected >=2", len(rec)) + } + uid, err := parseUint(rec[0]) + if err != nil { + return fmt.Errorf("user_id: %w", err) + } + fid, err := parseUint(rec[1]) + if err != nil { + return fmt.Errorf("flight_id: %w", err) + } + + // guard against missing references (mirror your exporter’s checks) + if !s.validFlightID(fid) { + continue + } + if _, ok := s.userFlights[uid]; !ok { + s.userFlights[uid] = make(map[uint64]struct{}) + } + s.userFlights[uid][fid] = struct{}{} + } + return nil +} + +func (s *Store) loadCardFlightsCSV(path string) error { + r, closer, err := openCSV(path) + if err != nil { + return err + } + defer closer.Close() + + // header + if _, err := r.Read(); err != nil { + return fmt.Errorf("card_flights header: %w", err) + } + + for { + rec, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("card_flights read: %w", err) + } + // columns: card_id, flight_id + if len(rec) < 2 { + return fmt.Errorf("card_flights row has %d columns, expected >=2", len(rec)) + } + cid, err := parseUint(rec[0]) + if err != nil { + return fmt.Errorf("card_id: %w", err) + } + fid, err := parseUint(rec[1]) + if err != nil { + return fmt.Errorf("flight_id: %w", err) + } + + if !s.validFlightID(fid) { + continue + } + if _, ok := s.cardFlights[cid]; !ok { + s.cardFlights[cid] = make(map[uint64]struct{}) + } + s.cardFlights[cid][fid] = struct{}{} + } + return nil +} + +// --- helpers --- + +func openCSV(path string) (*csv.Reader, io.Closer, error) { + f, err := os.Open(path) + if err != nil { + return nil, nil, err + } + r := csv.NewReader(f) + r.ReuseRecord = true + // r.FieldsPerRecord = -1 // allow variable columns per row (comment out if you want strictness) + return r, f, nil +} + +func parseUint(s string) (uint64, error) { + return strconv.ParseUint(strings.TrimSpace(s), 10, 64) +} +func parseInt(s string) (int64, error) { + return strconv.ParseInt(strings.TrimSpace(s), 10, 64) +} +func parseFloat(s string) (float64, error) { + if strings.TrimSpace(s) == "" { + return 0, nil + } + return strconv.ParseFloat(strings.TrimSpace(s), 64) +} + +// Ensure slices are large enough and place item by ID (1-based supported) +func ensureLen[T any](slice []*T, id uint64) []*T { + needed := int(id) + 1 // keep index == id; index 0 unused per your exporters + if len(slice) <= needed { + newSlice := make([]*T, needed) + copy(newSlice, slice) + return newSlice + } + return slice +} + +func (s *Store) putUser(u *model.User) { + if u == nil { + return + } + s.users = ensureLen[model.User](s.users, u.ID) + s.users[u.ID] = u +} + +func (s *Store) putCard(c *model.Card) { + if c == nil { + return + } + s.cards = ensureLen[model.Card](s.cards, c.ID) + s.cards[c.ID] = c +} + +func (s *Store) putFlight(f *model.Flight) { + if f == nil { + return + } + s.flights = ensureLen[model.Flight](s.flights, f.ID) + s.flights[f.ID] = f +} + +func (s *Store) validFlightID(fid uint64) bool { + return fid != 0 && int(fid) < len(s.flights) && s.flights[fid] != nil +} -- cgit v1.2.3