aboutsummaryrefslogtreecommitdiff
path: root/cmd
diff options
context:
space:
mode:
authorleshe4ka46 <alex9102naid1@ya.ru>2025-10-27 20:36:28 +0300
committerleshe4ka46 <alex9102naid1@ya.ru>2025-10-28 13:42:21 +0300
commitbb833561aa74f02970aee13cdc75973b29716491 (patch)
tree0914668e11dbf825979f7419ce1bc78294cd3f7f /cmd
parente17a425dfb3382310fb5863f516dacdca9f44956 (diff)
# This is a combination of 2 commits.
# This is the 1st commit message: unmarshal all formats, merge them in the single table, users are truly unique # This is the commit message #2: i
Diffstat (limited to 'cmd')
-rw-r--r--cmd/airlines/main.go100
-rw-r--r--cmd/analytics/analytics.go18
-rw-r--r--cmd/csv/main.go18
-rw-r--r--cmd/fiotest/fio.go15
-rw-r--r--cmd/pdf/pdf.go126
-rw-r--r--cmd/store/test.go117
-rw-r--r--cmd/xlsx/main.go88
-rw-r--r--cmd/xml/xml.go32
-rw-r--r--cmd/yaml/main.go18
9 files changed, 451 insertions, 81 deletions
diff --git a/cmd/airlines/main.go b/cmd/airlines/main.go
index 8eeaef5..1710d57 100644
--- a/cmd/airlines/main.go
+++ b/cmd/airlines/main.go
@@ -1,13 +1,15 @@
package main
import (
- "context"
- "encoding/json"
"fmt"
- "os"
- ljson "airlines/pkg/adapters/json"
- "airlines/pkg/store"
+ "airlines/pkg/adapters/csv"
+ "airlines/pkg/adapters/json"
+ "airlines/pkg/adapters/xlsx"
+ "airlines/pkg/adapters/xml"
+ "airlines/pkg/adapters/yaml"
+
+ "airlines/pkg/localstore"
"github.com/joho/godotenv"
)
@@ -18,26 +20,80 @@ func main() {
if err != nil {
fmt.Println(err)
}
- store, err := store.NewStore(fmt.Sprintf("postgres://%s:%s@%s:%s/%s", os.Getenv("DB_USER"), os.Getenv("DB_PASSWORD"), os.Getenv("DB_HOST"), os.Getenv("DB_PORT"), os.Getenv("DB_NAME")))
- if err != nil {
- panic(err)
- }
- _ = store.AutoMigrate()
+ //fmt.Sprintf("postgres://%s:%s@%s:%s/%s", os.Getenv("DB_USER"), os.Getenv("DB_PASSWORD"), os.Getenv("DB_HOST"), os.Getenv("DB_PORT"), os.Getenv("DB_NAME")))
+ // store, err := store.NewStore("user=postgres dbname=airlines host=/home/alex/.pgsocket sslmode=disable")
- // i, err := json.ImportForumProfilesJSON(context.Background(), store, "../../full.json", 16384)
- // fmt.Println(i, err)
- f, err := os.Open("../../full.json")
+ store := localstore.NewLocalStore()
+ fmt.Println("store created")
- dec := json.NewDecoder(f)
- // optional: be strict about unexpected fields
- // dec.DisallowUnknownFields()
+ func() {
+ root, err := json.UnmarshalJsonRoot("/home/alex/ds-data/FrequentFlyerForum-Profiles.json")
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled json")
- var root ljson.JsonRoot
- if err := dec.Decode(&root); err != nil {
- panic(err)
- }
+ root.DumpToDb(store)
+ fmt.Println("\ndumped json")
+
+ }()
+
+ func() {
+ // xlsx
+ tickets, err := xlsx.UnmarshallXlsxFiles("/home/alex/ds-data/YourBoardingPassDotAero/")
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled xlsx")
+
+ xlsx.DumpToDb(store, tickets)
+ fmt.Println("\ndumped xlsx to db")
+ }()
+
+ func() {
+ xmldata, err := xml.UnmarshalXml("/home/alex/ds-data/PointzAggregator-AirlinesData.xml")
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled xml")
+
+ xmldata.DumpToDb(store)
+ fmt.Println("\ndumped xml to db")
+ }()
+
+ func() {
+ yamlData, err := yaml.UnmarshallYaml("/home/alex/ds-data/SkyTeam-Exchange.yaml")
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled yaml")
+
+ yamlData.DumpToDb(store)
+ fmt.Println("\ndumped yaml to db")
+ }()
+
+ // First CSV source (csv.csv), parsed with the adapter's boolean flag set to false.
+ func() {
+ csvData, err := csv.UnmarshallCsv("/home/alex/ds-data/csv.csv", false)
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled csv1")
+
+ csvData.DumpToDb(store)
+ fmt.Println("\ndumped csv1 to db")
+ }()
+
+ // Second CSV source (tab.csv), parsed with the adapter's boolean flag set to true.
+ func() {
+ csvData2, err := csv.UnmarshallCsv("/home/alex/ds-data/tab.csv", true)
+ if err != nil {
+ panic(err)
+ }
+ fmt.Println("\nunmarshalled csv2")
- root.DumpToDb(context.Background(), store)
- // fmt.Println(root)
+ csvData2.DumpToDb(store)
+ fmt.Println("\ndumped csv2 to db")
+ }()
+ fmt.Println(store.ExportAllCSVs("/tmp/ds"))
}
diff --git a/cmd/analytics/analytics.go b/cmd/analytics/analytics.go
new file mode 100644
index 0000000..e2cbdb9
--- /dev/null
+++ b/cmd/analytics/analytics.go
@@ -0,0 +1,18 @@
+package main
+
+import (
+ "airlines/pkg/localstore"
+ "fmt"
+)
+
+
+// main fills a new local store via ImportAllCSVs("/tmp/ds"), prints the
+// result of one FindCard call and then runs the store's Analytics method.
+func main() {
+	store := localstore.NewLocalStore()
+	store.ImportAllCSVs("/tmp/ds")
+
+	// Whatever FindCard returns for ("FF", 0, "") is printed as is.
+	fmt.Println(store.FindCard("FF", 0, ""))
+
+	store.Analytics()
+}
diff --git a/cmd/csv/main.go b/cmd/csv/main.go
new file mode 100644
index 0000000..e1e5174
--- /dev/null
+++ b/cmd/csv/main.go
@@ -0,0 +1,18 @@
+package main
+
+import (
+ "airlines/pkg/adapters/csv"
+ "airlines/pkg/localstore"
+)
+
+func main() {
+ yamlData, err := csv.UnmarshallCsv("/home/alex/ds-data/tab.csv", true)
+ if err != nil {
+ panic(err)
+ }
+
+ store := localstore.NewLocalStore()
+
+ yamlData.DumpToDb(store)
+ store.ExportAllCSVs("/tmp/ds")
+}
diff --git a/cmd/fiotest/fio.go b/cmd/fiotest/fio.go
deleted file mode 100644
index 8195da7..0000000
--- a/cmd/fiotest/fio.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package main
-
-import (
- "airlines/pkg/names"
- "fmt"
-)
-
-func main() {
-
- f, err := names.ParseLatinName("MAKAR A TIKHOMIROV")
- if err != nil {
- panic(err)
- }
- fmt.Printf("%+v\n", f)
-}
diff --git a/cmd/pdf/pdf.go b/cmd/pdf/pdf.go
index cb6aeb7..4a185a1 100644
--- a/cmd/pdf/pdf.go
+++ b/cmd/pdf/pdf.go
@@ -1,55 +1,151 @@
+/*
+ * Extract vector lines and other paths for each page of a PDF file.
+ *
+ * Run as: go run pdf.go
+ * The input file is hard-coded in main (reconstr_words.pdf); no command-line
+ * argument is read.
+ */
+
package main
import (
"fmt"
- "log"
"os"
+ "github.com/unidoc/unipdf/v4/creator"
"github.com/unidoc/unipdf/v4/extractor"
"github.com/unidoc/unipdf/v4/model"
)
func main() { // prints the stroke paths of reconstr_words.pdf; on error prints it and exits with status 1
- f, err := os.Open("../../test.pdf")
+ // Disabled step; reconstruct writes reconstr_words.pdf, the file read below:
+ // err := reconstruct("../../test.pdf")
+ // if err != nil {
+ // fmt.Printf("Error: %v\n", err)
+ // os.Exit(1)
+ // }
+
+ err := outputPdfLines("reconstr_words.pdf") // the input path is hard-coded
if err != nil {
- log.Fatalf("Failed to open PDF: %v\n", err)
+ fmt.Printf("Error: %v\n", err)
+ os.Exit(1)
}
+
+
+}
+
+// outputPdfLines opens the PDF at inputPath and prints, page by page, every point of each extracted stroke path to stdout; the first open, read or extraction error is returned.
+func outputPdfLines(inputPath string) error {
+ f, err := os.Open(inputPath)
+ if err != nil {
+ return err
+ }
+
defer f.Close()
+
pdfReader, err := model.NewPdfReader(f)
if err != nil {
- log.Fatalf("Failed to read PDF: %v\n", err)
+ return err
}
+
numPages, err := pdfReader.GetNumPages()
if err != nil {
- log.Fatalf("Failed to retrieve the number of pages: %v\n", err)
+ return err
}
- fmt.Printf("Total number of pages: %d\n", numPages)
+
+ // Print a banner, then walk the pages; GetPage is called with i + 1, not the zero-based loop index.
fmt.Printf("--------------------\n")
- fmt.Printf("PDF to text extraction:\n")
+ fmt.Printf("PDF lines extraction:\n")
fmt.Printf("--------------------\n")
for i := 0; i < numPages; i++ {
pageNum := i + 1
page, err := pdfReader.GetPage(pageNum)
if err != nil {
- panic(err)
+ return err
}
ex, err := extractor.New(page)
if err != nil {
- panic(err)
+ return err
}
- text, err := ex.ExtractText()
+ fmt.Println("------------------------------")
+ fmt.Printf("Page %d:\n", pageNum)
+
+ // Extract the stroke paths of the current page; an extraction error ends the whole run.
+ paths, err := ex.ExtractStrokePaths()
if err != nil {
- panic(err)
+ return err
}
- fmt.Println("------------------------------")
- fmt.Printf("Page %d:\n", pageNum)
- fmt.Printf("\"%s\"\n", text)
- fmt.Println("------------------------------")
+ // Print each path's index followed by the X/Y coordinates of all of its points.
+ for i, path := range paths { // this i shadows the page-loop index inside the path loop
+ fmt.Printf("Path %d:\n", i)
+ for j, point := range path.Points {
+ fmt.Printf("Point %d: %f %f \n", j, point.X, point.Y)
+ }
+ }
+ }
+
+ return nil
+}
+
+// reconstruct reads the PDF at pdfPath, extracts the text marks of every page
+// and redraws each mark as its own styled paragraph in a new document, which
+// is written to "reconstr_words.pdf" in the working directory.
+//
+// Each page's text and each mark's text are also printed to stdout. Marks
+// without a font are skipped. The first error from opening, reading,
+// extracting, drawing or writing is returned; page-level errors carry the
+// page number.
+func reconstruct(pdfPath string) error {
+	f, err := os.Open(pdfPath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	pdfr, err := model.NewPdfReaderLazy(f)
+	if err != nil {
+		return err
+	}
+
+	c := creator.New()
+
+	for pageNum := 1; pageNum <= len(pdfr.PageList); pageNum++ {
+		page, err := pdfr.GetPage(pageNum)
+		if err != nil {
+			return fmt.Errorf("page %d: %w", pageNum, err)
+		}
+
+		extr, err := extractor.New(page)
+		if err != nil {
+			return fmt.Errorf("page %d: %w", pageNum, err)
+		}
+		pageText, _, _, err := extr.ExtractPageText()
+		if err != nil {
+			return fmt.Errorf("page %d: %w", pageNum, err)
+		}
+
+		// Start on a new page.
+		c.NewPage()
+		fmt.Printf("Page %d\n", pageNum)
+
+		text := pageText.Text()
+		textmarks := pageText.Marks()
+		fmt.Printf("%s\n", text)
+
+		// Reconstruct the text, each single TextMark drawn at a time with creator.Paragraph.
+		for _, tm := range textmarks.Elements() {
+			if tm.Font == nil {
+				continue
+			}
+			fmt.Printf("%s\n", tm.Text)
+			// Reconstruct by drawing each glyph from textmarks with the creator package.
+			para := c.NewStyledParagraph()
+			para.SetText(tm.Original)
+			para.SetFont(tm.Font)
+			para.SetFontSize(tm.FontSize)
+			// RGBA yields 16-bit channels; scale them to the 0..1 range.
+			r, g, b, _ := tm.StrokeColor.RGBA()
+			rf, gf, bf := float64(r)/0xffff, float64(g)/0xffff, float64(b)/0xffff
+			para.SetFontColor(creator.ColorRGBFromArithmetic(rf, gf, bf))
+			// Convert to PDF coordinate system.
+			yPos := c.Context().PageHeight - (tm.BBox.Lly + tm.BBox.Height())
+			para.SetPos(tm.BBox.Llx, yPos) // Upper left corner.
+			// A failed draw used to be dropped silently; report it instead.
+			if err := c.Draw(para); err != nil {
+				return fmt.Errorf("page %d: drawing %q: %w", pageNum, tm.Text, err)
+			}
+		}
	}
+	return c.WriteToFile("reconstr_words.pdf")
}
diff --git a/cmd/store/test.go b/cmd/store/test.go
new file mode 100644
index 0000000..d05ae06
--- /dev/null
+++ b/cmd/store/test.go
@@ -0,0 +1,117 @@
+package main
+
+import (
+ "fmt"
+ "time"
+
+ "airlines/pkg/localstore"
+ "airlines/pkg/model"
+)
+
+// main is a manual smoke run against a fresh local store: it saves three
+// users and then four flights (the last flight value is saved twice) and
+// prints every result. The first failed save is printed and ends the run.
+func main() {
+	db := localstore.NewLocalStore()
+	fmt.Println("store created")
+
+	// saveUser stores u and prints the outcome; false means the save failed.
+	saveUser := func(u *model.User) bool {
+		saved, err := db.SaveUser(u)
+		if err != nil {
+			fmt.Println("error saving user:", err)
+			return false
+		}
+		fmt.Println("user saved:", saved)
+		return true
+	}
+
+	// saveFlight stores f and prints the outcome; false means the save failed.
+	saveFlight := func(f *model.Flight) bool {
+		saved, err := db.SaveFlight(f)
+		if err != nil {
+			fmt.Println("error saving flight:", err)
+			return false
+		}
+		fmt.Println("flight saved:", saved)
+		return true
+	}
+
+	if !saveUser(&model.User{Name: "a", Surname: "b", Fathersname: "A"}) {
+		return
+	}
+	if !saveUser(&model.User{Name: "c", Surname: "d"}) {
+		return
+	}
+	// Same name and surname as the first user, but a different patronymic
+	// and a birthday set.
+	if !saveUser(&model.User{
+		Name:        "a",
+		Surname:     "b",
+		Fathersname: "ABBBBB",
+		Birthday:    time.Now(),
+	}) {
+		return
+	}
+
+	// Today's date at 00:00:00 UTC.
+	current := time.Now()
+	midnight := time.Date(current.Year(), current.Month(), current.Day(), 0, 0, 0, 0, time.UTC)
+
+	if !saveFlight(&model.Flight{Number: "AB123", From: "JFK", To: "LAX", Date: midnight}) {
+		return
+	}
+	if !saveFlight(&model.Flight{Number: "CD456", From: "LAX", To: "SFO", Date: time.Now()}) {
+		return
+	}
+
+	// Same number and route as the first flight, ten seconds later and with
+	// HasTime set; this value is saved twice in a row.
+	timed := &model.Flight{
+		Number:  "AB123",
+		From:    "JFK",
+		To:      "LAX",
+		Date:    midnight.Add(10 * time.Second),
+		HasTime: true,
+	}
+	if !saveFlight(timed) {
+		return
+	}
+	if !saveFlight(timed) {
+		return
+	}
+
+	// fmt.Println(db.ExportAllCSVs("/tmp/ds"))
+}
diff --git a/cmd/xlsx/main.go b/cmd/xlsx/main.go
index 4ccb01e..b960448 100644
--- a/cmd/xlsx/main.go
+++ b/cmd/xlsx/main.go
@@ -2,46 +2,76 @@ package main
import (
"airlines/pkg/adapters/xlsx"
+ csvwriter "airlines/pkg/csvWriter"
"fmt"
"os"
+ "sync"
)
-// func readXLSX(path string) {
-// tickets, err := xlsx.UnmarshallXlsxFile("/home/alex/ds-data/YourBoardingPassDotAero/YourBoardingPassDotAero-2017-11-30.xlsx")
-// if err != nil {
-// panic(err)
-// }
-
-// }
-
func main() {
tickets := make([]xlsx.Ticket, 0)
baseDir := "/home/alex/ds-data/YourBoardingPassDotAero/"
items, _ := os.ReadDir(baseDir)
+ var mu sync.Mutex
+ var wg sync.WaitGroup
+ sem := make(chan struct{}, 8)
for _, item := range items {
if !item.IsDir() {
- fmt.Println("Processing file:", item.Name())
- parsedTickets, err := xlsx.UnmarshallXlsxFile(baseDir + item.Name())
- if err != nil {
- panic(err)
- }
- tickets = append(tickets, parsedTickets...)
+ wg.Add(1)
+ sem <- struct{}{}
+ go func(name string) {
+ defer func() { <-sem }()
+ defer wg.Done()
+ fmt.Println("Processing file:", name)
+ parsedTickets, err := xlsx.UnmarshallXlsxFile(baseDir + name)
+ if err != nil {
+ panic(err)
+ }
+ mu.Lock()
+ defer mu.Unlock()
+ tickets = append(tickets, parsedTickets...)
+
+ }(item.Name())
}
}
- // for _, ticket := range tickets {
- // u, err := ticket.ToUser()
- // if err != nil {
- // panic(err)
- // }
- // f, err := ticket.ToFlight()
- // if err != nil {
- // panic(err)
- // }
- // c, err := ticket.ToCard()
- // if err != nil {
- // panic(err)
- // }
- // fmt.Printf("%+v %+v %+v\n", u, f, c)
- // }
+ wg.Wait()
+
+ fmt.Println("finished")
+
+ file, err := csvwriter.NewCsvWriter("/tmp/output.csv")
+ if err != nil {
+ panic(err)
+ }
+ defer file.Close()
+
+ file.Write([]string{"Number", "FromAer", "FromCoordsLat", "FromCoordsLong", "ToAer", "ToCoordsLat", "ToCoordsLong", "DateUnix"})
+
+ for i, ticket := range tickets {
+ if i%(len(tickets)/100) == 0 {
+ fmt.Printf("%f\n", float32(i)/float32(len(tickets))*100)
+ }
+ if i%(len(tickets)/100*5) == 0 {
+ file.Sync()
+ }
+ // u, err := ticket.ToUser()
+ // if err != nil {
+ // panic(err)
+ // }
+ f, err := ticket.ToFlight()
+ if err != nil {
+ panic(err)
+ }
+ file.Write([]string{f.Number,
+ f.From, fmt.Sprintf("%v", f.FromCoords.Lat), fmt.Sprintf("%v", f.FromCoords.Long),
+ f.To, fmt.Sprintf("%v", f.ToCoords.Lat), fmt.Sprintf("%v", f.ToCoords.Long),
+ fmt.Sprintf("%v", f.Date.Unix()),
+ })
+ // c, err := ticket.ToCard()
+ // if err != nil {
+ // panic(err)
+ // }
+ // fmt.Printf("%+v %+v %+v\n", u, f, c)
+ }
+ file.Sync()
}
diff --git a/cmd/xml/xml.go b/cmd/xml/xml.go
new file mode 100644
index 0000000..2679114
--- /dev/null
+++ b/cmd/xml/xml.go
@@ -0,0 +1,32 @@
+package main
+
+import (
+ "airlines/pkg/adapters/xml"
+ "airlines/pkg/localstore"
+ "fmt"
+)
+
+func main() {
+ pointzUsers, err := xml.UnmarshalXml("/home/alex/ds-data/PointzAggregator-AirlinesData.xml")
+ if err != nil {
+ panic(err)
+ }
+
+ fmt.Println("unmarshall ok")
+
+ // for _, user := range pointzUsers.Users {
+ // fmt.Printf("User UID: %s, Name: %s %s\n", user.UID, user.Name.First, user.Name.Last)
+ // for _, card := range user.Cards.Card {
+ // fmt.Printf(" Card Number: %s, Program: %s\n", card.Number, card.Program)
+ // for _, activity := range card.Activities.Activitys {
+ // fmt.Printf(" Activity Type: %s, Code: %s, Date: %s, Departure: %s, Arrival: %s, Fare: %s\n",
+ // activity.Type, activity.Code, activity.Date, activity.Departure, activity.Arrival, activity.Fare)
+ // }
+ // }
+ // }
+ store := localstore.NewLocalStore()
+
+ pointzUsers.DumpToDb(store)
+
+ store.ExportAllCSVs("/tmp/ds")
+}
diff --git a/cmd/yaml/main.go b/cmd/yaml/main.go
new file mode 100644
index 0000000..232ba79
--- /dev/null
+++ b/cmd/yaml/main.go
@@ -0,0 +1,18 @@
+package main
+
+import (
+ "airlines/pkg/adapters/yaml"
+ "airlines/pkg/localstore"
+)
+
+func main() {
+ yamlData, err := yaml.UnmarshallYaml("/home/alex/ds-data/SkyTeam-Exchange.yaml")
+ if err != nil {
+ panic(err)
+ }
+
+ store := localstore.NewLocalStore()
+
+ yamlData.DumpToDb(store)
+ store.ExportAllCSVs("/tmp/ds")
+}