textract location info, insert info line per test - ags-upload - Insert AGS files to a database
 (HTM) git clone git://src.adamsgaard.dk/ags-upload
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
 (DIR) commit b81873bfd2e96dfd41e851bc86c0203adb885e5f
 (DIR) parent 6949fac8034abbbff9156c7e92f9fc9c092448de
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Wed,  8 Oct 2025 14:23:29 +0200
       
       extract location info, insert info line per test
       
       Diffstat:
         M cmd/main.go                         |     272 +++++++++++++++++--------------
       
       1 file changed, 154 insertions(+), 118 deletions(-)
       ---
 (DIR) diff --git a/cmd/main.go b/cmd/main.go
       t@@ -10,7 +10,6 @@ import (
                "os"
                "strconv"
                "strings"
       -        "time"
        
                "github.com/gin-gonic/gin"
                "gorm.io/driver/postgres"
       t@@ -18,32 +17,48 @@ import (
                "gorm.io/gorm/schema"
        )
        
       +// One row per CPT test (SCPG). Carries duplicated PROJ fields and selected LOCA fields.
        type CptInfo struct {
       -        ID         uint   `gorm:"primaryKey"`
       -        SourceId   string // PROJ_ID
       -        Name       string // PROJ_NAME
       -        Location   string // PROJ_LOC
       -        Client     string // PROJ_CLNT
       -        Contractor string // PROJ_CONT
       -        InsertTime time.Time
       +        ID uint `gorm:"primaryKey"`
       +
       +        ProjSourceId string // PROJ_ID
       +        ProjName     string // PROJ_NAME
       +        ProjLocation string // PROJ_LOC
       +        ProjClient   string // PROJ_CLNT
       +        ProjContract string // PROJ_CONT
       +
       +        // Test identity
       +        LocationId    string `gorm:"index:idx_loc_tesn,unique"` // LOCA_ID
       +        TestReference string `gorm:"index:idx_loc_tesn,unique"` // SCPG_TESN
       +
       +        // Selected LOCA fields for the site
       +        CoordRef string   // LOCA_LREF
       +        Datum    string   // LOCA_DATM
       +        GL       *float64 // LOCA_GL (m)
       +        LocX     *float64 // LOCA_LOCX (m)
       +        LocY     *float64 // LOCA_LOCY (m)
       +        LocZ     *float64 // LOCA_LOCZ (m)
        }
        
       -type Cpt struct { // group SCPG - data
       +// Per-depth CPT data (SCPT)
       +type Cpt struct {
                ID            uint     `gorm:"primaryKey"`
       -        InfoId        uint     //foreign key from CptInfo
       -        LocationId    string   // LOCA_ID
       -        TestReference string   // SCPG_TESN
       +        InfoId        uint     `gorm:"index"`   // FK -> CptInfo.ID
       +        LocationId    string   `gorm:"index"`   // LOCA_ID (redundant but handy)
       +        TestReference string   `gorm:"index"`   // SCPG_TESN (redundant but handy)
                Depth         *float64 // SCPT_DPTH
       -        Qc            *float64 // SCPT_RES
       -        Fs            *float64 // SCPT_FRES
       +        Qc            *float64 // SCPT_RES (cone resistance)
       +        Fs            *float64 // SCPT_FRES (side friction)
                U1            *float64 // SCPT_PWP1
                U2            *float64 // SCPT_PWP2
                U3            *float64 // SCPT_PWP3
       -        Rf            *float64 // SCPT_FRR
       -        Qt            *float64 // SCPT_QT
       +        Rf            *float64 // SCPT_FRR (friction ratio)
       +        Qt            *float64 // SCPT_QT  (corrected cone resistance)
        }
        
       -func ParseAGSProjectAndSCPT(r io.Reader) (*CptInfo, []Cpt, error) {
       +// ParsePROJ+LOCA+SCPG+SCPT in a single pass.
       +// Returns: slice of CptInfo (one per SCPG), and slice of Cpt (SCPT data points)
       +func ParseAGSCptAll(r io.Reader) ([]CptInfo, []Cpt, error) {
                norm, err := dos2unix(r)
                if err != nil {
                        return nil, nil, fmt.Errorf("read: %w", err)
       t@@ -55,33 +70,48 @@ func ParseAGSProjectAndSCPT(r io.Reader) (*CptInfo, []Cpt, error) {
        
                var (
                        curGroup     string
       -                headersByGrp = map[string]map[string]int{} // GROUP -> header index map
       -                project      *CptInfo
       -                cpts         []Cpt
       +                headersByGrp = map[string]map[string]int{}
       +
       +                // first PROJ row captured
       +                proj struct {
       +                        id, name, loc, clnt, cont string
       +                }
       +
       +                // LOCA rows keyed by LOCA_ID
       +                locas = map[string]struct {
       +                        LRef, Datum string
       +                        GL, X, Y, Z         *float64
       +                }{}
       +
       +                // CptInfo keyed by (LOCA_ID, SCPG_TESN)
       +                infosByKey = map[string]CptInfo{}
       +
       +                // SCPT data rows
       +                cpts []Cpt
                )
        
                get := func(group string, data []string, name string) string {
       -                hm := headersByGrp[group]
       -                if hm == nil {
       +                m := headersByGrp[group]
       +                if m == nil {
                                return ""
                        }
       -                if idx, ok := hm[strings.ToUpper(name)]; ok && idx >= 0 && idx < len(data) {
       +                if idx, ok := m[strings.ToUpper(name)]; ok && idx >= 0 && idx < len(data) {
                                return data[idx]
                        }
                        return ""
                }
       -        parseF64Ptr := func(s string) *float64 {
       +        fptr := func(s string) *float64 {
       +                s = strings.TrimSpace(s)
                        if s == "" {
                                return nil
                        }
       -                // Optional: handle decimal commas
                        s = strings.ReplaceAll(s, ",", ".")
       -                f, err := strconv.ParseFloat(s, 64)
       -                if err != nil {
       -                        return nil
       +                if f, err := strconv.ParseFloat(s, 64); err == nil {
       +                        return &f
                        }
       -                return &f
       +                return nil
                }
       +        mapKey := func(locID, tesn string) string { return locID + "\x00" + tesn }
        
                for {
                        rec, err := cr.Read()
       t@@ -98,22 +128,21 @@ func ParseAGSProjectAndSCPT(r io.Reader) (*CptInfo, []Cpt, error) {
                                rec[i] = strings.TrimSpace(rec[i])
                        }
        
       -                tag := strings.ToUpper(rec[0])
       -                switch tag {
       +                switch strings.ToUpper(rec[0]) {
                        case "GROUP":
                                if len(rec) > 1 {
                                        curGroup = strings.ToUpper(strings.TrimSpace(rec[1]))
                                } else {
                                        curGroup = ""
                                }
       +
                        case "HEADING":
                                if curGroup == "" {
                                        continue
                                }
                                m := make(map[string]int, len(rec)-1)
                                for i := 1; i < len(rec); i++ {
       -                                key := strings.ToUpper(strings.TrimSpace(rec[i]))
       -                                m[key] = i - 1 // position in DATA after skipping tag
       +                                m[strings.ToUpper(strings.TrimSpace(rec[i]))] = i - 1
                                }
                                headersByGrp[curGroup] = m
        
       t@@ -121,47 +150,85 @@ func ParseAGSProjectAndSCPT(r io.Reader) (*CptInfo, []Cpt, error) {
                                if curGroup == "" {
                                        continue
                                }
       -                        data := rec[1:]
       +                        row := rec[1:]
        
                                switch curGroup {
                                case "PROJ":
       -                                if project != nil {
       -                                        // If multiple PROJ rows exist, keep the first (typical).
       -                                        continue
       +                                if proj.id == "" {
       +                                        proj.id = get("PROJ", row, "PROJ_ID")
       +                                        proj.name = get("PROJ", row, "PROJ_NAME")
       +                                        proj.loc = get("PROJ", row, "PROJ_LOC")
       +                                        proj.clnt = get("PROJ", row, "PROJ_CLNT")
       +                                        proj.cont = get("PROJ", row, "PROJ_CONT")
       +                                }
       +
       +                        case "LOCA":
       +                                lid := get("LOCA", row, "LOCA_ID")
       +                                if lid == "" {
       +                                        break
                                        }
       -                                project = &CptInfo{
       -                                        SourceId:   get("PROJ", data, "PROJ_ID"),
       -                                        Name:       get("PROJ", data, "PROJ_NAME"),
       -                                        Location:   get("PROJ", data, "PROJ_LOC"),
       -                                        Client:     get("PROJ", data, "PROJ_CLNT"),
       -                                        Contractor: get("PROJ", data, "PROJ_CONT"),
       -                                        InsertTime: time.Now(),
       +                                locas[lid] = struct {
       +                                        LRef, Datum string
       +                                        GL, X, Y, Z         *float64
       +                                }{
       +                                        LRef:   get("LOCA", row, "LOCA_LREF"),
       +                                        Datum:  get("LOCA", row, "LOCA_DATM"),
       +                                        GL:     fptr(get("LOCA", row, "LOCA_GL")),
       +                                        X:      fptr(get("LOCA", row, "LOCA_LOCX")),
       +                                        Y:      fptr(get("LOCA", row, "LOCA_LOCY")),
       +                                        Z:      fptr(get("LOCA", row, "LOCA_LOCZ")),
       +                                }
       +
       +                        case "SCPG":
       +                                locID := get("SCPG", row, "LOCA_ID")
       +                                tesn := get("SCPG", row, "SCPG_TESN")
       +                                if locID == "" || tesn == "" {
       +                                        break
       +                                }
       +                                li := locas[locID] // if missing, zero-value ok
       +                                infosByKey[mapKey(locID, tesn)] = CptInfo{
       +                                        ProjSourceId: proj.id,
       +                                        ProjName:     proj.name,
       +                                        ProjLocation: proj.loc,
       +                                        ProjClient:   proj.clnt,
       +                                        ProjContract: proj.cont,
       +
       +                                        LocationId:    locID,
       +                                        TestReference: tesn,
       +
       +                                        CoordRef: li.LRef,
       +                                        Datum:    li.Datum,
       +                                        GL:       li.GL,
       +                                        LocX:     li.X,
       +                                        LocY:     li.Y,
       +                                        LocZ:     li.Z,
                                        }
        
                                case "SCPT":
       +                                locID := get("SCPT", row, "LOCA_ID")
       +                                tesn := get("SCPT", row, "SCPG_TESN") // links SCPT to SCPG
                                        cpts = append(cpts, Cpt{
       -                                        LocationId:    get("SCPT", data, "LOCA_ID"),
       -                                        TestReference: get("SCPT", data, "SCPG_TESN"),
       -                                        Depth:         parseF64Ptr(get("SCPT", data, "SCPT_DPTH")),
       -                                        Qc:            parseF64Ptr(get("SCPT", data, "SCPT_RES")),
       -                                        Fs:            parseF64Ptr(get("SCPT", data, "SCPT_FRES")),
       -                                        U1:            parseF64Ptr(get("SCPT", data, "SCPT_PWP1")),
       -                                        U2:            parseF64Ptr(get("SCPT", data, "SCPT_PWP2")),
       -                                        U3:            parseF64Ptr(get("SCPT", data, "SCPT_PWP3")),
       -                                        Rf:            parseF64Ptr(get("SCPT", data, "SCPT_FRR")),
       -                                        Qt:            parseF64Ptr(get("SCPT", data, "SCPT_QT")),
       +                                        LocationId:    locID,
       +                                        TestReference: tesn,
       +                                        Depth:         fptr(get("SCPT", row, "SCPT_DPTH")),
       +                                        Qc:            fptr(get("SCPT", row, "SCPT_RES")),
       +                                        Fs:            fptr(get("SCPT", row, "SCPT_FRES")),
       +                                        U1:            fptr(get("SCPT", row, "SCPT_PWP1")),
       +                                        U2:            fptr(get("SCPT", row, "SCPT_PWP2")),
       +                                        U3:            fptr(get("SCPT", row, "SCPT_PWP3")),
       +                                        Rf:            fptr(get("SCPT", row, "SCPT_FRR")),
       +                                        Qt:            fptr(get("SCPT", row, "SCPT_QT")),
                                        })
       -                        default:
       -                                // ignore other groups for now
                                }
       -
       -                // ignore UNIT, TYPE, etc.
       -                default:
       -                        continue
                        }
                }
        
       -        return project, cpts, nil
       +        // Flatten infos map to slice
       +        infos := make([]CptInfo, 0, len(infosByKey))
       +        for _, v := range infosByKey {
       +                infos = append(infos, v)
       +        }
       +        return infos, cpts, nil
        }
        
        func dos2unix(r io.Reader) (io.Reader, error) {
       t@@ -171,13 +238,13 @@ func dos2unix(r io.Reader) (io.Reader, error) {
                }
                all = bytes.ReplaceAll(all, []byte("\r\n"), []byte("\n"))
                all = bytes.ReplaceAll(all, []byte("\r"), []byte("\n"))
       -
                return bytes.NewReader(all), nil
        }
        
        func main() {
                dsn := os.Getenv("DB_CONN")
                dbSchema := "jupiter"
       +
                db, err := gorm.Open(postgres.Open(dsn),
                        &gorm.Config{
                                NamingStrategy: schema.NamingStrategy{
       t@@ -194,97 +261,66 @@ func main() {
                        log.Fatal(err)
                }
        
       -        if err := db.AutoMigrate(&CptInfo{}); err != nil {
       -                log.Fatal(err)
       -        }
       -
       -        if err := db.AutoMigrate(&Cpt{}); err != nil {
       +        if err := db.AutoMigrate(&CptInfo{}, &Cpt{}); err != nil {
                        log.Fatal(err)
                }
        
                r := gin.Default()
       -
       -        // ~32 MB file cap for multipart
       -        r.MaxMultipartMemory = 32 << 20
       +        r.MaxMultipartMemory = 32 << 20 // ~32 MB
        
                r.POST("/ingest/ags", func(c *gin.Context) {
                        file, _, err := c.Request.FormFile("file")
                        if err != nil {
       -                        c.String(400, "missing multipart file: %v", err)
       +                        c.String(http.StatusBadRequest, "missing multipart file: %v", err)
                                return
                        }
                        defer file.Close()
        
       -                proj, cpts, err := ParseAGSProjectAndSCPT(file)
       +                infos, cpts, err := ParseAGSCptAll(file)
                        if err != nil {
       -                        c.String(400, "parse error: %v", err)
       +                        c.String(http.StatusBadRequest, "parse error: %v", err)
                                return
                        }
        
                        err = db.Transaction(func(tx *gorm.DB) error {
       -
       -                        // Upsert project by SourceId (make SourceId unique if you rely on it)
       -                        if proj != nil {
       +                        // Upsert CptInfo by (LocationId, TestReference) and build a key->ID lookup
       +                        lookup := make(map[string]uint, len(infos))
       +                        for i := range infos {
       +                                ci := infos[i] // copy for pointer stability
                                        if err := tx.
       -                                        Where("source_id = ?", proj.SourceId).
       -                                        Assign(proj).
       -                                        FirstOrCreate(proj).Error; err != nil {
       +                                        Where("location_id = ? AND test_reference = ?", ci.LocationId, ci.TestReference).
       +                                        Assign(&ci).
       +                                        FirstOrCreate(&ci).Error; err != nil {
                                                return err
                                        }
       +                                lookup[ci.LocationId+"\x00"+ci.TestReference] = ci.ID
                                }
        
       -                        if len(cpts) > 0 {
       -                                // Optional: add a foreign key to project if you want (e.g., ProjectID)
       -                                for i := range cpts {
       -                                        cpts[i].InfoId = proj.ID
       +                        out := make([]Cpt, 0, len(cpts))
       +                        for i := range cpts {
       +                                if id := lookup[cpts[i].LocationId+"\x00"+cpts[i].TestReference]; id != 0 {
       +                                        cpts[i].InfoId = id
       +                                        out = append(out, cpts[i])
                                        }
       +                        }
        
       -                                if err := tx.CreateInBatches(cpts, 2000).Error; err != nil {
       +                        if len(out) > 0 {
       +                                if err := tx.CreateInBatches(out, 2000).Error; err != nil {
                                                return err
                                        }
                                }
       -
                                return nil
                        })
                        if err != nil {
       -                        c.String(500, "db error: %v", err)
       +                        c.String(http.StatusInternalServerError, "db error: %v", err)
                                return
                        }
        
       -                c.JSON(201, gin.H{
       -                        "project": proj,
       -                        "cpts":    len(cpts),
       +                c.JSON(http.StatusCreated, gin.H{
       +                        "infos": len(infos),
       +                        "cpts":  len(cpts),
                        })
                })
        
                _ = r.Run(":8080")
        }
       -
       -func getAGSReader(req *http.Request) (io.Reader, func(), error) {
       -        ct := req.Header.Get("Content-Type")
       -
       -        // Multipart form upload
       -        if strings.HasPrefix(ct, "multipart/form-data") {
       -                file, _, err := req.FormFile("file")
       -                if err != nil {
       -                        return nil, nil, err
       -                }
       -                return file, func() { _ = file.Close() }, nil
       -        }
       -
       -        // Raw body upload
       -        switch {
       -        case strings.HasPrefix(ct, "text/plain"),
       -                strings.HasPrefix(ct, "text/csv"),
       -                strings.HasPrefix(ct, "application/octet-stream"),
       -                ct == "":
       -                bodyBytes, err := io.ReadAll(req.Body)
       -                if err != nil {
       -                        return nil, nil, err
       -                }
       -                _ = req.Body.Close()
       -                return strings.NewReader(string(bodyBytes)), nil, nil
       -        default:
       -                return nil, nil, http.ErrNotSupported
       -        }
       -}