func unpackDact(data, xmldir, dact, stderr string, chKill chan bool) (tokens, nline int, err error) { os.Mkdir(xmldir, 0777) dc, err := dbxml.Open(dact) if err != nil { return 0, 0, fmt.Errorf( "Openen mislukt. PaQu kan geen dact-bestanden lezen die gemaakt zijn met DbXml nieuwer dan versie %d", dbxml_version_major()) } defer dc.Close() var dbx *dbxml.Db if Cfg.Dactx { os.Remove(dact + "x") dbx, err = dbxml.Open(dact + "x") if err != nil { return 0, 0, err } defer dbx.Close() } docs, err := dc.All() if err != nil { return 0, 0, fmt.Errorf("Lezen dact-bestand: %s", err) } defer docs.Close() fperr, err := os.Create(stderr) if err != nil { return 0, 0, err } defer fperr.Close() fplines, err := os.Create(data + ".lines") if err != nil { return 0, 0, err } defer fplines.Close() tokens = 0 nline = 0 nd := -1 sdir := "" for docs.Next() { select { case <-chGlobalExit: return 0, 0, errGlobalExit case <-chKill: return 0, 0, errKilled default: } nline++ if nline%10000 == 1 { nd++ sdir = fmt.Sprintf("%04d", nd) os.Mkdir(filepath.Join(xmldir, sdir), 0777) } name := docs.Name() if strings.HasSuffix(strings.ToLower(name), ".xml") { name = name[:len(name)-4] } encname := encode_filename(name) data := []byte(docs.Content()) fp, err := os.Create(filepath.Join(xmldir, sdir, encname+".xml")) if err != nil { return 0, 0, err } _, err = fp.Write(data) fp.Close() if err != nil { return 0, 0, err } alpino := Alpino_ds_no_node{} err = xml.Unmarshal(data, &alpino) if err != nil { return 0, 0, fmt.Errorf("Parsen van %q uit dact-bestand: %s", docs.Name(), err) } tokens += len(strings.Fields(alpino.Sentence)) fmt.Fprintf(fplines, "%s|%s\n", name, strings.TrimSpace(alpino.Sentence)) for _, c := range alpino.Comments { if strings.HasPrefix(c.Comment, "Q#") { a := strings.SplitN(c.Comment, "|", 2) if len(a) == 2 { fmt.Fprintf(fperr, "Q#%s|%s\n", name, strings.TrimSpace(a[1])) break } } } if Cfg.Dactx { content, err := dactExpand(data) if err != nil { return 0, 0, err } if content == "" { content = string(data) } err = dbx.PutXml(docs.Name(), content, false) if err != nil { return 0, 0, err } } } return tokens, nline, nil }
func makeDact(dact, xml string, stripchar string, chKill chan bool) error { files, err := filenames2(xml, false) if err != nil { return err } os.Remove(dact) db, err := dbxml.Open(dact) if err != nil { return err } defer db.Close() var dbx *dbxml.Db if Cfg.Dactx { os.Remove(dact + "x") dbx, err = dbxml.Open(dact + "x") if err != nil { return err } defer dbx.Close() } for _, name := range files { select { case <-chGlobalExit: return errGlobalExit case <-chKill: return errKilled default: } data, err := ioutil.ReadFile(filepath.Join(xml, name)) if err != nil { return err } name = decode_filename(name) if stripchar != "" { name = name[1+strings.Index(name, stripchar):] } err = db.PutXml(name, string(data), false) if err != nil { return err } if Cfg.Dactx { content, err := dactExpand(data) if err != nil { return err } if content == "" { content = string(data) } err = dbx.PutXml(name, content, false) if err != nil { return err } } } return nil }