func main() { cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file") ignoreErrors := flag.Bool("i", false, "ignore marc errors (not recommended)") numWorkers := flag.Int("w", runtime.NumCPU(), "number of workers") version := flag.Bool("v", false, "prints current program version and exit") filterVar := flag.String("r", "", "only dump the given tags (e.g. 001,003)") includeLeader := flag.Bool("l", false, "dump the leader as well") metaVar := flag.String("m", "", "a key=value pair to pass to meta") plainMode := flag.Bool("p", false, "plain mode: dump without content and meta") recordKey := flag.String("recordkey", "record", "key name of the record") var PrintUsage = func() { fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS] MARCFILE\n", os.Args[0]) flag.PrintDefaults() } flag.Parse() if *numWorkers > 0 { runtime.GOMAXPROCS(*numWorkers) } if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if *version { fmt.Println(marctools.AppVersion) os.Exit(0) } if flag.NArg() < 1 { PrintUsage() os.Exit(1) } file, err := os.Open(flag.Args()[0]) if err != nil { log.Fatalln(err) } defer func() { if err := file.Close(); err != nil { log.Fatalln(err) } }() filterMap := marctools.StringToMapSet(*filterVar) metaMap, err := marctools.KeyValueStringToMap(*metaVar) if err != nil { log.Fatalln(err) } queue := make(chan *marc22.Record) results := make(chan []byte) done := make(chan bool) writer := bufio.NewWriter(os.Stdout) defer writer.Flush() go marctools.FanInWriter(writer, results, done) options := marctools.JsonConversionOptions{ FilterMap: filterMap, MetaMap: metaMap, IncludeLeader: *includeLeader, PlainMode: *plainMode, IgnoreErrors: *ignoreErrors, RecordKey: *recordKey, } var wg sync.WaitGroup for i := 0; i < *numWorkers; i++ { wg.Add(1) go marctools.Worker(queue, results, &wg, options) } decoder := xml.NewDecoder(file) for { t, _ := decoder.Token() if t == nil { break } switch se := t.(type) { case xml.StartElement: if se.Name.Local == "record" { var record marc22.Record decoder.DecodeElement(&record, &se) queue <- &record } } } close(queue) wg.Wait() close(results) select { case <-time.After(1e9): break case <-done: break } }
func main() { cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file") ignoreErrors := flag.Bool("i", false, "ignore marc errors (not recommended)") numWorkers := flag.Int("w", runtime.NumCPU(), "number of workers") version := flag.Bool("v", false, "prints current program version and exit") filterVar := flag.String("r", "", "only dump the given tags (e.g. 001,003)") includeLeader := flag.Bool("l", false, "dump the leader as well") metaVar := flag.String("m", "", "a key=value pair to pass to meta") recordKey := flag.String("recordkey", "record", "key name of the record") plainMode := flag.Bool("p", false, "plain mode: dump without content and meta") batchSize := flag.Int("b", 10000, "batch size for intercom") var PrintUsage = func() { fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS] MARCFILE\n", os.Args[0]) flag.PrintDefaults() } flag.Parse() if *numWorkers > 0 { runtime.GOMAXPROCS(*numWorkers) } if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if *version { fmt.Println(marctools.AppVersion) os.Exit(0) } if flag.NArg() < 1 { PrintUsage() os.Exit(1) } file, err := os.Open(flag.Args()[0]) if err != nil { log.Fatal(err) } defer func() { if err := file.Close(); err != nil { log.Fatal(err) } }() filterMap := marctools.StringToMapSet(*filterVar) metaMap, err := marctools.KeyValueStringToMap(*metaVar) if err != nil { log.Fatal(err) } queue := make(chan []*marc22.Record) results := make(chan []byte) done := make(chan bool) writer := bufio.NewWriter(os.Stdout) defer writer.Flush() go marctools.FanInWriter(writer, results, done) var wg sync.WaitGroup options := marctools.JsonConversionOptions{ FilterMap: filterMap, MetaMap: metaMap, IncludeLeader: *includeLeader, PlainMode: *plainMode, IgnoreErrors: *ignoreErrors, RecordKey: *recordKey, } for i := 0; i < *numWorkers; i++ { wg.Add(1) go marctools.BatchWorker(queue, results, &wg, options) } counter := 0 var records []*marc22.Record for { record, err := marc22.ReadRecord(file) if err == io.EOF { break } if err != nil { if *ignoreErrors { log.Println(err) continue } else { log.Fatal(err) } } records = append(records, record) counter += 1 if counter%*batchSize == 0 { queue <- records records = records[:0] } } queue <- records close(queue) wg.Wait() close(results) <-done }