Beispiel #1
0
// main is the entry point of the marcdump command. It sniffs the format of
// the MARC file named by the first positional argument, decodes the file
// record by record and dumps each record to standard output. The -color flag
// (default true) is passed through to the record dumper.
func main() {
	log.SetFlags(0)
	log.SetPrefix("marcdump: ")
	var (
		useColors = flag.Bool("color", true, "use colored terminal output")
		//filter    = flag.String("filter", "", "only print specified fields, ex.: 100b,245a")
	)

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: marcdump [options...] file\n\nOptions:\n")
		flag.PrintDefaults()
	}

	flag.Parse()

	// A file argument is mandatory.
	if flag.NArg() == 0 {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(flag.Arg(0))
	if err != nil {
		log.Fatal(err)
	}
	// Release the handle on normal return. log.Fatal exits the process
	// without running deferred calls, which is fine for the error paths.
	defer f.Close()

	// Detect the format from the first bytes of the file.
	sniff := make([]byte, 64)
	if _, err = f.Read(sniff); err != nil {
		log.Fatal(err)
	}
	format := marc.DetectFormat(sniff)
	switch format {
	case marc.MARC, marc.LineMARC, marc.MARCXML:
		// supported format, carry on
	default:
		log.Fatal("Unknown MARC format")
	}

	// Rewind so the decoder sees the file from the beginning
	// (whence 0 means relative to the start of the file).
	if _, err = f.Seek(0, 0); err != nil {
		log.Fatal(err)
	}

	// Decode until io.EOF; any other error aborts the run.
	dec := marc.NewDecoder(f, format)
	for {
		r, err := dec.Decode()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		r.DumpTo(os.Stdout, *useColors)
	}
}
Beispiel #2
0
// main is the entry point of the marccheck command. It decodes every record
// of the MARC file named by the first command-line argument and reports the
// elapsed time, the number of records and the average parsing speed in MB/s
// (file size divided by elapsed time).
func main() {
	log.SetFlags(0)
	log.SetPrefix("marccheck: ")
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "Usage: marccheck <marcdatabase>\n")
		os.Exit(1)
	}

	f, err := os.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	// Release the handle on normal return. log.Fatal exits the process
	// without running deferred calls, which is fine for the error paths.
	defer f.Close()

	// Stat the open handle rather than the path, so the size belongs to the
	// file actually being read.
	info, err := f.Stat()
	if err != nil {
		log.Fatal(err)
	}
	size := info.Size()

	// Detect the format from the first bytes of the file.
	sniff := make([]byte, 64)
	if _, err = f.Read(sniff); err != nil {
		log.Fatal(err)
	}
	format := marc.DetectFormat(sniff)
	switch format {
	case marc.MARC, marc.LineMARC, marc.MARCXML:
		// supported format, carry on
	default:
		log.Fatal("Unknown MARC format")
	}

	// Rewind so the decoder sees the file from the beginning
	// (whence 0 means relative to the start of the file).
	if _, err = f.Seek(0, 0); err != nil {
		log.Fatal(err)
	}

	dec := marc.NewDecoder(f, format)
	c := 0
	start := time.Now()

	// Decode until io.EOF; any other error aborts the run.
	for _, err := dec.Decode(); err != io.EOF; _, err = dec.Decode() {
		if err != nil {
			log.Fatal(err)
		}
		c++
	}

	// Sample the clock once so both figures describe the same interval and
	// the speed does not include the time spent printing.
	elapsed := time.Since(start)
	fmt.Printf("Done in %s\n", elapsed)
	fmt.Printf("Number of records: %d\n", c)
	fmt.Printf("Average parsing speed: %.2f MB/s\n", float64(size)/elapsed.Seconds()/1048576)
}
Beispiel #3
0
// detectFormat sniffs the first bytes of f to determine its MARC format and
// then rewinds f to the start so it can be decoded from the beginning.
//
// It returns the detected format with a nil error when the format is one of
// marc.MARC, marc.LineMARC or marc.MARCXML. Any other format is returned
// together with an "unknown MARC format" error. Read and seek failures are
// returned to the caller with the zero marc.Format instead of terminating
// the process, so the caller decides how to report them.
func detectFormat(f *os.File) (marc.Format, error) {
	// Zero value returned alongside I/O errors; it carries no meaning.
	var none marc.Format

	sniff := make([]byte, 64)
	if _, err := f.Read(sniff); err != nil {
		return none, err
	}
	format := marc.DetectFormat(sniff)

	// Rewind reader (whence 0 means relative to the start of the file).
	if _, err := f.Seek(0, 0); err != nil {
		return none, err
	}

	switch format {
	case marc.MARC, marc.LineMARC, marc.MARCXML:
		return format, nil
	default:
		return format, errors.New("unknown MARC format")
	}
}
Beispiel #4
0
// main is the entry point of the marcstats command. It decodes every record
// of the MARC file named by the first positional argument and prints
// statistics about it.
//
// With -t set to a comma-separated list of tag filters (ex. "100e,700a"),
// the records are passed to filterCounts and the ranked result for each
// filter is printed. Without -t, every record is fed to the general stats
// collector, whose dump is printed at the end.
func main() {
	log.SetFlags(0)
	log.SetPrefix("marcstats: ")

	tags := flag.String("t", "", "generate statistics from these tag contents (ex '100e,700a' )")
	flag.Parse()

	if flag.NArg() < 1 {
		fmt.Fprintf(os.Stderr, "Usage: marcstats -t<tags> <marcdatabase>\n")
		os.Exit(1)
	}

	f, err := os.Open(flag.Arg(0))
	if err != nil {
		log.Fatal(err)
	}
	// Release the handle on normal return. log.Fatal exits the process
	// without running deferred calls, which is fine for the error paths.
	defer f.Close()

	// Detect the format from the first bytes of the file.
	sniff := make([]byte, 64)
	if _, err = f.Read(sniff); err != nil {
		log.Fatal(err)
	}
	format := marc.DetectFormat(sniff)
	switch format {
	case marc.MARC, marc.LineMARC, marc.MARCXML:
		// supported format, carry on
	default:
		log.Fatal("Unknown MARC format")
	}

	// Rewind so the decoder sees the file from the beginning
	// (whence 0 means relative to the start of the file).
	if _, err = f.Seek(0, 0); err != nil {
		log.Fatal(err)
	}

	dec := marc.NewDecoder(f, format)
	c := 0
	start := time.Now()
	stats := newStats()

	// Each filter needs at least four characters: a three-character tag
	// followed by one or more subfield characters.
	var filterTags []string
	if *tags != "" {
		filterTags = strings.Split(*tags, ",")
		for _, t := range filterTags {
			if len(t) < 4 {
				log.Fatalf("wrong tag filter format: %q (should be a three-digit number plus one or more subfieldcharacters, ex. \"100e,655ax\")", t)
			}
		}
	}
	filterStats := make(map[string]tagStats)

	// Decode until io.EOF; any other decode error aborts the run.
	for rec, err := dec.Decode(); err != io.EOF; rec, err = dec.Decode() {
		if err != nil {
			log.Fatal(err)
		}
		if len(filterTags) > 0 {
			filterCounts(filterTags, filterStats, rec)
		} else if err := stats.countRecord(rec); err != nil {
			// A record that cannot be counted is logged, not fatal.
			log.Println(err)
		}
		c++
	}
	fmt.Printf("Done in %s\n", time.Since(start))
	fmt.Printf("Number of records: %d\n", c)

	if len(filterTags) == 0 {
		stats.Dump()
		return
	}

	// Map iteration order is unspecified, so the order in which the tags
	// are printed can differ between runs.
	for tag, e := range filterStats {
		fmt.Println(tag)
		for _, p := range rankByWordCount(e) {
			fmt.Printf("\t%6d %s\n", p.Value, p.Key)
		}
		fmt.Println()
	}
}