Example #1
0
// main blanks the Fulltext field of intermediate schema records.
//
// Input comes from the files named as positional arguments, or from standard
// input when no argument is given. Every input is handed to a bytebatch line
// processor together with os.Stdout and a callback that decodes one JSON
// record, clears its full text and re-encodes it followed by a newline.
//
// Flags:
//
//	-v  print the program version and exit
//	-b  batch size assigned to the processor
//	-w  number of workers assigned to the processor (defaults to the CPU count)
func main() {
	var (
		versionFlag = flag.Bool("v", false, "prints current program version")
		batchSize   = flag.Int("b", 20000, "batch size")
		workerCount = flag.Int("w", runtime.NumCPU(), "number of workers")
	)

	flag.Parse()

	if *versionFlag {
		fmt.Println(span.AppVersion)
		os.Exit(0)
	}

	// All inputs are opened before processing starts, so a file that cannot
	// be opened aborts the run before any record has been handled.
	var inputs []io.Reader

	switch paths := flag.Args(); len(paths) {
	case 0:
		inputs = append(inputs, os.Stdin)
	default:
		for _, path := range paths {
			f, err := os.Open(path)
			if err != nil {
				log.Fatal(err)
			}
			// The handle has to outlive this loop; it is consumed by the
			// processing loop further down.
			defer f.Close()
			inputs = append(inputs, f)
		}
	}

	// redact is the per-record callback. On a decoding failure the offending
	// input is logged and returned unchanged together with the error.
	redact := func(line []byte) ([]byte, error) {
		var doc finc.IntermediateSchema

		if err := json.Unmarshal(line, &doc); err != nil {
			log.Printf("failed to unmarshal: %s", string(line))
			return line, err
		}

		// Redact full text.
		doc.Fulltext = ""

		encoded, err := json.Marshal(doc)
		if err != nil {
			return encoded, err
		}
		return append(encoded, '\n'), nil
	}

	for _, in := range inputs {
		proc := bytebatch.NewLineProcessor(in, os.Stdout, redact)
		proc.NumWorkers = *workerCount
		proc.BatchSize = *batchSize

		if err := proc.Run(); err != nil {
			log.Fatal(err)
		}
	}
}
Example #2
0
// main runs every test in qa.TestSuite against intermediate schema records.
//
// Input comes from the files named as positional arguments, or from standard
// input when no argument is given. For each failing test the issue's error
// text is sent to the statsCounter goroutine; with -verbose the JSON-encoded
// issue is additionally sent to a span.ByteSink goroutine that was started
// with os.Stdout. Once all inputs are processed, the package-level stats value
// is printed to standard error as {"stats": ...}.
//
// Flags:
//
//	-verbose  also emit each issue as JSON
//	-v        print the program version and exit
//	-b        batch size assigned to the processor
//	-w        number of workers assigned to the processor (defaults to the CPU count)
func main() {

	var (
		verbose     = flag.Bool("verbose", false, "be verbose")
		versionFlag = flag.Bool("v", false, "prints current program version")
		batchSize   = flag.Int("b", 20000, "batch size")
		workerCount = flag.Int("w", runtime.NumCPU(), "number of workers")
	)

	flag.Parse()

	if *versionFlag {
		fmt.Println(span.AppVersion)
		os.Exit(0)
	}

	var inputs []io.Reader

	switch paths := flag.Args(); len(paths) {
	case 0:
		inputs = append(inputs, os.Stdin)
	default:
		for _, path := range paths {
			f, err := os.Open(path)
			if err != nil {
				log.Fatal(err)
			}
			// Kept open until main returns; the processing loop below
			// still has to read from it.
			defer f.Close()
			inputs = append(inputs, f)
		}
	}

	// Both background goroutines share the finished channel; each is expected
	// to signal on it once its input channel has been closed and drained
	// (NOTE(review): inferred from the two receives at the end of main —
	// confirm against statsCounter and span.ByteSink).
	issues := make(chan string)
	finished := make(chan bool)

	go statsCounter(issues, finished)

	sink := make(chan []byte)

	go span.ByteSink(os.Stdout, sink, finished)

	// check is the per-record callback. It never returns output of its own:
	// findings leave through the issues and sink channels instead.
	check := func(line []byte) ([]byte, error) {

		var doc finc.IntermediateSchema
		if err := json.Unmarshal(line, &doc); err != nil {
			return line, err
		}

		for _, test := range qa.TestSuite {
			err := test.TestRecord(doc)
			if err == nil {
				continue
			}

			// Tests are expected to report failures as qa.Issue values;
			// anything else terminates the program.
			issue, ok := err.(qa.Issue)
			if !ok {
				log.Fatalf("unexpected error type: %s", err)
			}
			issues <- issue.Err.Error()

			if !*verbose {
				continue
			}
			encoded, err := json.Marshal(issue)
			if err != nil {
				log.Fatal(err)
			}
			sink <- encoded
		}

		return nil, nil

	}

	for _, in := range inputs {
		proc := bytebatch.NewLineProcessor(in, os.Stdout, check)
		proc.NumWorkers = *workerCount
		proc.BatchSize = *batchSize

		if err := proc.Run(); err != nil {
			log.Fatal(err)
		}
	}

	// No more senders: close both channels, then wait for one signal from
	// each of the two background goroutines.
	close(issues)
	close(sink)
	for pending := 2; pending > 0; pending-- {
		<-finished
	}

	summary, err := json.Marshal(map[string]interface{}{"stats": stats})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Fprintln(os.Stderr, string(summary))
}
Example #3
0
// main converts intermediate schema records into an export format.
//
// Input comes from the files named as positional arguments, or from standard
// input when no argument is given. Every input is handed to a bytebatch line
// processor together with os.Stdout and a callback that decodes one JSON
// record, converts it with the selected exporter and returns the JSON-encoded
// result followed by a newline.
//
// Flags:
//
//	-v                print the program version and exit
//	-b                batch size assigned to the processor
//	-w                number of workers assigned to the processor (defaults to the CPU count)
//	-cpuprofile       write a CPU profile to the given file
//	-o                output format; must be a key of Exporters
//	-list             print the sorted keys of Exporters and exit
//	-with-fullrecord  passed through to the exporter's Convert method
//
// The format name "solr5vu3v12" is accepted as shorthand for "solr5vu3" with
// -with-fullrecord switched on.
func main() {
	showVersion := flag.Bool("v", false, "prints current program version")
	size := flag.Int("b", 20000, "batch size")
	numWorkers := flag.Int("w", runtime.NumCPU(), "number of workers")
	cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file")
	format := flag.String("o", "solr5vu3", "output format")
	listFormats := flag.Bool("list", false, "list output formats")
	withFullrecord := flag.Bool("with-fullrecord", false, "populate fullrecord field with originating intermediate schema record")

	flag.Parse()

	if *showVersion {
		fmt.Println(span.AppVersion)
		os.Exit(0)
	}

	if *listFormats {
		// Map iteration order is random; sort for stable output.
		var keys []string
		for key := range Exporters {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		fmt.Println(strings.Join(keys, "\n"))
		os.Exit(0)
	}

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Registered before StopCPUProfile so that it runs after it: the
		// profile has to be stopped before its file is closed.
		defer f.Close()
		// StartCPUProfile can fail (e.g. when profiling is already enabled);
		// report that instead of silently running without a profile.
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		// NOTE: log.Fatal exits via os.Exit and therefore skips deferred
		// calls, so a run that ends in log.Fatal does not stop the profile.
		defer pprof.StopCPUProfile()
	}

	// "solr5vu3v12" is an alias: same exporter, full record always included.
	if *format == "solr5vu3v12" {
		*withFullrecord = true
		*format = "solr5vu3"
	}

	exportSchemaFunc, ok := Exporters[*format]
	if !ok {
		log.Fatalf("unknown export schema: %s", *format)
	}

	var readers []io.Reader

	if flag.NArg() == 0 {
		readers = append(readers, os.Stdin)
	} else {
		for _, filename := range flag.Args() {
			file, err := os.Open(filename)
			if err != nil {
				log.Fatal(err)
			}
			// Kept open until main returns; the processing loop below still
			// has to read from it.
			defer file.Close()
			readers = append(readers, file)
		}
	}

	for _, r := range readers {
		// business logic
		p := bytebatch.NewLineProcessor(r, os.Stdout, func(b []byte) ([]byte, error) {
			is := finc.IntermediateSchema{}

			// TODO(miku): Unmarshal date correctly.
			if err := json.Unmarshal(b, &is); err != nil {
				log.Printf("failed to unmarshal: %s", string(b))
				return b, err
			}

			// Get export format. A fresh exporter value is created for every
			// record.
			schema := exportSchemaFunc()
			if err := schema.Convert(is, *withFullrecord); err != nil {
				log.Printf("failed to convert: %v", is)
				return b, err
			}

			// TODO(miku): maybe move marshalling into Exporter, if we have
			// anything else than JSON - function could be somethings like
			// func Marshal() ([]byte, error)
			bb, err := json.Marshal(schema)
			if err != nil {
				return b, err
			}
			bb = append(bb, '\n')
			return bb, nil
		})

		p.NumWorkers = *numWorkers
		p.BatchSize = *size

		if err := p.Run(); err != nil {
			log.Fatal(err)
		}
	}
}
Example #4
0
// main tags intermediate schema records using a filter configuration.
//
// The JSON file given with -c is decoded into a filter.Tagger. Input comes
// from the files named as positional arguments, or from standard input when no
// argument is given. Every input is handed to a bytebatch line processor
// together with os.Stdout and a callback that decodes one JSON record, passes
// it through the tagger and returns the JSON-encoded result followed by a
// newline.
//
// Flags:
//
//	-c           JSON config file for filters (required)
//	-v           print the program version and exit
//	-b           batch size assigned to the processor
//	-w           number of workers assigned to the processor (defaults to the CPU count)
//	-cpuprofile  write a CPU profile to the given file
func main() {
	config := flag.String("c", "", "JSON config file for filters")
	version := flag.Bool("v", false, "show version")
	size := flag.Int("b", 20000, "batch size")
	numWorkers := flag.Int("w", runtime.NumCPU(), "number of workers")
	cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file")

	flag.Parse()

	if *version {
		fmt.Println(span.AppVersion)
		os.Exit(0)
	}

	if *config == "" {
		log.Fatal("config file required")
	}

	if *cpuprofile != "" {
		file, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Registered before StopCPUProfile so that it runs after it: the
		// profile has to be stopped before its file is closed.
		defer file.Close()
		// StartCPUProfile can fail (e.g. when profiling is already enabled);
		// report that instead of silently running without a profile.
		if err := pprof.StartCPUProfile(file); err != nil {
			log.Fatal(err)
		}
		// NOTE: log.Fatal exits via os.Exit and therefore skips deferred
		// calls, so a run that ends in log.Fatal does not stop the profile.
		defer pprof.StopCPUProfile()
	}

	// read and parse config file
	configfile, err := os.Open(*config)
	if err != nil {
		log.Fatal(err)
	}

	var tagger filter.Tagger
	err = json.NewDecoder(configfile).Decode(&tagger)
	// The config file is not needed past this point, so release the handle
	// before the (potentially long) processing starts. It was opened
	// read-only; a Close error would not be actionable and is ignored.
	configfile.Close()
	if err != nil {
		log.Fatal(err)
	}

	var readers []io.Reader

	if flag.NArg() == 0 {
		readers = append(readers, os.Stdin)
	} else {
		for _, filename := range flag.Args() {
			file, err := os.Open(filename)
			if err != nil {
				log.Fatal(err)
			}
			// Kept open until main returns; the processing loop below still
			// has to read from it.
			defer file.Close()
			readers = append(readers, file)
		}
	}

	for _, r := range readers {
		p := bytebatch.NewLineProcessor(r, os.Stdout, func(b []byte) ([]byte, error) {
			// business logic
			var is finc.IntermediateSchema
			if err := json.Unmarshal(b, &is); err != nil {
				return b, err
			}

			tagged := tagger.Tag(is)

			bb, err := json.Marshal(tagged)
			if err != nil {
				return bb, err
			}
			bb = append(bb, '\n')
			return bb, nil
		})

		p.NumWorkers = *numWorkers
		p.BatchSize = *size

		if err := p.Run(); err != nil {
			log.Fatal(err)
		}
	}
}