Ejemplo n.º 1
0
func addDocs(t *testing.T, index *index.Index) {
	err := index.Add(1, []byte(`{"title": "teste"}`), nil)

	if err != nil {
		t.Error(err)
		return
	}
}
Ejemplo n.º 2
0
func main() {
	var (
		fileOpt,
		dataDirOpt,
		databaseName,
		profileFile,
		metadataStr string
		metadata                    = index.Metadata{}
		helpOpt, newIndex, debugOpt bool
		err                         error
		index                       *index.Index
		batchSize                   int
	)

	optarg.Header("General options")
	optarg.Add("f", "file", "Read NeoSearch JSON database from file. (Required)", "")
	optarg.Add("c", "create", "Create new index database", false)
	optarg.Add("b", "batch-size", "Batch size", 1000)
	optarg.Add("n", "name", "Name of index database", "")
	optarg.Add("d", "data-dir", "Data directory", "")
	optarg.Add("t", "trace-debug", "Enable trace for debug", false)
	optarg.Add("h", "help", "Display this help", false)
	optarg.Add("p", "cpuprofile", "write cpu profile to file", "")
	optarg.Add("m", "metadata", "metadata of documents", "")

	for opt := range optarg.Parse() {
		switch opt.ShortName {
		case "f":
			fileOpt = opt.String()
		case "b":
			batchSize = opt.Int()
		case "d":
			dataDirOpt = opt.String()
		case "n":
			databaseName = opt.String()
		case "c":
			newIndex = true
		case "t":
			debugOpt = true
		case "p":
			profileFile = opt.String()
		case "m":
			metadataStr = opt.String()
		case "h":
			helpOpt = true
		}
	}

	if helpOpt {
		optarg.Usage()
		os.Exit(0)
	}

	if dataDirOpt == "" {
		dataDirOpt, _ = os.Getwd()
	}

	if fileOpt == "" {
		optarg.Usage()
		os.Exit(1)
	}

	if profileFile != "" {
		f, err := os.Create(profileFile)
		if err != nil {
			log.Fatal(err)
		}

		fmt.Println("Profiling to file: ", profileFile)
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	if metadataStr != "" {
		err = json.Unmarshal([]byte(metadataStr), &metadata)

		if err != nil {
			log.Fatal(err)
		}
	}

	cfg := neosearch.NewConfig()

	cfg.Option(neosearch.DataDir(dataDirOpt))
	cfg.Option(neosearch.Debug(debugOpt))
	cfg.Option(neosearch.KVCacheSize(1 << 15))

	neo := neosearch.New(cfg)

	if newIndex {
		log.Printf("Creating index %s\n", databaseName)
		index, err = neo.CreateIndex(databaseName)
	} else {
		log.Printf("Opening index %s ...\n", databaseName)
		index, err = neo.OpenIndex(databaseName)
	}

	if err != nil {
		log.Fatalf("Failed to open database '%s': %v", err)
		return
	}

	file, err := os.OpenFile(fileOpt, os.O_RDONLY, 0)

	if err != nil {
		log.Fatalf("Unable to open file: %s", fileOpt)
		return
	}

	jsonBytes, err := gommap.Map(file.Fd(), gommap.PROT_READ,
		gommap.MAP_PRIVATE)

	if err != nil {
		panic(err)
	}

	data := make([]map[string]interface{}, 0)

	err = json.Unmarshal(jsonBytes, &data)

	if err != nil {
		panic(err)
	}

	jsonBytes = nil

	startTime := time.Now()

	index.Batch()
	var count int
	totalResults := len(data)

	runtime.GC()

	cleanup := func() {
		neo.Close()
		file.Close()
		if profileFile != "" {
			fmt.Println("stopping profile: ", profileFile)
			pprof.StopCPUProfile()
		}
	}

	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt)
	go func() {
		<-c
		cleanup()
		os.Exit(1)
	}()

	defer func() {
		if r := recover(); r != nil {
			fmt.Println("Recovered from panic", r)
			cleanup()
			os.Exit(1)
		}

		cleanup()
	}()

	fmt.Println("Importing ", len(data), " records")

	for idx := range data {
		dataEntry := data[idx]

		if dataEntry["_id"] == nil {
			dataEntry["_id"] = idx
		}

		entryJSON, err := json.Marshal(&dataEntry)
		if err != nil {
			log.Println(err)
			return
		}

		err = index.Add(uint64(idx), entryJSON, metadata)
		if err != nil {
			panic(err)
		}

		if count == batchSize {
			count = 0

			fmt.Println("Flushing batch: ", idx, " from ", totalResults)
			index.FlushBatch()
			if idx != (totalResults - 1) {
				index.Batch()
			}

			runtime.GC()
		} else {
			count = count + 1
		}

		data[idx] = nil
	}

	index.FlushBatch()
	index.Close()
	neo.Close()

	elapsed := time.Since(startTime)

	log.Printf("Database indexed in %v\n", elapsed)
}