Example #1
0
// searchModeRun runs goose in search mode.
//
// A search strategy is mandatory: without one the function logs an error and
// returns. A missing index strategy only produces a warning. It then creates
// a GooseSearch, calls Init with the configured path and strategies, and
// calls Run. Failures from Init or Run are logged, not returned.
func (this *Goose) searchModeRun() {
	log.Debug("run in search mode")

	switch {
	case this.searchSty == nil:
		// Nothing can be served without a search strategy.
		log.Error("Please set search strategy,see Goose.SetSearchStrategy()")
		return
	case this.indexSty == nil:
		// Not fatal: initialization proceeds with a nil index strategy.
		log.Warn("can't build index real time witout Index Strategy")
	}

	engine := NewGooseSearch()
	if err := engine.Init(this.confPath, this.indexSty, this.searchSty); err != nil {
		log.Error(err)
		return
	}
	log.Debug("goose search init succ")

	if err := engine.Run(); err != nil {
		log.Error(err)
	}
}
Example #2
0
// NewMergeEngine builds a MergeEngine over the posting lists of termList.
//
// For every term it reads the posting list from db and, when the list is
// non-nil and non-empty, pushes it onto the engine's min-heap. A term whose
// list cannot be read is logged and treated as having no list; it does not
// abort construction. Every term with CanOmit == false contributes the bit
// 1<<i (i being its position in termList) to the engine's omitflag, whether
// or not its list was usable.
//
// It returns an error (and a nil engine) when len(termList) is greater than
// or equal to GOOSE_MAX_QUERY_TERM.
func NewMergeEngine(db DataBaseReader, termList []TermInQuery) (*MergeEngine, error) {
	if len(termList) >= GOOSE_MAX_QUERY_TERM {
		return nil, log.Warn("to much terms [%d]", len(termList))
	}

	mg := MergeEngine{}
	mg.omitflag = 0
	mg.lstheap = &listMinHeap{}
	mg.termCount = len(termList)
	heap.Init(mg.lstheap)

	// Turn every posting list into an entry of the min-heap.
	for i, e := range termList {
		var err error
		item := listMinHeapItem{}

		item.list, err = db.ReadIndex(e.Sign)
		if err != nil {
			log.Warn("read term[%d] : %s", e.Sign, err)
			item.list = nil
		}
		item.no = i
		item.pos = 0
		item.sign = e.Sign
		item.omit = 0 // 0 means the term may be omitted
		if !e.CanOmit {
			// Required term: tag the item and record the bit on the engine.
			item.omit = 1 << uint(i)
			mg.omitflag |= 1 << uint(i)
		}

		// item.list is nil when the read failed, so it must only be
		// dereferenced under this guard (the debug log below included).
		listLen := 0
		if item.list != nil {
			listLen = len(*item.list)
			// Only usable (non-empty) lists take part in the merge.
			if item.list.Len() > 0 {
				heap.Push(mg.lstheap, item)
			}
		}

		log.Debug("term[%d] omit[%d] weight[%d] listLen[%d]", item.sign,
			item.omit, e.Weight, listLen)
	}

	log.Debug("termCnt[%d] omitflag[%d]", mg.termCount, mg.omitflag)

	return &mg, nil
}
Example #3
0
// Run reads the server settings from the loaded configuration, starts the
// search server, the index server and the refresh server in that order, and
// then blocks forever.
//
// It returns only when one of the three servers fails to start, passing that
// error through unchanged.
func (this *GooseSearch) Run() error {
	log.Debug("GooseSearch Run begin")

	// Read every setting up front so they can be logged in one line.
	// (GooseSearch.Index.ResponseBufferSize is not read.)
	goroutines := this.conf.Int64("GooseSearch.Search.GoroutineNum")
	searchPort := this.conf.Int64("GooseSearch.Search.ServerPort")
	indexPort := this.conf.Int64("GooseSearch.Index.ServerPort")
	searchReqBuf := this.conf.Int64("GooseSearch.Search.RequestBufferSize")
	searchResBuf := this.conf.Int64("GooseSearch.Search.ResponseBufferSize")
	indexReqBuf := this.conf.Int64("GooseSearch.Index.RequestBufferSize")
	refreshSleep := this.conf.Int64("GooseSearch.Refresh.SleepTime")

	log.Debug("Read Conf searchGoroutineNum[%d] searchSvrPort[%d] "+
		"indexSvrPort[%d] searchReqBufSize[%d] searchResBufSize[%d] "+
		"indexReqBufSize[%d] refreshSleepTime[%d]",
		goroutines, searchPort, indexPort, searchReqBuf, searchResBuf,
		indexReqBuf, refreshSleep)

	if err := this.runSearchServer(int(goroutines), int(searchPort),
		int(searchReqBuf), int(searchResBuf)); err != nil {
		return err
	}
	if err := this.runIndexServer(int(indexPort), int(indexReqBuf)); err != nil {
		return err
	}
	if err := this.runRefreshServer(int(refreshSleep)); err != nil {
		return err
	}

	// Park this goroutine for good: the counter is never decremented, so
	// Wait never returns.
	var forever sync.WaitGroup
	forever.Add(1)
	forever.Wait()

	return nil
}
Example #4
0
// Run is the program entry point: it parses the command-line arguments,
// loads the log configuration and starts either build mode or search mode.
//
// The process exits with status 1 when the arguments cannot be parsed, when
// build mode is requested without a data file, when the log configuration
// cannot be loaded, or when a panic reaches this function.
func (this *Goose) Run() {
	defer func() {
		if r := recover(); r != nil {
			// Report the panic before exiting; exiting silently would hide
			// the cause of the crash.
			fmt.Fprintln(os.Stderr, "goose: fatal panic:", r)
			os.Exit(1)
		}
	}()

	// Command-line options.
	var opts struct {
		// build mode
		BuildMode bool `short:"b" long:"build" description:"run in build mode"`

		// configure file
		Configure string `short:"c" long:"conf" description:"congfigure file" default:"conf/goose.toml"`

		// log configure file
		LogConf string `short:"l" long:"logconf" description:"log congfigure file" default:"conf/log.toml"`

		// build mode data file
		DataFile string `short:"d" long:"datafile" description:"build mode data file"`
	}
	parser := flags.NewParser(&opts, flags.HelpFlag)
	// Skip os.Args[0]: the program name is not an argument to be parsed.
	if _, err := parser.ParseArgs(os.Args[1:]); err != nil {
		fmt.Println(this.showLogo())
		fmt.Println(err)
		os.Exit(1)
	}
	// Build mode is meaningless without input data.
	if opts.BuildMode && len(opts.DataFile) == 0 {
		fmt.Println(this.showLogo())
		parser.WriteHelp(os.Stderr)
		os.Exit(1)
	}

	this.confPath = opts.Configure
	this.dataPath = opts.DataFile
	this.logConfPath = opts.LogConf

	// Initialize logging.
	if err := log.LoadConfiguration(this.logConfPath); err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	log.Debug("Load log conf finish")

	// Run the selected mode.
	if opts.BuildMode {
		this.buildModeRun()
	} else {
		this.searchModeRun()
	}

	// BUG(log4go) log4go needs some time to flush its output before exit.
	// see http://stackoverflow.com/questions/14252766/abnormal-behavior-of-log4go
	time.Sleep(100 * time.Millisecond)
}
Example #5
0
// runRefreshServer starts a background goroutine that calls searchDB.Sync
// every sleeptime seconds. Sync failures are logged as warnings and the loop
// keeps going; the goroutine is never stopped.
//
// sleeptime must be positive. Zero or a negative value is rejected with an
// error and no goroutine is started: a non-positive duration would make
// time.Sleep return immediately and turn the loop into a busy spin.
func (this *GooseSearch) runRefreshServer(sleeptime int) error {
	if sleeptime <= 0 {
		return log.Error("arg error sleeptime[%d]", sleeptime)
	}

	interval := time.Duration(sleeptime) * time.Second

	go func() {
		for {
			time.Sleep(interval)
			log.Debug("refresh now")

			// Sync the search db.
			if err := this.searchDB.Sync(); err != nil {
				log.Warn(err)
			}
		}
	}()

	return nil
}
Example #6
0
// Init prepares the GooseSearch for Run.
//
// In order, it loads the configuration from confPath, sets GOMAXPROCS from
// "GooseSearch.MaxProcs" (falling back to the CPU count when the value is not
// positive), opens the search database at "GooseBuild.DataBase.DbPath",
// initializes the index and search strategies, and finally creates the
// VarIndexer and the Searcher. Every step that involves a strategy is skipped
// when that strategy is nil.
//
// The first failing step aborts initialization and its error is returned. A
// panic raised along the way is recovered, logged and returned as an error.
func (this *GooseSearch) Init(confPath string,
	indexSty IndexStrategy, searchSty SearchStrategy) (err error) {

	// Convert a panic during initialization into the returned error.
	defer func() {
		if p := recover(); p != nil {
			err = log.Error(p)
		}
	}()

	// Configuration.
	if this.conf, err = config.NewConf(confPath); err != nil {
		return err
	}

	// Scheduler parallelism.
	procs := int(this.conf.Int64("GooseSearch.MaxProcs"))
	if procs <= 0 {
		procs = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(procs)
	log.Debug("set max procs [%d]", procs)

	// Search database.
	dbPath := this.conf.String("GooseBuild.DataBase.DbPath")
	log.Debug("init db [%s]", dbPath)

	this.searchDB = NewDBSearcher()
	if err = this.searchDB.Init(dbPath); err != nil {
		return err
	}
	log.Debug("init db [%s]", dbPath)

	// Global initialization of the index strategy.
	if indexSty != nil {
		if err = indexSty.Init(this.conf); err != nil {
			return err
		}
	}
	log.Debug("index strategy init finish")

	// Global initialization of the search strategy.
	if searchSty != nil {
		if err = searchSty.Init(this.conf); err != nil {
			return err
		}
	}
	log.Debug("search strategy init finish")

	// VarIndexer, only when an index strategy was supplied.
	if indexSty != nil {
		if this.varIndexer, err = NewVarIndexer(this.searchDB, indexSty); err != nil {
			return err
		}
	}
	log.Debug("VarIndexer init finish")

	// Searcher, only when a search strategy was supplied.
	if searchSty != nil {
		if this.searcher, err = NewSearcher(this.searchDB, searchSty); err != nil {
			return err
		}
	}
	log.Debug("Searcher init finish")

	return err
}
Example #7
0
// Next merges the posting lists for the smallest pending doc id and reports
// which terms hit that doc.
//
// termInDoclist must have exactly termCount entries. All entries are reset to
// zero first; then, for every list whose current element has the smallest
// doc id, entry [item.no] receives the term sign and the weight found in that
// list, and the list is advanced.
//
// Results:
//   - inId: the doc id that was merged.
//   - currValid: true when every non-omittable term hit this doc, i.e. the
//     accumulated flags equal the engine's omitflag.
//   - allfinish: true when a non-omittable term's list was exhausted while
//     merging this doc, so no later doc can be valid and the caller can stop
//     after handling the current result.
//
// (0, false, true) is returned without touching any state when the length of
// termInDoclist does not match termCount or when the heap is already empty.
func (this *MergeEngine) Next(termInDoclist []TermInDoc) (inId InIdType, currValid, allfinish bool) {
	if len(termInDoclist) != this.termCount {
		log.Warn("len(termInDoclist) != this.termCount")
		return 0, false, true
	}
	if this.lstheap.Len() == 0 {
		return 0, false, true
	}

	// Reset the output slots.
	for i := range termInDoclist {
		termInDoclist[i].Sign = 0
		termInDoclist[i].Weight = 0
	}

	// The heap top holds the smallest pending doc id: that is the doc merged
	// by this call.
	first := this.lstheap.Top().(listMinHeapItem)
	currInID := first.Curr().InID
	oflag := 0

	for this.lstheap.Len() > 0 {
		top := this.lstheap.Top().(listMinHeapItem)
		if top.Curr().InID != currInID {
			// A different doc is on top: the current doc is fully merged.
			break
		}

		// Another list contains the current doc: pop it and record the hit.
		item := heap.Pop(this.lstheap).(listMinHeapItem)
		termInDoclist[item.no].Sign = item.sign
		termInDoclist[item.no].Weight = item.Curr().Weight
		// OR, not XOR: if a list carries the same doc id twice, the second
		// hit must not clear the bit set by the first.
		oflag |= item.omit

		if item.Next() {
			// The list has more elements: put it back on the heap.
			heap.Push(this.lstheap, item)
			continue
		}
		// The list is exhausted. If it belongs to a non-omittable term,
		// nothing after the current doc can match any more.
		if item.omit > 0 {
			allfinish = true
			log.Debug("not omit item travel end no[%d] pos[%d] list.len[%d]",
				item.no, item.pos, len(*item.list))
		}
	}

	// The doc is valid only when every non-omittable term hit it.
	return currInID, oflag == this.omitflag, allfinish
}