// 检索模式运行 func (this *Goose) searchModeRun() { log.Debug("run in search mode") if this.searchSty == nil { log.Error("Please set search strategy,see Goose.SetSearchStrategy()") return } if this.indexSty == nil { log.Warn("can't build index real time witout Index Strategy") } gooseSearch := NewGooseSearch() err := gooseSearch.Init(this.confPath, this.indexSty, this.searchSty) if err != nil { log.Error(err) return } log.Debug("goose search init succ") err = gooseSearch.Run() if err != nil { log.Error(err) return } }
func NewMergeEngine(db DataBaseReader, termList []TermInQuery) (*MergeEngine, error) { mg := MergeEngine{} if len(termList) >= GOOSE_MAX_QUERY_TERM { return nil, log.Warn("to much terms [%d]", len(termList)) } mg.omitflag = 0 mg.lstheap = &listMinHeap{} mg.termCount = len(termList) heap.Init(mg.lstheap) // 把全部拉链建成小顶堆 for i, e := range termList { var err error item := listMinHeapItem{} item.list, err = db.ReadIndex(e.Sign) if err != nil { log.Warn("read term[%d] : %s", e.Sign, err) item.list = nil } item.no = i item.pos = 0 item.sign = e.Sign if e.CanOmit { item.omit = 0 // 0表示可省 } else { item.omit = 1 << uint(i) // 不可省term } // 拉链有效才放入堆 if item.list != nil && item.list.Len() > 0 { heap.Push(mg.lstheap, item) } // 同时记下不可省term的标记 if e.CanOmit == false { mg.omitflag ^= 1 << uint(i) } log.Debug("term[%d] omit[%d] weight[%d] listLen[%d]", item.sign, item.omit, e.Weight, len(*item.list)) } log.Debug("termCnt[%d] omitflag[%d]", mg.termCount, mg.omitflag) return &mg, nil }
func (this *GooseSearch) Run() error { // read conf log.Debug("GooseSearch Run begin") searchGoroutineNum := this.conf.Int64("GooseSearch.Search.GoroutineNum") searchSvrPort := this.conf.Int64("GooseSearch.Search.ServerPort") indexSvrPort := this.conf.Int64("GooseSearch.Index.ServerPort") searchReqBufSize := this.conf.Int64("GooseSearch.Search.RequestBufferSize") searchResBufSize := this.conf.Int64("GooseSearch.Search.ResponseBufferSize") indexReqBufSize := this.conf.Int64("GooseSearch.Index.RequestBufferSize") //indexResBufSize := this.conf.GetInt("GooseSearch.Index.ResponseBufferSize") refreshSleepTime := this.conf.Int64("GooseSearch.Refresh.SleepTime") log.Debug("Read Conf searchGoroutineNum[%d] searchSvrPort[%d] "+ "indexSvrPort[%d] searchReqBufSize[%d] searchResBufSize[%d] "+ "indexReqBufSize[%d] refreshSleepTime[%d]", searchGoroutineNum, searchSvrPort, indexSvrPort, searchReqBufSize, searchResBufSize, indexReqBufSize, refreshSleepTime) err := this.runSearchServer(int(searchGoroutineNum), int(searchSvrPort), int(searchReqBufSize), int(searchResBufSize)) if err != nil { return err } err = this.runIndexServer(int(indexSvrPort), int(indexReqBufSize)) if err != nil { return err } err = this.runRefreshServer(int(refreshSleepTime)) if err != nil { return err } neverReturn := sync.WaitGroup{} neverReturn.Add(1) neverReturn.Wait() return nil }
// 程序入口,解析程序参数,启动[建库|检索]模式 func (this *Goose) Run() { defer func() { if r := recover(); r != nil { os.Exit(1) } }() // 解析命令行参数 var opts struct { // build mode BuildMode bool `short:"b" long:"build" description:"run in build mode"` // configure file Configure string `short:"c" long:"conf" description:"congfigure file" default:"conf/goose.toml"` // log configure file LogConf string `short:"l" long:"logconf" description:"log congfigure file" default:"conf/log.toml"` // build mode data file DataFile string `short:"d" long:"datafile" description:"build mode data file"` } parser := flags.NewParser(&opts, flags.HelpFlag) _, err := parser.ParseArgs(os.Args) if err != nil { fmt.Println(this.showLogo()) fmt.Println(err) os.Exit(1) } if opts.BuildMode && len(opts.DataFile) == 0 { fmt.Println(this.showLogo()) parser.WriteHelp(os.Stderr) os.Exit(1) } this.confPath = opts.Configure this.dataPath = opts.DataFile this.logConfPath = opts.LogConf // init log err = log.LoadConfiguration(this.logConfPath) if err != nil { fmt.Println(err) os.Exit(1) } log.Debug("Load log conf finish") // run if opts.BuildMode { this.buildModeRun() } else { this.searchModeRun() } // BUG(log4go) log4go need time to sync ...(wtf) // see http://stackoverflow.com/questions/14252766/abnormal-behavior-of-log4go time.Sleep(100 * time.Millisecond) }
func (this *GooseSearch) runRefreshServer(sleeptime int) error { if 0 == sleeptime { return log.Error("arg error sleeptime[%d]", sleeptime) } go func() { for { time.Sleep(time.Duration(sleeptime) * time.Second) log.Debug("refresh now") // sync search db err := this.searchDB.Sync() if err != nil { log.Warn(err) } } }() return nil }
func (this *GooseSearch) Init(confPath string, indexSty IndexStrategy, searchSty SearchStrategy) (err error) { defer func() { if r := recover(); r != nil { err = log.Error(r) } }() // load conf this.conf, err = config.NewConf(confPath) if err != nil { return } // set max procs maxProcs := int(this.conf.Int64("GooseSearch.MaxProcs")) if maxProcs <= 0 { maxProcs = runtime.NumCPU() } runtime.GOMAXPROCS(maxProcs) log.Debug("set max procs [%d]", maxProcs) // init dbsearcher dbPath := this.conf.String("GooseBuild.DataBase.DbPath") log.Debug("init db [%s]", dbPath) this.searchDB = NewDBSearcher() err = this.searchDB.Init(dbPath) if err != nil { return } log.Debug("init db [%s]", dbPath) // index strategy global init if indexSty != nil { err = indexSty.Init(this.conf) if err != nil { return } } log.Debug("index strategy init finish") // search strategy global init if searchSty != nil { err = searchSty.Init(this.conf) if err != nil { return } } log.Debug("search strategy init finish") // var indexer if indexSty != nil { this.varIndexer, err = NewVarIndexer(this.searchDB, indexSty) if err != nil { return } } log.Debug("VarIndexer init finish") // searcher if searchSty != nil { this.searcher, err = NewSearcher(this.searchDB, searchSty) if err != nil { return } } log.Debug("Searcher init finish") return }
func (this *MergeEngine) Next(termInDoclist []TermInDoc) (inId InIdType, currValid, allfinish bool) { if len(termInDoclist) != this.termCount { log.Warn("len(termInDoclist) != this.termCount") return 0, false, true } if this.lstheap.Len() == 0 { return 0, false, true } // 初始化 for i, _ := range termInDoclist { termInDoclist[i].Sign = 0 termInDoclist[i].Weight = 0 } oflag := 0 /* // 先看当前id最小的堆顶 item := this.lstheap.Pop().(listMinHeapItem) currInID := item.Curr().InID // 记下当前doc termInDoclist[ item.no ].Sign = item.sign termInDoclist[ item.no ].Weight = item.Curr().Weight oflag ^= item.omit */ top := this.lstheap.Top().(listMinHeapItem) currInID := top.Curr().InID currValid = true allfinish = false for this.lstheap.Len() > 0 { top := this.lstheap.Top().(listMinHeapItem) if top.Curr().InID != currInID { // 遇到新的doc了,就是归并完一个doc // 跳出去校验currInID的命中情况 break } // 堆里面还有相同的doc,先弹出 item := heap.Pop(this.lstheap).(listMinHeapItem) // 记下当前doc termInDoclist[item.no].Sign = item.sign termInDoclist[item.no].Weight = item.Curr().Weight oflag ^= item.omit // 如果拉链没遍历完,继续加入堆 if item.Next() { heap.Push(this.lstheap, item) } else { // 如果拉链遍历完,且这个拉链是不可省term // 处理完当前doc后后面不需要再归并了 if item.omit > 0 { allfinish = true log.Debug("not omit item travel end no[%d] pos[%d] list.len[%d]", item.no, item.pos, len(*item.list)) } } } // 检查不可省term是否有全部命中 if oflag != this.omitflag { // 这次归并得到的doc没有用,丢掉吧 currValid = false } else { currValid = true } inId = currInID return }