func (this *GooseSearch) runIndexServer(listenPort int, requestBufSize int) error { if 0 == listenPort || 0 == requestBufSize { return log.Error("arg error istenPort[%d] requestBufSize[%d]", listenPort, requestBufSize) } if this.varIndexer == nil { return nil } listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", listenPort)) if err != nil { log.Error("runIndexServer listen fail : %s", err.Error()) return err } // 简单一个协程完成接受请求和完成处理.索引更新不要求高并发性. go func() { reqbuf := make([]byte, requestBufSize) for { var reqlen int conn, err := listener.Accept() if err != nil { log.Warn("IndexServer accept fail : %s", err.Error()) goto LabelError } // receive data reqlen, err = conn.Read(reqbuf) if err != nil { log.Warn("IndexSearcher read fail : %s", err.Error()) goto LabelError } // index err = this.varIndexer.BuildIndex(NewBufferIterOnce(reqbuf[:reqlen])) if err != nil { log.Warn("IndexSearcher BuildIndex fail : %s", err.Error()) goto LabelError } LabelError: conn.Close() } }() return nil }
func NewMergeEngine(db DataBaseReader, termList []TermInQuery) (*MergeEngine, error) { mg := MergeEngine{} if len(termList) >= GOOSE_MAX_QUERY_TERM { return nil, log.Warn("to much terms [%d]", len(termList)) } mg.omitflag = 0 mg.lstheap = &listMinHeap{} mg.termCount = len(termList) heap.Init(mg.lstheap) // 把全部拉链建成小顶堆 for i, e := range termList { var err error item := listMinHeapItem{} item.list, err = db.ReadIndex(e.Sign) if err != nil { log.Warn("read term[%d] : %s", e.Sign, err) item.list = nil } item.no = i item.pos = 0 item.sign = e.Sign if e.CanOmit { item.omit = 0 // 0表示可省 } else { item.omit = 1 << uint(i) // 不可省term } // 拉链有效才放入堆 if item.list != nil && item.list.Len() > 0 { heap.Push(mg.lstheap, item) } // 同时记下不可省term的标记 if e.CanOmit == false { mg.omitflag ^= 1 << uint(i) } log.Debug("term[%d] omit[%d] weight[%d] listLen[%d]", item.sign, item.omit, e.Weight, len(*item.list)) } log.Debug("termCnt[%d] omitflag[%d]", mg.termCount, mg.omitflag) return &mg, nil }
// 检索模式运行 func (this *Goose) searchModeRun() { log.Debug("run in search mode") if this.searchSty == nil { log.Error("Please set search strategy,see Goose.SetSearchStrategy()") return } if this.indexSty == nil { log.Warn("can't build index real time witout Index Strategy") } gooseSearch := NewGooseSearch() err := gooseSearch.Init(this.confPath, this.indexSty, this.searchSty) if err != nil { log.Error(err) return } log.Debug("goose search init succ") err = gooseSearch.Run() if err != nil { log.Error(err) return } }
// 分配内部id func (this *IdManager) AllocID(outId OutIdType) (InIdType, error) { this.lock.Lock() defer this.lock.Unlock() if outId == 0 { return 0, log.Warn("illegal outId [%d]", 0) } if this.idStatus.CurId >= this.idStatus.MaxInId { return 0, log.Error("InId [%d] out of limit MaxInId[%d]", this.idStatus.CurId, this.idStatus.MaxInId) } inID := this.idStatus.CurId // 分配信息,写入mmap offset := inID * idSize err := this.mfile.WriteNum(uint32(offset), uint32(outId)) if err != nil { return 0, err } // 确认分配成功才真正占用这个id this.idStatus.CurId++ return inID, nil }
// Open opens an already-existing big file identified by path and name.
// It parses the companion stat file, opens all data files except the last
// read-only, opens the last one read-write positioned at the recorded
// offset, and returns an error if the stat file is missing or inconsistent.
func (this *BigFile) Open(path string, name string) error {
	// We are opening pre-existing data files (as opposed to creating new ones).
	this.fileModel = bigFileModelOpen
	this.filePath = path
	this.fileName = name
	this.statFileFullPath = filepath.Join(this.filePath, fmt.Sprintf("%s%s", this.fileName, statFileSuffix))

	// Parse the stat file to recover the file-set metadata.
	err := this.parseStatFile()
	if err != nil {
		return log.Warn(err)
	}

	// Sanity-check the stat file: a zero suggested size means it is corrupt
	// or was never written.
	if this.bigfileStat.SuggestFileSize == 0 {
		return log.Error("BigFile.Open stat file error")
	}

	// All data files except the last are opened read-only.
	readOnlyFileCnt := uint8(0)
	if this.bigfileStat.FileCnt > 0 {
		readOnlyFileCnt = this.bigfileStat.FileCnt - 1
	}
	this.readOnlyFile = make([]*os.File, readOnlyFileCnt)
	for i := 0; uint8(i) < readOnlyFileCnt; i++ {
		f, err := this.openRoFile(uint8(i))
		if err != nil {
			return err
		}
		this.readOnlyFile[i] = f
		// Should also verify each read-only file is at least the configured size.
		// TODO
	}

	// The last file is opened read-write.
	if this.bigfileStat.FileCnt > 0 {
		err = this.openRwFile(this.bigfileStat.FileCnt - 1)
		if err != nil {
			return err
		}
		// Position the write pointer at the recorded last-file offset.
		// NOTE(review): Seek error is ignored here — consider checking it.
		this.readwriteFile.Seek(int64(this.bigfileStat.LastFileOffset), 0)
		// The last file's on-disk size must equal the recorded offset,
		// otherwise the stat info and data are out of sync.
		sz, _ := FileSize(this.readwriteFile)
		if sz != int64(this.bigfileStat.LastFileOffset) {
			return log.Error("BigFile.Open", "FileStatInfo Error LastFileOffset:[%d] != FileSize:[%d]", this.bigfileStat.LastFileOffset, sz)
		}
	} else {
		// No data files yet: there is nothing to write into.
		this.readwriteFile = nil
	}
	return nil
}
func (this *GooseSearch) runRefreshServer(sleeptime int) error { if 0 == sleeptime { return log.Error("arg error sleeptime[%d]", sleeptime) } go func() { for { time.Sleep(time.Duration(sleeptime) * time.Second) log.Debug("refresh now") // sync search db err := this.searchDB.Sync() if err != nil { log.Warn(err) } } }() return nil }
func (this *GooseSearch) runSearchServer(routineNum int, listenPort int, requestBufSize int, responseBufSize int) error { if 0 == routineNum || 0 == listenPort || 0 == requestBufSize || 0 == responseBufSize { return log.Error("arg error routineNum[%d] listenPort[%d] "+ "requestBufSize[%d] responseBufSize[%d]", routineNum, listenPort, requestBufSize, responseBufSize) } listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", listenPort)) if err != nil { log.Error("runSearchServer listen fail : %s", err.Error()) return err } for i := 0; i < routineNum; i++ { go func() { reqbuf := make([]byte, requestBufSize) resbuf := make([]byte, responseBufSize) context := NewStyContext() for { var reqlen int var reslen int var t1, t2 int64 // clear buf context.Clear() conn, err := listener.Accept() if err != nil { log.Warn("SearchServer accept fail : %s", err.Error()) goto LabelError } context.Log.Info("IP", conn.RemoteAddr().String()) // receive data reqlen, err = conn.Read(reqbuf) if err != nil { log.Warn("SearchServer read fail : %s receive len[%d]", err.Error(), reqlen) goto LabelError } context.Log.Info("reqlen", reqlen) // do search t1 = time.Now().UnixNano() reslen, err = this.searcher.Search(context, reqbuf, resbuf) t2 = time.Now().UnixNano() if err != nil { log.Warn("SearchServer Search fail : %s", err.Error()) goto LabelError } context.Log.Info("time(ms)", Ns2Ms(t2-t1)) // write data _, err = conn.Write(resbuf[:reslen]) if err != nil { log.Warn("SearchServer conn write fail : %s", err.Error()) goto LabelError } LabelError: conn.Close() context.Log.PrintAllInfo() } }() } return nil }
func (this *MergeEngine) Next(termInDoclist []TermInDoc) (inId InIdType, currValid, allfinish bool) { if len(termInDoclist) != this.termCount { log.Warn("len(termInDoclist) != this.termCount") return 0, false, true } if this.lstheap.Len() == 0 { return 0, false, true } // 初始化 for i, _ := range termInDoclist { termInDoclist[i].Sign = 0 termInDoclist[i].Weight = 0 } oflag := 0 /* // 先看当前id最小的堆顶 item := this.lstheap.Pop().(listMinHeapItem) currInID := item.Curr().InID // 记下当前doc termInDoclist[ item.no ].Sign = item.sign termInDoclist[ item.no ].Weight = item.Curr().Weight oflag ^= item.omit */ top := this.lstheap.Top().(listMinHeapItem) currInID := top.Curr().InID currValid = true allfinish = false for this.lstheap.Len() > 0 { top := this.lstheap.Top().(listMinHeapItem) if top.Curr().InID != currInID { // 遇到新的doc了,就是归并完一个doc // 跳出去校验currInID的命中情况 break } // 堆里面还有相同的doc,先弹出 item := heap.Pop(this.lstheap).(listMinHeapItem) // 记下当前doc termInDoclist[item.no].Sign = item.sign termInDoclist[item.no].Weight = item.Curr().Weight oflag ^= item.omit // 如果拉链没遍历完,继续加入堆 if item.Next() { heap.Push(this.lstheap, item) } else { // 如果拉链遍历完,且这个拉链是不可省term // 处理完当前doc后后面不需要再归并了 if item.omit > 0 { allfinish = true log.Debug("not omit item travel end no[%d] pos[%d] list.len[%d]", item.no, item.pos, len(*item.list)) } } } // 检查不可省term是否有全部命中 if oflag != this.omitflag { // 这次归并得到的doc没有用,丢掉吧 currValid = false } else { currValid = true } inId = currInID return }