Ejemplo n.º 1
0
// runIndexServer starts the TCP server that receives index-update requests.
//
// It listens on localhost:listenPort and spawns one goroutine that accepts
// connections and handles them strictly one at a time; each request is read
// into a buffer of requestBufSize bytes and handed to varIndexer.BuildIndex.
//
// It returns an error when an argument is zero or when listening fails. When
// no indexer is configured (varIndexer == nil) nothing is started and nil is
// returned.
func (this *GooseSearch) runIndexServer(listenPort int, requestBufSize int) error {

	if 0 == listenPort || 0 == requestBufSize {
		return log.Error("arg error listenPort[%d] requestBufSize[%d]",
			listenPort, requestBufSize)
	}

	// Without an indexer there is nothing to serve.
	if this.varIndexer == nil {
		return nil
	}

	listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", listenPort))
	if err != nil {
		log.Error("runIndexServer listen fail : %s", err.Error())
		return err
	}

	// A single goroutine both accepts and processes requests: index updates
	// do not need high concurrency.
	go func() {
		reqbuf := make([]byte, requestBufSize)

		// handle processes one accepted connection and always closes it.
		handle := func(conn net.Conn) {
			defer conn.Close()

			// receive data
			// NOTE(review): a single Read may return only part of a request.
			reqlen, err := conn.Read(reqbuf)
			if err != nil {
				log.Warn("IndexSearcher read fail : %s", err.Error())
				return
			}

			// index
			err = this.varIndexer.BuildIndex(NewBufferIterOnce(reqbuf[:reqlen]))
			if err != nil {
				log.Warn("IndexSearcher BuildIndex fail : %s", err.Error())
			}
		}

		for {
			conn, err := listener.Accept()
			if err != nil {
				// conn is nil when Accept fails, so there is nothing to
				// close; closing it here would panic.
				log.Warn("IndexServer accept fail : %s", err.Error())
				continue
			}
			handle(conn)
		}
	}()

	return nil
}
Ejemplo n.º 2
0
// NewMergeEngine builds a MergeEngine over the index lists of termList.
//
// For every term it reads the term's index list from db and, if the list is
// non-empty, pushes it onto the engine's heap. Terms that cannot be omitted
// get bit i (their position in termList) set both in their heap item and in
// the engine's omitflag mask.
//
// It returns an error when len(termList) >= GOOSE_MAX_QUERY_TERM. A term
// whose list cannot be read is logged and skipped, not treated as fatal.
func NewMergeEngine(db DataBaseReader, termList []TermInQuery) (*MergeEngine, error) {
	if len(termList) >= GOOSE_MAX_QUERY_TERM {
		return nil, log.Warn("to much terms [%d]", len(termList))
	}

	mg := MergeEngine{}
	mg.omitflag = 0
	mg.lstheap = &listMinHeap{}
	mg.termCount = len(termList)
	heap.Init(mg.lstheap)

	// Put every index list into the min-heap.
	for i, e := range termList {
		var err error
		item := listMinHeapItem{}

		item.list, err = db.ReadIndex(e.Sign)
		if err != nil {
			log.Warn("read term[%d] : %s", e.Sign, err)
			item.list = nil
		}
		item.no = i
		item.pos = 0
		item.sign = e.Sign
		if e.CanOmit {
			item.omit = 0 // 0 means the term may be omitted
		} else {
			item.omit = 1 << uint(i) // term that must be hit
		}

		// Only lists that actually contain entries go into the heap.
		if item.list != nil && item.list.Len() > 0 {
			heap.Push(mg.lstheap, item)
		}

		// Record the bit of every term that must be hit.
		if !e.CanOmit {
			mg.omitflag ^= 1 << uint(i)
		}

		// item.list is nil when the read failed; dereferencing it for the
		// log line would panic, so report a length of 0 in that case.
		listLen := 0
		if item.list != nil {
			listLen = len(*item.list)
		}
		log.Debug("term[%d] omit[%d] weight[%d] listLen[%d]", item.sign,
			item.omit, e.Weight, listLen)
	}

	log.Debug("termCnt[%d] omitflag[%d]", mg.termCount, mg.omitflag)

	return &mg, nil
}
Ejemplo n.º 3
0
// searchModeRun runs Goose in search mode.
//
// A search strategy is mandatory; without one an error is logged and the
// method returns. A missing index strategy only produces a warning. The
// GooseSearch instance is then initialised and run, and any error from either
// step is logged.
func (this *Goose) searchModeRun() {
	log.Debug("run in search mode")

	switch {
	case this.searchSty == nil:
		log.Error("Please set search strategy,see Goose.SetSearchStrategy()")
		return
	case this.indexSty == nil:
		// Not fatal: searching still works, only real-time indexing is off.
		log.Warn("can't build index real time witout Index Strategy")
	}

	searcher := NewGooseSearch()
	if err := searcher.Init(this.confPath, this.indexSty, this.searchSty); err != nil {
		log.Error(err)
		return
	}
	log.Debug("goose search init succ")

	if err := searcher.Run(); err != nil {
		log.Error(err)
	}
}
Ejemplo n.º 4
0
// AllocID allocates the next internal id and binds it to outId.
//
// The whole operation runs under the manager's lock. The mapping is written
// to mfile at offset inID*idSize, and the id counter is advanced only after
// that write succeeds. An error is returned when outId is 0, when the current
// id has reached MaxInId, or when the write fails.
func (this *IdManager) AllocID(outId OutIdType) (InIdType, error) {
	this.lock.Lock()
	defer this.lock.Unlock()

	if outId == 0 {
		return 0, log.Warn("illegal outId [%d]", 0)
	}

	next, limit := this.idStatus.CurId, this.idStatus.MaxInId
	if next >= limit {
		return 0, log.Error("InId [%d] out of limit MaxInId[%d]", next, limit)
	}

	// Persist the inner-id -> outer-id mapping before claiming the id.
	if err := this.mfile.WriteNum(uint32(next*idSize), uint32(outId)); err != nil {
		return 0, err
	}

	// The write succeeded, so the id is now really taken.
	this.idStatus.CurId++

	return next, nil
}
Ejemplo n.º 5
0
// Open opens an already existing big file located in directory path with
// base name name. If the big file does not exist an error is returned.
//
// The stat file is parsed first. All physical files except the last one are
// opened read-only; the last one is opened read-write, positioned at
// LastFileOffset, and its size is checked against that offset. When the stat
// file records no physical files, readwriteFile is left nil.
func (this *BigFile) Open(path string, name string) error {

	// Mark the object as "opened from existing data files".
	this.fileModel = bigFileModelOpen

	this.filePath = path
	this.fileName = name

	this.statFileFullPath = filepath.Join(this.filePath,
		fmt.Sprintf("%s%s", this.fileName, statFileSuffix))

	// Parse the stat file to obtain the file information.
	err := this.parseStatFile()
	if err != nil {
		return log.Warn(err)
	}
	// Sanity-check the stat file.
	if this.bigfileStat.SuggestFileSize == 0 {
		return log.Error("BigFile.Open stat file error")
	}

	// Every file except the last one is opened read-only.
	readOnlyFileCnt := uint8(0)
	if this.bigfileStat.FileCnt > 0 {
		readOnlyFileCnt = this.bigfileStat.FileCnt - 1
	}
	this.readOnlyFile = make([]*os.File, readOnlyFileCnt)
	for i := uint8(0); i < readOnlyFileCnt; i++ {
		f, err := this.openRoFile(i)
		if err != nil {
			return err
		}
		this.readOnlyFile[i] = f
		// TODO: verify the size of these read-only files; they should be at
		// least as large as the configured size.
	}

	if this.bigfileStat.FileCnt == 0 {
		this.readwriteFile = nil
		return nil
	}

	// The last file is opened read-write.
	err = this.openRwFile(this.bigfileStat.FileCnt - 1)
	if err != nil {
		return err
	}

	// Position the file pointer at the recorded end of data (whence 0 means
	// relative to the start of the file).
	if _, seekErr := this.readwriteFile.Seek(int64(this.bigfileStat.LastFileOffset), 0); seekErr != nil {
		return log.Error("BigFile.Open seek fail : %s", seekErr.Error())
	}

	// The recorded offset of the last file must equal its actual size.
	sz, sizeErr := FileSize(this.readwriteFile)
	if sizeErr != nil {
		return log.Error("BigFile.Open get file size fail : %s", sizeErr.Error())
	}
	if sz != int64(this.bigfileStat.LastFileOffset) {
		return log.Error("BigFile.Open FileStatInfo Error LastFileOffset:[%d] != FileSize:[%d]",
			this.bigfileStat.LastFileOffset, sz)
	}

	return nil
}
Ejemplo n.º 6
0
// runRefreshServer starts a background goroutine that, every sleeptime
// seconds, calls Sync on the search database. A failed Sync is logged as a
// warning and the loop keeps going.
//
// It returns an error when sleeptime is 0; otherwise it returns nil right
// after the goroutine has been started.
func (this *GooseSearch) runRefreshServer(sleeptime int) error {
	if sleeptime == 0 {
		return log.Error("arg error sleeptime[%d]", sleeptime)
	}

	interval := time.Duration(sleeptime) * time.Second

	go func() {
		for {
			time.Sleep(interval)
			log.Debug("refresh now")

			// Flush the search database.
			if err := this.searchDB.Sync(); err != nil {
				log.Warn(err)
			}
		}
	}()

	return nil
}
Ejemplo n.º 7
0
// runSearchServer starts the TCP server that answers search requests.
//
// It listens on localhost:listenPort and starts routineNum goroutines that
// all accept from the same listener. Each goroutine owns a request buffer of
// requestBufSize bytes, a response buffer of responseBufSize bytes and its
// own strategy context, all reused across requests.
//
// It returns an error when any argument is zero or when listening fails.
func (this *GooseSearch) runSearchServer(routineNum int, listenPort int,
	requestBufSize int, responseBufSize int) error {

	if 0 == routineNum || 0 == listenPort || 0 == requestBufSize || 0 == responseBufSize {
		return log.Error("arg error routineNum[%d] listenPort[%d] "+
			"requestBufSize[%d] responseBufSize[%d]", routineNum, listenPort,
			requestBufSize, responseBufSize)
	}

	listener, err := net.Listen("tcp", fmt.Sprintf("localhost:%d", listenPort))
	if err != nil {
		log.Error("runSearchServer listen fail : %s", err.Error())
		return err
	}

	for i := 0; i < routineNum; i++ {
		go func() {
			reqbuf := make([]byte, requestBufSize)
			resbuf := make([]byte, responseBufSize)
			context := NewStyContext()

			// serve handles one accepted connection: read the request, run
			// the search, write the response. The connection is always
			// closed and the per-request log is always flushed afterwards.
			serve := func(conn net.Conn) {
				defer func() {
					conn.Close()
					context.Log.PrintAllInfo()
				}()

				context.Log.Info("IP", conn.RemoteAddr().String())

				// receive data
				reqlen, err := conn.Read(reqbuf)
				if err != nil {
					log.Warn("SearchServer read fail : %s receive len[%d]", err.Error(), reqlen)
					return
				}
				context.Log.Info("reqlen", reqlen)

				// do search
				// NOTE(review): the whole reqbuf is passed, not
				// reqbuf[:reqlen], so bytes of an earlier, longer request
				// may still follow the current one — confirm against the
				// contract of Search.
				t1 := time.Now().UnixNano()
				reslen, err := this.searcher.Search(context, reqbuf, resbuf)
				t2 := time.Now().UnixNano()
				if err != nil {
					log.Warn("SearchServer Search fail : %s", err.Error())
					return
				}
				context.Log.Info("time(ms)", Ns2Ms(t2-t1))

				// write data
				if _, err = conn.Write(resbuf[:reslen]); err != nil {
					log.Warn("SearchServer conn write fail : %s", err.Error())
				}
			}

			for {
				// Reset the context before every request.
				context.Clear()

				conn, err := listener.Accept()
				if err != nil {
					// conn is nil when Accept fails, so there is nothing to
					// close; closing it here would panic.
					log.Warn("SearchServer accept fail : %s", err.Error())
					continue
				}
				serve(conn)
			}
		}()
	}
	return nil
}
Ejemplo n.º 8
0
// Next merges the next document out of the engine's heap of index lists.
//
// termInDoclist must have exactly termCount entries. It is reset on every
// call and then filled, at the position of each term, with the sign and
// weight of every list whose current entry belongs to the document being
// merged.
//
// Returns:
//   - inId: the id of the merged document (the id found at the heap top).
//   - currValid: true when the omit bits of the lists that hit this document
//     combine to the engine's omitflag.
//   - allfinish: true when merging is over: the arguments were invalid, the
//     heap was already empty, or a list with omit > 0 was exhausted while
//     handling this document.
func (this *MergeEngine) Next(termInDoclist []TermInDoc) (inId InIdType, currValid, allfinish bool) {
	if len(termInDoclist) != this.termCount {
		log.Warn("len(termInDoclist) != this.termCount")
		return 0, false, true
	}
	if this.lstheap.Len() == 0 {
		return 0, false, true
	}

	// Reset the output slots.
	for idx := range termInDoclist {
		termInDoclist[idx].Sign = 0
		termInDoclist[idx].Weight = 0
	}

	// The document to merge is the one currently at the top of the heap.
	head := this.lstheap.Top().(listMinHeapItem)
	docID := head.Curr().InID

	hitFlag := 0
	for this.lstheap.Len() > 0 {
		peek := this.lstheap.Top().(listMinHeapItem)
		if peek.Curr().InID != docID {
			// A different document has reached the top: this one is done.
			break
		}

		// Same document: take the list off the heap and record its hit.
		cur := heap.Pop(this.lstheap).(listMinHeapItem)
		slot := &termInDoclist[cur.no]
		slot.Sign = cur.sign
		slot.Weight = cur.Curr().Weight
		hitFlag ^= cur.omit

		// A list that still has entries goes back into the heap.
		if cur.Next() {
			heap.Push(this.lstheap, cur)
			continue
		}

		// The list is exhausted. If it belongs to a term that must be hit,
		// nothing after the current document needs merging.
		if cur.omit > 0 {
			allfinish = true
			log.Debug("not omit item travel end no[%d] pos[%d] list.len[%d]",
				cur.no, cur.pos, len(*cur.list))
		}
	}

	// The document only counts when every term that must be hit was hit.
	return docID, hitFlag == this.omitflag, allfinish
}