예제 #1
0
// Build implements defIndexBuilder.
//
// It selects the defs accepted by x.f.SelectDef whose names are pure ASCII,
// lowercases their names, and builds a MAFSA over the distinct lowercased
// names. x.mt.Values holds, for each distinct name in sorted order, the byte
// offsets of every def sharing that name. ofs[i] is taken as the byte offset
// of defs[i].
//
// The receiver's lock is held for the duration of the build. A panic raised
// while building is recovered and returned as an error.
func (x *defQueryIndex) Build(defs []*graph.Def, ofs byteOffsets) (err error) {
	x.Lock()
	defer x.Unlock()
	vlog.Printf("defQueryIndex: building index... (%d defs)", len(defs))

	defer func() {
		if r := recover(); r != nil {
			// Report the recovered value r; err is typically still nil at
			// this point and would hide the actual cause of the panic.
			err = fmt.Errorf("panic in defQueryIndex.Build (%d defs): %v", len(defs), r)
		}
	}()

	// Clone slice so we can sort it by whatever we want.
	dofs := make([]*defLowerNameAndOffset, 0, len(defs))
	for i, def := range defs {
		if x.f.SelectDef(def) && !hasNonASCIIChars(def.Name) {
			// See https://github.com/smartystreets/mafsa/issues/1 for
			// why we need to kick out non-ASCII.

			dofs = append(dofs, &defLowerNameAndOffset{strings.ToLower(def.Name), ofs[i]})
		}
	}
	if len(dofs) == 0 {
		// Nothing to index: install an empty table and mark the index ready.
		x.mt = &mafsaTable{}
		x.ready = true
		return nil
	}
	sort.Sort(defsByLowerName(dofs))
	vlog.Printf("defQueryIndex: done sorting by def name (%d defs).", len(defs))

	bt := mafsa.New()
	x.mt = &mafsaTable{}
	x.mt.Values = make([]byteOffsets, 0, len(dofs))
	j := 0 // index of earliest def with same name
	for i, def := range dofs {
		if i > 0 && dofs[j].lowerName == def.lowerName {
			// Same name as the current run: add this offset to the most
			// recently appended entry instead of inserting the name again.
			last := len(x.mt.Values) - 1
			x.mt.Values[last] = append(x.mt.Values[last], def.ofs)
		} else {
			// First def of a new name: insert the name and start its entry.
			bt.Insert(def.lowerName)
			x.mt.Values = append(x.mt.Values, byteOffsets{def.ofs})
			j = i
		}
	}
	bt.Finish()
	vlog.Printf("defQueryIndex: done adding %d defs to MAFSA & table and minimizing.", len(defs))

	b, err := bt.MarshalBinary()
	if err != nil {
		return err
	}
	vlog.Printf("defQueryIndex: done serializing MAFSA & table to %d bytes.", len(b))

	// Keep the serialized form and decode it back into the tree stored in
	// x.mt.t.
	x.mt.B = b
	x.mt.t, err = new(mafsa.Decoder).Decode(x.mt.B)
	if err != nil {
		return err
	}
	x.ready = true
	vlog.Printf("defQueryIndex: done building index (%d defs).", len(defs))
	return nil
}
예제 #2
0
// Build implements defQueryTreeIndexBuilder.
//
// It merges the per-unit def query indexes in xs into a single tree-wide
// index: every term found in a unit index is inserted into one MAFSA, and
// x.mt.Values holds, for each term in sorted order, the (unit, byte offsets)
// pairs of the units that contain it. A unit is identified by its position in
// the sorted unit list stored in x.mt.Units.
//
// At most math.MaxUint16 units are indexed because the unit number is stored
// in a uint16; any units beyond that limit (in sorted order) are skipped with
// a logged warning.
//
// The receiver's lock is held for the duration of the build. A panic raised
// while building is recovered and returned as an error.
func (x *defQueryTreeIndex) Build(xs map[unit.ID2]*defQueryIndex) (err error) {
	x.Lock()
	defer x.Unlock()
	vlog.Printf("defQueryTreeIndex: building index... (%d unit indexes)", len(xs))

	defer func() {
		if r := recover(); r != nil {
			// Report the recovered value r; err is typically still nil at
			// this point and would hide the actual cause of the panic.
			err = fmt.Errorf("panic in defQueryTreeIndex.Build (%d unit indexes): %v", len(xs), r)
		}
	}()

	units := make([]unit.ID2, 0, len(xs))
	for u := range xs {
		units = append(units, u)
	}
	sort.Sort(unitID2s(units))

	const maxUnits = math.MaxUint16
	if len(units) > maxUnits {
		log.Printf("Warning: the def query index supports a maximum of %d source units in a tree, but this tree has %d. Source units that exceed the limit will not be indexed for def queries.", maxUnits, len(units))
		units = units[:maxUnits]
	}

	termToUOffs := make(map[string][]unitOffsets)

	// Walk the units in sorted order rather than ranging over the map, so
	// that the order of entries within each term's list is deterministic.
	// units has already been truncated to maxUnits, so every index fits in a
	// uint16 and over-limit units are never visited.
	for n, u := range units {
		qx := xs[u]
		if qx.mt.t == nil {
			// This unit index has no decoded tree; nothing to merge.
			continue
		}
		unitNum := uint16(n)

		// i counts the final nodes visited so far in this unit's tree and is
		// used to index qx.mt.Values.
		// NOTE(review): this assumes the traversal reaches final nodes in the
		// same order the unit index appended its Values — confirm against
		// mafsa's OrderedEdges ordering.
		i := 0
		var traverse func(term string, node *mafsa.MinTreeNode)
		traverse = func(term string, node *mafsa.MinTreeNode) {
			if node == nil {
				return
			}
			if node.Final {
				uoffs := unitOffsets{Unit: unitNum, byteOffsets: qx.mt.Values[i]}
				termToUOffs[term] = append(termToUOffs[term], uoffs)
				i++
			}
			for _, e := range node.OrderedEdges() {
				traverse(term+string([]rune{e}), node.Edges[e])
			}
		}
		traverse("", qx.mt.t.Root)
	}
	vlog.Printf("defQueryTreeIndex: done traversing unit indexes.")

	terms := make([]string, 0, len(termToUOffs))
	for term := range termToUOffs {
		terms = append(terms, term)
	}
	sort.Strings(terms)

	if len(terms) == 0 {
		// Nothing to index: install an empty table and mark the index ready.
		x.mt = &mafsaUnitTable{}
		x.ready = true
		return nil
	}

	bt := mafsa.New()
	x.mt = &mafsaUnitTable{}
	x.mt.Values = make([][]unitOffsets, len(terms))
	for i, term := range terms {
		// Terms are inserted in sorted order; Values[i] belongs to terms[i].
		bt.Insert(term)
		x.mt.Values[i] = termToUOffs[term]
	}
	bt.Finish()
	vlog.Printf("defQueryTreeIndex: done adding %d terms to MAFSA & table and minimizing.", len(terms))

	b, err := bt.MarshalBinary()
	if err != nil {
		return err
	}
	vlog.Printf("defQueryTreeIndex: done serializing MAFSA & table to %d bytes.", len(b))

	// Keep the serialized form and the unit list, then decode the serialized
	// form back into the tree stored in x.mt.t.
	x.mt.B = b
	x.mt.Units = units
	x.mt.t, err = new(mafsa.Decoder).Decode(x.mt.B)
	if err != nil {
		return err
	}
	x.ready = true
	vlog.Printf("defQueryTreeIndex: done building index (%d terms).", len(terms))
	return nil
}