// Build implements defIndexBuilder. func (x *defQueryIndex) Build(defs []*graph.Def, ofs byteOffsets) (err error) { vlog.Printf("defQueryIndex: building index... (%d defs)", len(defs)) defer func() { if r := recover(); r != nil { err = fmt.Errorf("panic in defQueryIndex.Build (%d defs): %v", len(defs), err) } }() // Clone slice so we can sort it by whatever we want. dofs := make([]*defLowerNameAndOffset, 0, len(defs)) for i, def := range defs { if x.f.SelectDef(def) && !hasNonASCIIChars(def.Name) { // See https://github.com/smartystreets/mafsa/issues/1 for // why we need to kick out non-ASCII. dofs = append(dofs, &defLowerNameAndOffset{strings.ToLower(def.Name), ofs[i]}) } } if len(dofs) == 0 { x.mt = &mafsaTable{} x.ready = true return nil } sort.Sort(defsByLowerName(dofs)) vlog.Printf("defQueryIndex: done sorting by def name (%d defs).", len(defs)) bt := mafsa.New() x.mt = &mafsaTable{} x.mt.Values = make([]byteOffsets, 0, len(dofs)) j := 0 // index of earliest def with same name for i, def := range dofs { if i > 0 && dofs[j].lowerName == def.lowerName { x.mt.Values[len(x.mt.Values)-1] = append(x.mt.Values[len(x.mt.Values)-1], def.ofs) } else { bt.Insert(def.lowerName) x.mt.Values = append(x.mt.Values, byteOffsets{def.ofs}) j = i } } bt.Finish() vlog.Printf("defQueryIndex: done adding %d defs to MAFSA & table and minimizing.", len(defs)) b, err := bt.MarshalBinary() if err != nil { return err } vlog.Printf("defQueryIndex: done serializing MAFSA & table to %d bytes.", len(b)) x.mt.B = b x.mt.t, err = new(mafsa.Decoder).Decode(x.mt.B) if err != nil { return err } x.ready = true vlog.Printf("defQueryIndex: done building index (%d defs).", len(defs)) return nil }
// Build implements defQueryTreeIndexBuilder. func (x *defQueryTreeIndex) Build(xs map[unit.ID2]*defQueryIndex) (err error) { vlog.Printf("defQueryTreeIndex: building index... (%d unit indexes)", len(xs)) defer func() { if r := recover(); r != nil { err = fmt.Errorf("panic in defQueryTreeIndex.Build (%d unit indexes): %v", len(xs), err) } }() units := make([]unit.ID2, 0, len(xs)) for u := range xs { units = append(units, u) } sort.Sort(unitID2s(units)) const maxUnits = math.MaxUint8 if len(units) > maxUnits { log.Printf("Warning: the def query index supports a maximum of %d source units in a tree, but this tree has %d. Source units that exceed the limit will not be indexed for def queries.", maxUnits, len(units)) units = units[:maxUnits] } unitNums := make(map[unit.ID2]uint8, len(units)) for _, u := range units { unitNums[u] = uint8(len(unitNums)) } termToUOffs := make(map[string][]unitOffsets) var traverse func(term string, unit uint8, node *mafsa.MinTreeNode) for u, qx := range xs { i := 0 traverse = func(term string, unit uint8, node *mafsa.MinTreeNode) { if node == nil { return } if node.Final { uoffs := unitOffsets{Unit: unit, byteOffsets: qx.mt.Values[i]} termToUOffs[term] = append(termToUOffs[term], uoffs) i++ } for _, e := range node.OrderedEdges() { traverse(term+string([]rune{e}), unit, node.Edges[e]) } } if qx.mt.t != nil { if _, present := unitNums[u]; !present { // Skip unit - it is the 256th or above unit (and we // store that index in a uint8 now :( ). continue } traverse("", unitNums[u], qx.mt.t.Root) } } vlog.Printf("defQueryTreeIndex: done traversing unit indexes.") terms := make([]string, 0, len(termToUOffs)) for term := range termToUOffs { terms = append(terms, term) } sort.Strings(terms) if len(terms) == 0 { x.mt = &mafsaUnitTable{} x.ready = true return nil } bt := mafsa.New() x.mt = &mafsaUnitTable{} x.mt.Values = make([][]unitOffsets, len(terms)) for i, term := range terms { bt.Insert(term) x.mt.Values[i] = termToUOffs[term] } bt.Finish() vlog.Printf("defQueryTreeIndex: done adding %d terms to MAFSA & table and minimizing.", len(terms)) b, err := bt.MarshalBinary() if err != nil { return err } vlog.Printf("defQueryTreeIndex: done serializing MAFSA & table to %d bytes.", len(b)) x.mt.B = b x.mt.Units = units x.mt.t, err = new(mafsa.Decoder).Decode(x.mt.B) if err != nil { return err } x.ready = true vlog.Printf("defQueryTreeIndex: done building index (%d terms).", len(terms)) return nil }