// This will need to be a bit more generic,
// but for testing this will work fine.
//
// GetSearcher opens the index at search.Location and returns a closure
// that runs a single-field query, returning the total hit count and up
// to limit matched field values starting at offset.
//
// NOTE(review): the closure returns totalNumHits, not the number of
// results actually collected — if LucyHitsNext returns nil early, the
// tail of results stays empty strings; callers must not index results
// by the returned count.
func (search *Search) GetSearcher() IndexReader {
	idxLocation := cb_newf(search.Location)
	search.lucySearcher = C.LucyIxSearcherNew(idxLocation)
	C.DECREF(idxLocation) // searcher constructed; our ref to the location CharBuf is no longer needed
	return func(q string, field string, offset, limit uint) (uint, []string) {
		query := cb_new_from_utf8(q)
		getField := cb_newf(field)
		hits := C.LucyIxSearcherHits(search.lucySearcher, query, C.uint32_t(offset), C.uint32_t(limit), nil)
		totalNumHits := uint(C.LucyHitsTotal(hits))
		// never materialize more than the caller asked for
		requestedNumHits := minUInt(limit, totalNumHits)
		results := make([]string, requestedNumHits)
		var hit *C.LucyHitDoc
		for i := uint(0); i < requestedNumHits; i++ {
			hit = C.LucyHitsNext(hits)
			if hit == nil {
				break
			}
			// NOTE(review): value_cb/value are never freed — presumably
			// Extract returns an incremented ref; TODO confirm and DECREF.
			value_cb := C.LucyHitDocExtract(hit, getField, nil) // do i need to free this
			value := cb_ptr2char(value_cb)                      // do i need to free this
			results[i] = C.GoString(value)
			C.DECREF(hit)
		}
		C.DECREF(query)
		C.DECREF(getField)
		C.DECREF(hits)
		return totalNumHits, results
	}
}
// NewAnalyzer builds a Lucy analyzer for the given language.
// When stemTerms is true an EasyAnalyzer (tokenize/normalize/stem) is
// used; otherwise a PolyAnalyzer of tokenizer + normalizer is assembled
// by hand so terms keep their original word forms.
// The wrapper's C resources are released by the freeAnalyzer finalizer.
func NewAnalyzer(language string, stemTerms bool) *Analyzer {
	lang := cb_newf(language)
	defer C.DECREF((*C.cfish_Obj)(lang))
	// non-stemming analyzer still does case-folding (normalizing) and tokenizing
	var analyzer *Analyzer
	if stemTerms {
		// see https://lucy.apache.org/docs/test/Lucy/Docs/Tutorial/Analysis.html
		analyzer = &Analyzer{Language: language, lucyAnalyzer: (*C.lucy_Analyzer)(C.LucyEasyAnalyzerNew(lang))}
	} else {
		tokenizer := C.LucyStandardTokenizerNew()
		// args: normalization form (nil = default), case_fold=true, strip_accents=false
		normalizer := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false))
		analyzers := C.CFishVArrayNew((C.uint32_t)(2))
		// NOTE(review): DECREFing tokenizer/normalizer here segfaults —
		// presumably the VArray push takes ownership without INCREF; TODO confirm.
		//defer C.DECREF(tokenizer) get a segfault if i do this..
		//defer C.DECREF(normalizer) get a segfault if i do this..
		defer C.DECREF((*C.cfish_Obj)(analyzers)) // this works, however
		// have to push the tokenizer before the normalizer - otherwise
		// bad bad bad bad bad bad things will happen.
		C.CFishVArrayPush(analyzers, (*C.cfish_Obj)(tokenizer))
		C.CFishVArrayPush(analyzers, (*C.cfish_Obj)(normalizer))
		analyzer = &Analyzer{Language: language, lucyAnalyzer: (*C.lucy_Analyzer)(C.LucyPolyAnalyzerNew(lang, analyzers))}
	}
	// release the underlying C analyzer when the Go wrapper is collected
	runtime.SetFinalizer(analyzer, freeAnalyzer)
	return analyzer
}
func (schema *Schema) createLucySchema() { lucySchema := C.LucySchemaNew() for _, item := range schema.PlanItems { var specType *C.CFishObj if item.Type == FullTextType { var language *C.CFishCharBuf if item.Options != nil && item.Options.Language != "" { language = cb_newf(item.Options.Language) } else { language = cb_newf("en") } analyzer := C.LucyEasyAnalyzerNew(language) specType = C.LucyFullTextTypeNew(analyzer) // TODO: come up with a better way to handle options. // This isn't very friendly. if item.Options != nil { specType = C.LucyFullTextTypeInitOptions(specType, analyzer, (C.float)(item.Options.Boost), (C.bool)(item.Options.Indexed), (C.bool)(item.Options.Stored), (C.bool)(item.Options.Sortable), (C.bool)(item.Options.Highlightable), ) } C.DECREF(language) C.DECREF(analyzer) } else if item.Type == StringType { specType = C.LucyStringTypeNew() if item.Options != nil { specType = C.LucyStringTypeInitOptions(specType, (C.float)(item.Options.Boost), (C.bool)(item.Options.Indexed), (C.bool)(item.Options.Stored), (C.bool)(item.Options.Sortable), ) } } else if item.Type == BlobType { isStored := (C.bool)(false) if item.Options != nil && item.Options.Stored { isStored = (C.bool)(true) } specType = C.LucyBlobTypeNew(isStored) // need to send []cfish_byte castable value panic("BlobType not supported yet") } else { panic("Type not supported yet") } fieldName := cb_newf(item.Field) C.LucySchemaSpecField(lucySchema, fieldName, specType) C.DECREF(fieldName) C.DECREF(specType) } schema.lucySchema = lucySchema }
func (ixWriter *IndexWriter) AddDoc(doc Document) { lDoc := C.LucyDocNew(nil, 0) // Are these sane defaults? for k, v := range doc { name := cb_newf(k) value := cb_new_from_utf8(v) C.LucyDocStore(lDoc, name, value) C.DECREF(name) C.DECREF(value) } C.LucyIndexerAddDoc(ixWriter.lucyIndexer, lDoc, 1.0) // Is 1.0 a sane default? C.DECREF(lDoc) }
// encodeTuple translates a Go array to a Python object. func encodeTuple(array []interface{}) (pyTuple *C.PyObject, err error) { if len(array) == 0 { pyTuple = pyEmptyTuple C.INCREF(pyTuple) } else { pyTuple = C.PyTuple_New(C.Py_ssize_t(len(array))) var ok bool defer func() { if !ok { C.DECREF(pyTuple) pyTuple = nil } }() for i, item := range array { var pyItem *C.PyObject if pyItem, err = encode(item); err != nil { return } C.Tuple_SET_ITEM(pyTuple, C.Py_ssize_t(i), pyItem) } ok = true } return }
func (ixWriter *IndexWriter) Close() { // Should this be here or in Commit? if ixWriter.lucyIndexer != nil { C.DECREF(ixWriter.lucyIndexer) ixWriter.lucyIndexer = nil } }
// getError translates the current Python exception to a Go error, and clears // the Python exception state. func getError() error { var ( pyType *C.PyObject pyValue *C.PyObject pyTrace *C.PyObject ) C.PyErr_Fetch(&pyType, &pyValue, &pyTrace) defer C.DECREF(pyType) defer C.DECREF(pyValue) defer xDECREF(pyTrace) C.PyErr_Clear() return fmt.Errorf("Python: %s", stringify(pyValue)) }
func (schema *Schema) AddField(field *Field) { schema.Fields = append(schema.Fields, field) var specType *C.lucy_FieldType defer C.DECREF((*C.cfish_Obj)(specType)) name := cb_newf(field.Name) defer C.DECREF((*C.cfish_Obj)(name)) switch field.IndexType { case FullTextType: specType = fullTextSpecType(field) case StringType: specType = stringSpecType(field) default: panic("Specified IndexType not supported yet") } C.LucySchemaSpecField(schema.lucySchema, name, specType) }
// GetIndexWriter opens (or creates, depending on flags) the index at
// index.Location and returns a closure that adds a single document
// built from the given field/value columns.
//
// NOTE(review): the closure keeps using index.lucyIndexer by reference,
// so calling Commit (which DECREFs the indexer) invalidates any writer
// previously returned here.
func (index *Index) GetIndexWriter(flags IndexOpenFlags) IndexWriter {
	idxLocation := cb_newf(index.Location)
	index.lucyIndexer = C.LucyIndexerNew(index.Schema.lucySchema, idxLocation, nil, C.int32_t(flags))
	C.DECREF(idxLocation) // indexer constructed; our ref to the location CharBuf is no longer needed
	return func(ixValueColumns ...*IndexValueColumn) {
		doc := C.LucyDocNew(nil, 0)
		for _, ixColumn := range ixValueColumns {
			fieldName := cb_newf(ixColumn.Field)
			fieldValue := cb_new_from_utf8(ixColumn.Value)
			C.LucyDocStore(doc, fieldName, fieldValue)
			C.DECREF(fieldName)
			C.DECREF(fieldValue)
		}
		C.LucyIndexerAddDoc(index.lucyIndexer, doc, 1.0)
		C.DECREF(doc)
	}
}
func call(pyObject *C.PyObject, name string, args []interface{}) (pyType C.int, pyResult *C.PyObject, err error) { pyMember, err := getAttr(pyObject, name) if err != nil { return } defer C.DECREF(pyMember) return invoke(pyMember, args) }
func encodeDictItem(pyDict *C.PyObject, key, value interface{}) (err error) { pyKey, err := encode(key) if err != nil { return } defer C.DECREF(pyKey) pyValue, err := encode(value) if err != nil { return } defer C.DECREF(pyValue) if C.PyDict_SetItem(pyDict, pyKey, pyValue) < 0 { err = getError() } return }
// Search runs query against the index, returning the total hit count
// and up to limit SearchResults starting at offset. idField and
// contentField name the stored fields copied into each result; when
// includeMatchedTerms is true the highlighted query spans of the
// content field are attached as well.
func (ixReader *IndexReader) Search(query *Query, offset, limit uint, idField string, contentField string, includeMatchedTerms bool) (uint, []*SearchResult) {
	// Should probably have some sort
	// of `Results` object/iterator so that we don't have to specify
	// offset/limit and where I can attach matched terms to the result.
	lIdField, lContentField := cb_newf(idField), cb_newf(contentField) // total hack, need to return more than one field
	defer C.DECREF(lIdField)
	defer C.DECREF(lContentField)
	hits := C.LucyIxSearcherHits(ixReader.lucySearcher, query.lucyQuery, C.uint32_t(offset), C.uint32_t(limit), nil)
	defer C.DECREF(hits)
	totalNumHits := uint(C.LucyHitsTotal(hits))
	// never materialize more than the caller asked for
	num2Return := minUInt(limit, totalNumHits)
	results := make([]*SearchResult, num2Return)
	var hit *C.LucyHitDoc
	compiler := C.LucyQueryMakeCompiler(query.lucyQuery, ixReader.lucySearcher, 1.0, false)
	defer C.DECREF(compiler)
	// matchedTerms fills result.MatchedTerms with the highlight spans of
	// the query terms within the content field of docId.
	matchedTerms := func(docId C.int32_t, result *SearchResult) {
		docVec := C.LucyIxSearchFetchDocVec(ixReader.lucySearcher, docId)
		defer C.DECREF(docVec)
		spans := C.LucyCompilerHighlightSpans(compiler, ixReader.lucySearcher, docVec, lContentField)
		defer C.DECREF(spans)
		spanCnt := C.VaGetSize(spans)
		if spanCnt == 0 {
			// should never get here, but just in case...
			return
		}
		result.MatchedTerms = make([]string, spanCnt)
		var i C.uint32_t
		for i = 0; i < spanCnt; i++ {
			span := C.VaFetch(spans, i)
			offset := C.LucySpanGetOffset(span)
			length := C.LucySpanGetLength(span)
			// NOTE(review): assumes span offset/length are rune (not byte)
			// offsets into the stored text — TODO confirm against Lucy docs.
			result.MatchedTerms[i] = string([]rune(result.Text)[offset : offset+length])
		}
		// make terms unique?
		result.MatchedTerms = set(result.MatchedTerms)
	}
	var i uint
	for i = 0; i < num2Return; i++ {
		hit = C.LucyHitsNext(hits)
		if hit == nil {
			break
		}
		docId := C.LucyHitDocGetDocId(hit)
		// NOTE(review): the extracted values are never freed — presumably
		// Extract returns an incremented ref; TODO confirm and DECREF.
		contentValue := cb_ptr2char(C.LucyHitDocExtract(hit, lContentField, nil)) // do i need to free this
		idValue := cb_ptr2char(C.LucyHitDocExtract(hit, lIdField, nil))           // do i need to free this
		results[i] = &SearchResult{
			Id:    C.GoString(idValue),
			Text:  C.GoString(contentValue),
			Score: float32(C.LucyHitDocGetScore(hit)),
		}
		if includeMatchedTerms {
			matchedTerms(docId, results[i])
		}
		C.DECREF(hit)
	}
	return totalNumHits, results
}
func (index *Index) NewIndexReader() *IndexReader { ixLocation := cb_newf(index.Path) defer C.DECREF(ixLocation) ixReader := &IndexReader{ Index: index, lucySearcher: C.LucyIxSearcherNew(ixLocation), } runtime.SetFinalizer(ixReader, freeIndexReader) return ixReader }
func stringify(pyObject *C.PyObject) (s string) { if pyResult := C.PyObject_Str(pyObject); pyResult != nil { defer C.DECREF(pyResult) if cString := C.PyString_AsString(pyResult); cString != nil { s = C.GoString(cString) } } C.PyErr_Clear() return }
func (ixReader *IndexReader) ParseQuery(queryStr string, stemTerms bool) *Query { lucySchema := C.LucyIxSearcherGetSchema(ixReader.lucySearcher) language := cb_newf("en") // should be configurable defer C.DECREF(language) var analyzer *C.LucyAnalyzer if stemTerms { analyzer = C.LucyEasyAnalyzerNew(language) } else { // this seems rather verbose for just creating an analyzer.. tokenizer := C.LucyStandardTokenizerNew() normalizer := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false)) analyzers := C.CFishVArrayNew((C.uint32_t)(2)) //defer C.DECREF(tokenizer) get a segfault if i do this.. //defer C.DECREF(normalizer) get a segfault if i do this.. defer C.DECREF(analyzers) // this works, however C.CFishVArrayPush(analyzers, tokenizer) C.CFishVArrayPush(analyzers, normalizer) analyzer = C.LucyPolyAnalyzerNew(language, analyzers) } defer C.DECREF(analyzer) qp := C.LucyQParserNew( lucySchema, analyzer, //should this be configurable? cb_newf("AND"), // should be configurable C.LucySchemaAllFields(lucySchema), // should be configurable ) defer C.DECREF(qp) qs := cb_new_from_utf8(queryStr) defer C.DECREF(qs) query := &Query{ QueryStr: queryStr, lucyQuery: C.LucyQParserParse(qp, qs), } runtime.SetFinalizer(query, freeQuery) return query }
func (o *object) Item(t *Thread, i int) (item Object, err error) { t.execute(func() { pyItem := C.PySequence_GetItem(o.pyObject, C.Py_ssize_t(i)) if pyItem == nil { err = getError() return } defer C.DECREF(pyItem) item = newObject(pyItem) }) return }
func (o *object) ItemValue(t *Thread, i int) (item interface{}, err error) { t.execute(func() { pyItem := C.PySequence_GetItem(o.pyObject, C.Py_ssize_t(i)) if pyItem == nil { err = getError() return } defer C.DECREF(pyItem) item, err = decode(pyItem) }) return }
func (o *object) AttrValue(t *Thread, name string) (attr interface{}, err error) { t.execute(func() { var pyAttr *C.PyObject pyAttr, err = getAttr(o.pyObject, name) if err != nil { return } defer C.DECREF(pyAttr) attr, err = decode(pyAttr) }) return }
func (o *object) Attr(t *Thread, name string) (attr Object, err error) { t.execute(func() { var pyAttr *C.PyObject pyAttr, err = getAttr(o.pyObject, name) if err != nil { return } defer C.DECREF(pyAttr) attr = newObject(pyAttr) }) return }
// Import a Python module. func Import(t *Thread, name string) (module Object, err error) { cName := C.CString(name) defer C.free(unsafe.Pointer(cName)) t.execute(func() { pyModule := C.PyImport_ImportModule(cName) if pyModule == nil { err = getError() return } defer C.DECREF(pyModule) module = newObject(pyModule) }) return }
// Preferred method of creating an `IndexWriter` // Set's up all the necessary C bindings. func (index *Index) NewIndexWriter() *IndexWriter { flags := indexOpen if index.Create { flags |= indexCreate } if index.Truncate { flags |= indexTruncate } ixLocation := cb_newf(index.Path) defer C.DECREF(ixLocation) ixWriter := &IndexWriter{ Index: index, lucyIndexer: C.LucyIndexerNew(index.Schema.lucySchema, ixLocation, nil, C.int32_t(flags)), } runtime.SetFinalizer(ixWriter, freeIndexWriter) return ixWriter }
func (o *object) GetValue(t *Thread, key interface{}) (value interface{}, found bool, err error) { t.execute(func() { var pyKey *C.PyObject if pyKey, err = encode(key); err != nil { return } defer C.DECREF(pyKey) pyValue := C.PyDict_GetItem(o.pyObject, pyKey) if pyValue == nil { return } value, err = decode(pyValue) found = (err == nil) }) return }
// encodeDict translates a Go map to a Python object. func encodeDict(m map[interface{}]interface{}) (pyDict *C.PyObject, err error) { pyDict = C.PyDict_New() var ok bool defer func() { if !ok { C.DECREF(pyDict) pyDict = nil } }() for key, value := range m { if err = encodeDictItem(pyDict, key, value); err != nil { return } } ok = true return }
func (query *Query) Close() { if query.lucyQuery != nil { C.DECREF(query.lucyQuery) query.lucyQuery = nil } }
// Close releases the underlying Lucy searcher.
//
// FIX: unlike every other Close in this package, this one had no nil
// guard and did not clear the field, so a second Close would DECREF an
// already-released object. It now matches the shared pattern.
func (search *Search) Close() {
	if search.lucySearcher != nil {
		C.DECREF(search.lucySearcher)
		search.lucySearcher = nil
	}
}
func (schema *Schema) Close() { if schema.lucySchema != nil { C.DECREF(schema.lucySchema) schema.lucySchema = nil } }
func (analyzer *Analyzer) Close() { if analyzer.lucyAnalyzer != nil { C.DECREF(analyzer.lucyAnalyzer) analyzer.lucyAnalyzer = nil } }
func (schema *Schema) Close() { if schema.lucySchema != nil { C.DECREF((*C.cfish_Obj)(schema.lucySchema)) schema.lucySchema = nil } }
func (index *Index) Commit() { C.LucyIndexerCommit(index.lucyIndexer) C.DECREF(index.lucyIndexer) }
func (analyzer *Analyzer) Close() { if analyzer.lucyAnalyzer != nil { C.DECREF((*C.cfish_Obj)(analyzer.lucyAnalyzer)) analyzer.lucyAnalyzer = nil } }