func NewAnalyzer(language string, stemTerms bool) *Analyzer { lang := cb_newf(language) defer C.DECREF((*C.cfish_Obj)(lang)) // non-stemming analyzer still does case-folding (normalizing) and tokenizing var analyzer *Analyzer if stemTerms { // see https://lucy.apache.org/docs/test/Lucy/Docs/Tutorial/Analysis.html analyzer = &Analyzer{Language: language, lucyAnalyzer: (*C.lucy_Analyzer)(C.LucyEasyAnalyzerNew(lang))} } else { tokenizer := C.LucyStandardTokenizerNew() normalizer := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false)) analyzers := C.CFishVArrayNew((C.uint32_t)(2)) //defer C.DECREF(tokenizer) get a segfault if i do this.. //defer C.DECREF(normalizer) get a segfault if i do this.. defer C.DECREF((*C.cfish_Obj)(analyzers)) // this works, however // have to push the tokenizer before the normalizer - otherwise // bad bad bad bad bad bad things will happen. C.CFishVArrayPush(analyzers, (*C.cfish_Obj)(tokenizer)) C.CFishVArrayPush(analyzers, (*C.cfish_Obj)(normalizer)) analyzer = &Analyzer{Language: language, lucyAnalyzer: (*C.lucy_Analyzer)(C.LucyPolyAnalyzerNew(lang, analyzers))} } runtime.SetFinalizer(analyzer, freeAnalyzer) return analyzer }
func (schema *Schema) createLucySchema() { lucySchema := C.LucySchemaNew() for _, item := range schema.PlanItems { var specType *C.CFishObj if item.Type == FullTextType { var language *C.CFishCharBuf if item.Options != nil && item.Options.Language != "" { language = cb_newf(item.Options.Language) } else { language = cb_newf("en") } analyzer := C.LucyEasyAnalyzerNew(language) specType = C.LucyFullTextTypeNew(analyzer) // TODO: come up with a better way to handle options. // This isn't very friendly. if item.Options != nil { specType = C.LucyFullTextTypeInitOptions(specType, analyzer, (C.float)(item.Options.Boost), (C.bool)(item.Options.Indexed), (C.bool)(item.Options.Stored), (C.bool)(item.Options.Sortable), (C.bool)(item.Options.Highlightable), ) } C.DECREF(language) C.DECREF(analyzer) } else if item.Type == StringType { specType = C.LucyStringTypeNew() if item.Options != nil { specType = C.LucyStringTypeInitOptions(specType, (C.float)(item.Options.Boost), (C.bool)(item.Options.Indexed), (C.bool)(item.Options.Stored), (C.bool)(item.Options.Sortable), ) } } else if item.Type == BlobType { isStored := (C.bool)(false) if item.Options != nil && item.Options.Stored { isStored = (C.bool)(true) } specType = C.LucyBlobTypeNew(isStored) // need to send []cfish_byte castable value panic("BlobType not supported yet") } else { panic("Type not supported yet") } fieldName := cb_newf(item.Field) C.LucySchemaSpecField(lucySchema, fieldName, specType) C.DECREF(fieldName) C.DECREF(specType) } schema.lucySchema = lucySchema }
func (ixReader *IndexReader) ParseQuery(queryStr string, stemTerms bool) *Query { lucySchema := C.LucyIxSearcherGetSchema(ixReader.lucySearcher) language := cb_newf("en") // should be configurable defer C.DECREF(language) var analyzer *C.LucyAnalyzer if stemTerms { analyzer = C.LucyEasyAnalyzerNew(language) } else { // this seems rather verbose for just creating an analyzer.. tokenizer := C.LucyStandardTokenizerNew() normalizer := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false)) analyzers := C.CFishVArrayNew((C.uint32_t)(2)) //defer C.DECREF(tokenizer) get a segfault if i do this.. //defer C.DECREF(normalizer) get a segfault if i do this.. defer C.DECREF(analyzers) // this works, however C.CFishVArrayPush(analyzers, tokenizer) C.CFishVArrayPush(analyzers, normalizer) analyzer = C.LucyPolyAnalyzerNew(language, analyzers) } defer C.DECREF(analyzer) qp := C.LucyQParserNew( lucySchema, analyzer, //should this be configurable? cb_newf("AND"), // should be configurable C.LucySchemaAllFields(lucySchema), // should be configurable ) defer C.DECREF(qp) qs := cb_new_from_utf8(queryStr) defer C.DECREF(qs) query := &Query{ QueryStr: queryStr, lucyQuery: C.LucyQParserParse(qp, qs), } runtime.SetFinalizer(query, freeQuery) return query }