Beispiel #1
0
// NewAnalyzer constructs an Analyzer for the given language and registers
// freeAnalyzer as its finalizer.
//
// With stemTerms set, the underlying Lucy analyzer comes from
// LucyEasyAnalyzerNew. Otherwise a poly analyzer is assembled from a standard
// tokenizer followed by a normalizer, so that terms are still tokenized and
// case-folded (normalized) without the stemming step.
func NewAnalyzer(language string, stemTerms bool) *Analyzer {
	langBuf := cb_newf(language)
	defer C.DECREF((*C.cfish_Obj)(langBuf))

	var impl *C.lucy_Analyzer
	if !stemTerms {
		tok := C.LucyStandardTokenizerNew()
		norm := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false))
		chain := C.CFishVArrayNew((C.uint32_t)(2))

		// Only the array itself is released here. Releasing tok or norm as
		// well was observed to segfault — presumably the push below hands the
		// caller's reference over to the array (TODO confirm against the
		// Clownfish VArray documentation).
		defer C.DECREF((*C.cfish_Obj)(chain))

		// Order matters: the tokenizer has to be pushed before the
		// normalizer, otherwise things go badly wrong.
		C.CFishVArrayPush(chain, (*C.cfish_Obj)(tok))
		C.CFishVArrayPush(chain, (*C.cfish_Obj)(norm))
		impl = (*C.lucy_Analyzer)(C.LucyPolyAnalyzerNew(langBuf, chain))
	} else {
		// see https://lucy.apache.org/docs/test/Lucy/Docs/Tutorial/Analysis.html
		impl = (*C.lucy_Analyzer)(C.LucyEasyAnalyzerNew(langBuf))
	}

	result := &Analyzer{Language: language, lucyAnalyzer: impl}
	runtime.SetFinalizer(result, freeAnalyzer)
	return result
}
Beispiel #2
0
// createLucySchema builds a Lucy schema from schema.PlanItems and stores it in
// schema.lucySchema.
//
// For every plan item a field type is created according to item.Type
// (full-text or string), registered on the schema under item.Field, and the
// local references to the field name and the type are then released.
//
// It panics for BlobType items and for any item type it does not recognise.
func (schema *Schema) createLucySchema() {
	lucySchema := C.LucySchemaNew()
	for _, item := range schema.PlanItems {
		var specType *C.CFishObj
		switch item.Type {
		case FullTextType:
			// Fall back to English when the item does not name a language.
			var language *C.CFishCharBuf
			if item.Options != nil && item.Options.Language != "" {
				language = cb_newf(item.Options.Language)
			} else {
				language = cb_newf("en")
			}
			analyzer := C.LucyEasyAnalyzerNew(language)
			specType = C.LucyFullTextTypeNew(analyzer)
			// TODO: come up with a better way to handle options.
			// This isn't very friendly.
			// NOTE(review): this re-initialises the type that was just
			// constructed above with the same analyzer — confirm the C
			// wrapper tolerates a second init without leaking a reference.
			if item.Options != nil {
				specType = C.LucyFullTextTypeInitOptions(specType, analyzer,
					(C.float)(item.Options.Boost),
					(C.bool)(item.Options.Indexed),
					(C.bool)(item.Options.Stored),
					(C.bool)(item.Options.Sortable),
					(C.bool)(item.Options.Highlightable),
				)
			}
			// Drop the local references now that the field type exists.
			C.DECREF(language)
			C.DECREF(analyzer)
		case StringType:
			specType = C.LucyStringTypeNew()
			if item.Options != nil {
				specType = C.LucyStringTypeInitOptions(specType,
					(C.float)(item.Options.Boost),
					(C.bool)(item.Options.Indexed),
					(C.bool)(item.Options.Stored),
					(C.bool)(item.Options.Sortable),
				)
			}
		case BlobType:
			// Blob fields are not wired up yet: values would need to be sent
			// as something castable to []cfish_byte. No Lucy blob type is
			// allocated here, because the panic is unconditional and the
			// object could never be used or released.
			panic("BlobType not supported yet")
		default:
			panic("Type not supported yet")
		}

		fieldName := cb_newf(item.Field)
		C.LucySchemaSpecField(lucySchema, fieldName, specType)
		C.DECREF(fieldName)
		C.DECREF(specType)
	}
	schema.lucySchema = lucySchema
}
Beispiel #3
0
// ParseQuery parses queryStr with a Lucy query parser built on the schema of
// ixReader's searcher and returns the result wrapped in a Query, with
// freeQuery registered as its finalizer.
//
// When stemTerms is true the parser uses the analyzer returned by
// LucyEasyAnalyzerNew; otherwise it uses a poly analyzer made of a standard
// tokenizer followed by a normalizer. The language ("en"), the default
// boolean operator ("AND") and the searched fields (all schema fields) are
// currently hard-coded.
func (ixReader *IndexReader) ParseQuery(queryStr string, stemTerms bool) *Query {
	lucySchema := C.LucyIxSearcherGetSchema(ixReader.lucySearcher)
	language := cb_newf("en") // should be configurable
	defer C.DECREF(language)

	var analyzer *C.LucyAnalyzer
	if stemTerms {
		analyzer = C.LucyEasyAnalyzerNew(language)
	} else {
		// this seems rather verbose for just creating an analyzer..
		tokenizer := C.LucyStandardTokenizerNew()
		normalizer := C.LucyNormalizerNew(nil, (C.bool)(true), (C.bool)(false))
		analyzers := C.CFishVArrayNew((C.uint32_t)(2))

		// Only the array is released. Releasing tokenizer or normalizer too
		// was observed to segfault — presumably the push below hands the
		// caller's reference over to the array (TODO confirm against the
		// Clownfish VArray documentation).
		defer C.DECREF(analyzers)

		C.CFishVArrayPush(analyzers, tokenizer)
		C.CFishVArrayPush(analyzers, normalizer)
		analyzer = C.LucyPolyAnalyzerNew(language, analyzers)
	}
	defer C.DECREF(analyzer)

	// Keep the operator string and the field list in locals so they can be
	// released once the parser has been built; passing them inline left both
	// objects without an owner. This assumes LucyQParserNew keeps its own
	// copy or reference of each argument, as it evidently does for analyzer,
	// which is likewise released after the call.
	boolop := cb_newf("AND") // should be configurable
	defer C.DECREF(boolop)
	fields := C.LucySchemaAllFields(lucySchema) // should be configurable
	defer C.DECREF(fields)

	qp := C.LucyQParserNew(
		lucySchema,
		analyzer, // should this be configurable?
		boolop,
		fields,
	)
	defer C.DECREF(qp)

	qs := cb_new_from_utf8(queryStr)
	defer C.DECREF(qs)

	query := &Query{
		QueryStr:  queryStr,
		lucyQuery: C.LucyQParserParse(qp, qs),
	}
	runtime.SetFinalizer(query, freeQuery)
	return query
}