Code example #1
File: locutions.go Project: payfriendz/go-freeling
func NewLocutionStatus() *LocutionStatus {
	return &LocutionStatus{
		accMW:      set.New(),
		longestMW:  set.New(),
		mwAnalysis: list.New(),
		components: make([]*Word, 0),
	}
}
Code example #2
File: mitie.go Project: payfriendz/go-freeling
func (this *MITIE) Process(body string) *list.List {
	tokens := C.mitie_tokenize(C.CString(body))
	defer C.mitie_free(unsafe.Pointer(tokens))
	dets := C.mitie_extract_entities(this.ner, tokens)
	defer C.mitie_free(unsafe.Pointer(dets))
	num_dets := C.mitie_ner_get_num_detections(dets)
	duplicates := set.New()
	entities := list.New()
	for i := 0; i < int(num_dets); i++ {
		centity := C.get_entity(tokens, dets, C.ulong(i))
		model := C.GoString(centity.model)
		score := float64(centity.score)
		value := C.GoString(centity.value)
		key := fmt.Sprintf("%s:%s", value, model)
		if duplicates.Has(key) {
			continue
		}
		duplicates.Add(key)
		if score > 0.5 {
			entity := models.NewEntity(model, score, value)
			entities.PushBack(entity)
		}
	}
	return entities
}
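The dedup idiom above — compose a value:model key, skip on Has, otherwise Add — works standalone. A minimal sketch, assuming the gopkg.in/fatih/set.v0 API these examples use; the keys are made up for illustration:

package main

import (
	"fmt"

	set "gopkg.in/fatih/set.v0"
)

func main() {
	duplicates := set.New()
	keys := []string{"Rome:LOCATION", "IBM:ORGANIZATION", "Rome:LOCATION"}
	for _, key := range keys {
		if duplicates.Has(key) {
			continue // the second "Rome:LOCATION" is dropped here
		}
		duplicates.Add(key)
		fmt.Println("kept", key)
	}
	fmt.Println("unique:", duplicates.Size()) // 2
}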
Code example #3
func TestJobQueuePushWaitStress(t *testing.T) {
	RegisterTestingT(t)

	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	pushed := set.New()
	popped := set.New()

	q := newJobListQueue()
	Ω(q).ShouldNot(BeNil())

	wg := sync.WaitGroup{}

	N := 50 * 1000
	wg.Add(N)

	for i := 0; i < N; i++ {
		pusher, item := i%2 == 0, i/2

		if pusher {
			go func(item int) {
				defer wg.Done()
				pushed.Add(item)
				q.Push(item)
			}(item)
		} else {
			go func() {
				defer wg.Done()
				item, ok := q.WaitForJob()
				Ω(ok).ShouldNot(BeFalse())
				popped.Add(item)
			}()
		}
	}

	wg.Wait()

	unpopped := set.Difference(pushed, popped)
	Ω(set.IntSlice(unpopped)).Should(BeEmpty())
	unpushed := set.Difference(popped, pushed)
	Ω(set.IntSlice(unpushed)).Should(BeEmpty())
	Ω(set.IntSlice(pushed)).Should(HaveLen(N / 2))
	Ω(set.IntSlice(popped)).Should(HaveLen(N / 2))
}
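The closing assertions — two set differences plus cardinality checks — are a reusable produced-versus-consumed test. The verification step in isolation, under the same fatih/set assumption:

package main

import (
	"fmt"

	set "gopkg.in/fatih/set.v0"
)

func main() {
	pushed := set.New(1, 2, 3)
	popped := set.New(2, 3)

	// Items pushed but never popped.
	unpopped := set.Difference(pushed, popped)
	fmt.Println(set.IntSlice(unpopped)) // [1]

	// Items popped but never pushed; a correct queue keeps this empty.
	unpushed := set.Difference(popped, pushed)
	fmt.Println(set.IntSlice(unpushed)) // []
}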
Code example #4
func NewDisambiguator(disFile string) *Disambiguator {
	this := Disambiguator{
		wnids: make(map[string]*Synset),
		binds: make(map[string]*set.Set),
	}

	fileString, err := ioutil.ReadFile(disFile)
	if err != nil {
		LOG.Panic("Error loading file " + disFile)
	}
	lines := strings.Split(string(fileString), "\n")
	for _, line := range lines {
		if line == "" {
			continue
		}
		items := Split(line, "\t")
		sscope := items[0]
		scope := DOCUMENT_SCOPE
		if sscope == "d" {
			scope = DOCUMENT_SCOPE
		} else if sscope == "sd" {
			scope = SENTENCE_SCOPE
		} else if sscope == "sb" {
			scope = SENTENCE_BIND
		} else if sscope == "nd" {
			scope = ND_SCOPE
		}
		switch scope {
		case DOCUMENT_SCOPE, SENTENCE_SCOPE, ND_SCOPE:
			{
				lemma := items[1]
				wnid := items[2]
				pos, _ := strconv.ParseFloat(items[3], 64)
				neg, _ := strconv.ParseFloat(items[4], 64)
				domain := items[5][1:]
				score, _ := strconv.Atoi(items[6])
				gloss := items[7]
				syn := NewSynset(scope, lemma, wnid, pos, neg, domain, score, gloss)
				this.wnids[wnid] = syn
				break
			}
		case SENTENCE_BIND:
			{
				key := items[1][1:]
				for i := 2; i < len(items); i++ {
					if this.binds[key] == nil {
						this.binds[key] = set.New()
					}
					this.binds[key].Add(items[i][1:])
				}
			}
		}
	}

	return &this
}
Code example #5
File: dictionary.go Project: payfriendz/go-freeling
func (this *Dictionary) ParseDictEntry(data string, lems *list.List) bool {
	aux := make(map[string]*set.Set)
	dataItems := Split(data, " ")
	// Entries are lemma/tag pairs; an odd item count means the entry is malformed.
	// (The original in-loop check `if i == len(dataItems)` could never fire,
	// since the loop condition already requires i < len(dataItems)-1.)
	if len(dataItems)%2 != 0 {
		return false
	}
	sl := set.New()

	for i := 0; i < len(dataItems)-1; i = i + 2 {
		lemma := dataItems[i]
		sl.Add(lemma)
		tag := dataItems[i+1]

		l := aux[lemma]
		if l == nil {
			l = set.New()
			aux[lemma] = l
		}
		l.Add(tag)
	}

	ll := list.New()
	for _, l := range sl.List() {
		ll.PushBack(l.(string))
	}
	this.SortList(ll, this.lemmaPrefs)

	for k := ll.Front(); k != nil; k = k.Next() {

		l := aux[k.Value.(string)]

		lt := list.New()
		for _, s := range l.List() {
			lt.PushBack(s.(string))
		}
		this.SortList(lt, this.posPrefs)
		lems.PushBack(Pair{k.Value.(string), lt})

	}

	return true
}
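Both NewDisambiguator above and ParseDictEntry rely on the same lazy map-of-sets idiom: look up the key, allocate the set on first miss, then Add. Extracted as a sketch, assuming fatih/set (addTag is a hypothetical helper name):

package main

import (
	"fmt"

	set "gopkg.in/fatih/set.v0"
)

// addTag groups a tag under its lemma, creating the tag set on first use.
func addTag(aux map[string]*set.Set, lemma, tag string) {
	l := aux[lemma]
	if l == nil {
		l = set.New()
		aux[lemma] = l
	}
	l.Add(tag)
}

func main() {
	aux := make(map[string]*set.Set)
	addTag(aux, "run", "VB")
	addTag(aux, "run", "NN")
	addTag(aux, "run", "VB") // duplicate; the set ignores it
	fmt.Println(aux["run"].Size()) // 2
}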
Code example #6
File: language.go Project: payfriendz/go-freeling
func NewAnalysis(lemma string, tag string) *Analysis {
	return &Analysis{
		lemma:         lemma,
		tag:           tag,
		prob:          -1.0,
		distance:      -1.0,
		senses:        list.New(),
		retok:         list.New(),
		selectedKBest: set.New(),
	}
}
Code example #7
File: queue_test.go Project: PlanitarInc/workqueue
func TestQueueStress(t *testing.T) {
	RegisterTestingT(t)

	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	addedJobs := set.New()
	processedJobs := set.New()

	q := New("stress", 100, func(j Job) {
		time.Sleep(3 * time.Millisecond)
		processedJobs.Add(j)
	})

	Ω(q.Start()).Should(BeNil())

	N := 5 * 1000
	wg := sync.WaitGroup{}
	wg.Add(N)
	for i := 0; i < N; i++ {
		go func(item int) {
			defer wg.Done()
			addedJobs.Add(item)
			Ω(q.Add(item)).Should(BeNil())
		}(i)
	}
	wg.Wait() // Wait while all jobs are added to the queue

	Ω(q.Stop(true)).Should(BeNil())

	unprocessed := set.Difference(addedJobs, processedJobs)
	Ω(set.IntSlice(unprocessed)).Should(BeEmpty())
	unadded := set.Difference(processedJobs, addedJobs)
	Ω(set.IntSlice(unadded)).Should(BeEmpty())
	Ω(set.IntSlice(addedJobs)).Should(HaveLen(N))
	Ω(set.IntSlice(processedJobs)).Should(HaveLen(N))
}
Code example #8
File: hmm.go Project: payfriendz/go-freeling
func (this *HMMTagger) FindStates(sent *Sentence) *list.List {
	st := set.New()
	ls := list.New()
	w2 := sent.Front()
	TRACE(3, "obtaining the states that may have emmited the initial word: "+w2.Value.(*Word).getForm(), MOD_HMM)
	for a2 := w2.Value.(*Word).selectedBegin(0).Element; a2 != nil; a2 = a2.Next() {
		st.Add(&Bigram{"0", this.Tags.GetShortTag(a2.Value.(*Analysis).getTag())})
	}
	ls.PushBack(st)

	for w1, w2 := w2, w2.Next(); w1 != nil && w2 != nil; w1, w2 = w2, w2.Next() {
		TRACE(3, "obtaining the states that may have emmited the word: "+w2.Value.(*Word).getForm(), MOD_HMM)
		st := set.New()
		for a1 := w1.Value.(*Word).selectedBegin(0).Element; a1 != nil; a1 = a1.Next() {
			for a2 := w2.Value.(*Word).selectedBegin(0).Element; a2 != nil; a2 = a2.Next() {
				st.Add(&Bigram{this.Tags.GetShortTag(a1.Value.(*Analysis).getTag()), this.Tags.GetShortTag(a2.Value.(*Analysis).getTag())})
			}
		}

		ls.PushBack(st)
	}

	return ls
}
Code example #9
File: probability.go Project: payfriendz/go-freeling
func (this *Probability) guesser(w *Word, mass float64) float64 {
	form := w.getLCForm()

	sum := If(w.getNAnalysis() > 0, mass, 0.0).(float64)
	sum2 := 0.0

	TRACE(2, "Initial sum="+strconv.FormatFloat(sum, 'f', 3, 64), MOD_PROBABILITY)

	stags := set.New()
	for li := w.Front(); li != nil; li = li.Next() {
		stags.Add(this.Tags.GetShortTag(li.Value.(*Analysis).getTag()))
	}

	la := list.New()
	for k, v := range this.unkTags {
		TRACE(2, "   guesser checking tag "+k, MOD_PROBABILITY)
		hasit := stags.Has(this.Tags.GetShortTag(k))

		if !hasit {
			p := this.computeProbability(k, v, form)
			a := NewAnalysis(form, k)
			a.setProb(p)

			if p >= this.ProbabilityThreshold {
				sum += p
				w.addAnalysis(a)
				TRACE(2, "   added. sum is:"+strconv.FormatFloat(sum, 'f', 3, 64), MOD_PROBABILITY)
			} else {
				sum2 += p
				la.PushBack(a)
			}
		}
	}

	if w.getNAnalysis() == 0 {
		w.setAnalysis(List2Array(la)...)
		sum = sum2
	}

	return sum
}
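guesser keeps analyses that clear ProbabilityThreshold immediately and parks the rest in a fallback list, used only when nothing qualified. The selection skeleton with plain types and a made-up threshold and tag set:

package main

import "fmt"

func main() {
	threshold := 0.3
	candidates := map[string]float64{"NN": 0.6, "VB": 0.2, "JJ": 0.1} // hypothetical tag probabilities
	kept := []string{}
	fallback := []string{}
	for tag, p := range candidates {
		if p >= threshold {
			kept = append(kept, tag)
		} else {
			fallback = append(fallback, tag)
		}
	}
	// Nothing passed the threshold: fall back to the low-probability candidates.
	if len(kept) == 0 {
		kept = fallback
	}
	fmt.Println(kept)
}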
Code example #10
File: suffixes.go Project: payfriendz/go-freeling
func (this *Affixes) GenerateRoots(kind int, suf *sufrule, rt string) *set.Set {
	cand := set.New()
	var term, r string
	var pe int

	term = suf.term
	TRACE(3, "Possible terminations/beginnings: "+term, MOD_AFFIX)
	pe = strings.Index(term, "|")
	for pe > -1 {
		r = term[0:pe]
		if r == "*" {
			r = ""
		}

		if kind == SUF {
			TRACE(3, "Adding to t_roots the element "+rt+r, MOD_AFFIX)
			cand.Add(rt + r)
		} else if kind == PREF {
			TRACE(3, "Adding to t_roots the element "+r+rt, MOD_AFFIX)
			cand.Add(r + rt)
		}

		term = term[pe+1:]
		pe = strings.Index(term, "|")
	}

	if term == "*" {
		term = ""
	}

	if kind == SUF {
		TRACE(3, "Adding to t_roots the element "+rt+term, MOD_AFFIX)
		cand.Add(rt + term)
	} else if kind == PREF {
		TRACE(3, "Adding to t_roots the element "+term+rt, MOD_AFFIX)
		cand.Add(term + rt)
	}

	return cand
}
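GenerateRoots walks the '|'-separated termination list by hand with strings.Index; the same candidates can be produced with strings.Split. A sketch of the SUF branch only, assuming fatih/set (suffixRoots is a hypothetical name):

package main

import (
	"fmt"
	"strings"

	set "gopkg.in/fatih/set.v0"
)

// suffixRoots appends each termination to the root, with "*" standing for
// the empty termination, mirroring the SUF case above.
func suffixRoots(rt, term string) *set.Set {
	cand := set.New()
	for _, r := range strings.Split(term, "|") {
		if r == "*" {
			r = ""
		}
		cand.Add(rt + r)
	}
	return cand
}

func main() {
	roots := suffixRoots("cant", "a|o|*")
	fmt.Println(roots.List()) // [canta canto cant] in some order
}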
Code example #11
File: language.go Project: payfriendz/go-freeling
func NewAnalysisFromAnalysis(a *Analysis) *Analysis {
	this := Analysis{
		lemma:         a.lemma,
		tag:           a.tag,
		prob:          a.prob,
		distance:      a.distance,
		senses:        list.New(),
		retok:         list.New(),
		selectedKBest: set.New(a.selectedKBest.List()...), // copy the elements, not the set itself
	}

	for s := a.senses.Front(); s != nil; s = s.Next() {
		this.senses.PushBack(s.Value.(*Senses))
	}

	for r := a.retok.Front(); r != nil; r = r.Next() {
		this.retok.PushBack(r.Value.(*Word))
	}

	return &this
}
Code example #12
File: grammar.go Project: payfriendz/go-freeling
func NewGrammar(fname string) *Grammar {
	this := Grammar{
		RulesMap:    make(RulesMap),
		nonterminal: set.New(),
		wild:        make(RulesMap),
		filemap:     make(RulesMap),
		prior:       make(map[string]int),
		hidden:      set.New(),
		flat:        set.New(),
		notop:       set.New(),
		onlytop:     set.New(),
	}

	var tok, stat, newstat int
	what := 0
	var trans [32][32]int
	first := false
	wildcard := false
	var head, err, categ, name string
	ls := list.New()
	var priorVal int

	for i = 0; i < MAX; i++ {
		for j = 0; j < MAX; j++ {
			trans[i][j] = 0
		}
	}

	trans[1][GRAMMAR_COMMENT] = 1
	trans[1][GRAMMAR_CATEGORY] = 2
	trans[1][GRAMMAR_PRIOR] = 6
	trans[1][GRAMMAR_START] = 8
	trans[1][GRAMMAR_HIDDEN] = 6
	trans[1][GRAMMAR_FLAT] = 6
	trans[1][GRAMMAR_NOTOP] = 6
	trans[1][GRAMMAR_ONLYTOP] = 6

	trans[2][GRAMMAR_ARROW] = 3

	trans[3][GRAMMAR_CATEGORY] = 4
	trans[3][GRAMMAR_HEAD] = 10

	trans[4][GRAMMAR_COMMA] = 3
	trans[4][GRAMMAR_BAR] = 3
	trans[4][GRAMMAR_DOT] = 1
	trans[4][GRAMMAR_LEMMA] = 5
	trans[4][GRAMMAR_FORM] = 5
	trans[4][GRAMMAR_FILENAME] = 5

	trans[5][GRAMMAR_COMMA] = 3
	trans[5][GRAMMAR_BAR] = 3
	trans[5][GRAMMAR_DOT] = 1

	trans[6][GRAMMAR_CATEGORY] = 7

	trans[7][GRAMMAR_CATEGORY] = 7
	trans[7][GRAMMAR_DOT] = 1

	trans[8][GRAMMAR_CATEGORY] = 9

	trans[9][GRAMMAR_DOT] = 1

	trans[10][GRAMMAR_CATEGORY] = 4

	rules := make([]*Pair, 0)

	rules = append(rules, &Pair{regexp.MustCompile("[ \\t\\n\\r]+"), 0})
	rules = append(rules, &Pair{regexp.MustCompile("%.*"), GRAMMAR_COMMENT})
	rules = append(rules, &Pair{regexp.MustCompile("==>"), GRAMMAR_ARROW})
	rules = append(rules, &Pair{regexp.MustCompile("\\([[:alpha:]_'·\\-]+\\)"), GRAMMAR_FORM})
	rules = append(rules, &Pair{regexp.MustCompile("<[[:lower:]_'·\\-]+>"), GRAMMAR_LEMMA})
	rules = append(rules, &Pair{regexp.MustCompile("\\(\\\"([A-Za-z]:)?[[:alnum:]_\\-\\./\\\\]+\\\"\\)"), GRAMMAR_FILENAME})
	rules = append(rules, &Pair{regexp.MustCompile("<\\\"([A-Za-z]:)?[[:alnum:]_\\-\\./\\\\]+\\\">"), GRAMMAR_FILENAME})
	rules = append(rules, &Pair{regexp.MustCompile("[A-Za-z][\\-A-Za-z0-9]*[*]?"), GRAMMAR_CATEGORY})
	rules = append(rules, &Pair{regexp.MustCompile("@PRIOR"), GRAMMAR_PRIOR})
	rules = append(rules, &Pair{regexp.MustCompile("@START"), GRAMMAR_START})
	rules = append(rules, &Pair{regexp.MustCompile("@HIDDEN"), GRAMMAR_HIDDEN})
	rules = append(rules, &Pair{regexp.MustCompile("@FLAT"), GRAMMAR_FLAT})
	rules = append(rules, &Pair{regexp.MustCompile("@NOTOP"), GRAMMAR_NOTOP})
	rules = append(rules, &Pair{regexp.MustCompile("@ONLYTOP"), GRAMMAR_ONLYTOP})
	rules = append(rules, &Pair{regexp.MustCompile("\\|"), GRAMMAR_BAR})
	rules = append(rules, &Pair{regexp.MustCompile("\\."), GRAMMAR_DOT})
	rules = append(rules, &Pair{regexp.MustCompile(","), GRAMMAR_COMMA})
	rules = append(rules, &Pair{regexp.MustCompile("\\+"), GRAMMAR_HEAD})

	fl := NewLexer(rules)

	filestr, e := ioutil.ReadFile(fname)
	if e != nil {
		CRASH("Error opening file "+fname, MOD_GRAMMAR)
	}
	gov := 0
	havegov := false
	stat = 1
	priorVal = 1
	stream := string(filestr)
	err = ""
	for {
		tok = fl.getToken(stream)
		if tok == -1 {
			break
		}
		newstat = trans[stat][tok]
		switch newstat {
		case 0:
			{
				if tok == GRAMMAR_COMMENT {
					err = "Unexpected comment. Missing dot ending previous rule/directive ?"
				}
				if err == "" {
					err = "Unexpected '" + fl.getText() + "' found."
				}

				LOG.Warnln("File "+fname+", line "+strconv.Itoa(fl.lineno())+":"+err, MOD_GRAMMAR)

				for tok > -1 && tok != GRAMMAR_DOT {
					tok = fl.getToken(stream)
				}
				newstat = 1
				break
			}
		case 1:
			{
				if tok == GRAMMAR_DOT && (stat == 4 || stat == 5) {
					ls.PushBack(categ)
					if !havegov {
						gov = GRAMMAR_DEFGOV
						if ls.Len() != 1 {
							err = "Non-unary rule with no governor. First component taken as governor."
							LOG.Warnln("File "+fname+", line "+strconv.Itoa(fl.lineno())+":"+err, MOD_GRAMMAR)
						}
					}
					this.newRule(head, ls, wildcard, gov)
					gov = GRAMMAR_NOGOV
					havegov = false
				}
				break
			}
		case 2:
			{
				head = fl.getText()
				this.nonterminal.Add(head)
				break
			}
		case 3:
			{
				if tok == GRAMMAR_ARROW {
					ls = list.New()
					first = true
					wildcard = false
				} else if tok == GRAMMAR_COMMA {
					ls.PushBack(categ)
				} else if tok == GRAMMAR_BAR {
					ls.PushBack(categ)
					if !havegov {
						gov = GRAMMAR_DEFGOV
					if ls.Len() != 1 {
							err = "Non-unary rule with no governor. First component taken as governor."
							LOG.Warnln("File "+fname+", line "+strconv.Itoa(fl.lineno())+":"+err, MOD_GRAMMAR)
						}
					}

					this.newRule(head, ls, wildcard, gov)

					gov = GRAMMAR_NOGOV
					havegov = false
					ls = list.New()
					break
				}
			}
		case 4:
			{
				categ = fl.getText()
				if first && strings.Index(categ, "*") > -1 {
					wildcard = true
				}
				first = false
				break
			}
		case 5:
			{
				name = fl.getText()
				categ = categ + name

				if tok == GRAMMAR_FILENAME {
					var sname string

					sname = name[2 : len(name)-2]
					sname = fname[0:strings.LastIndex(fname, "/")+1] + sname

					fs, e := ioutil.ReadFile(sname)
					if e != nil {
						LOG.Stackln("Error opening file " + sname)
					}

					var op, clo string
					if string(name[0]) == "<" {
						op = "<"
						clo = ">"
					} else if string(name[0]) == "(" {
						op = "("
						clo = ")"
					}

					lines := Split(string(fs), "\n")
					for _, line := range lines {
						lfm, ok := this.filemap[op+line+clo]
						if !ok {
							this.filemap[op+line+clo] = list.New()
							lfm = this.filemap[op+line+clo]
						}
						exists := false
						for l := lfm.Front(); l != nil && !exists; l = l.Next() {
							if l.Value.(string) == name {
								exists = true
								break
							}
						}
						if !exists {
							lfm.PushBack(name)
						}
					}
				}

				break
			}
		case 6:
			{
				what = tok
				break
			}
		case 7:
			{
				categ = fl.getText()
				if this.nonterminal.Has(categ) {
					switch what {
					case GRAMMAR_PRIOR:
						{
							_, ok := this.prior[categ]
							if !ok {
								this.prior[categ] = priorVal
								priorVal++
							}
							break
						}
					case GRAMMAR_HIDDEN:
						{
							this.hidden.Add(categ)
							break
						}
					case GRAMMAR_FLAT:
						{
							this.flat.Add(categ)
							break
						}
					case GRAMMAR_NOTOP:
						{
							this.notop.Add(categ)
							break
						}
					case GRAMMAR_ONLYTOP:
						{
							this.onlytop.Add(categ)
							break
						}
					default:
						break
					}
				} else {
					err = "Terminal symbol '" + fl.getText() + "' not allowed in directive."
					newstat = 0
				}
				break
			}
		case 8:
			{
				if this.start != "" {
					err = "@START specified more than once."
					newstat = 0
				}
				break
			}
		case 9:
			{
				this.start = fl.getText()
				if !this.nonterminal.Has(this.start) {
					this.nonterminal.Add(this.start)
				}
				break
			}
		case 10:
			{
				gov = ls.Len()
				havegov = true
				break
			}
		default:
			break
		}

		stat = newstat
	}

	if this.start == "" {
		err = "@START symbol not specified."
		LOG.Warnln("File " + fname + ", line " + strconv.Itoa(fl.lineno()) + ":" + err)
	}
	if this.hidden.Has(this.start) {
		err = "@START symbol cannot be @HIDDEN."
		LOG.Warnln("File " + fname + ", line " + strconv.Itoa(fl.lineno()) + ":" + err)
	}
	if this.notop.Has(this.start) {
		err = "@START symbol cannot be @NOTOP."
		LOG.Warnln("File " + fname + ", line " + strconv.Itoa(fl.lineno()) + ":" + err)
	}

	for _, x := range this.onlytop.List() {
		if this.hidden.Has(x.(string)) {
			err = "@HIDDEN directive for '" + (x.(string)) + "' overrides @ONLYTOP."
			LOG.Warnln("File " + fname + ", line " + strconv.Itoa(fl.lineno()) + ":" + err)
		}
	}

	/*
		for k,v := range this.filemap {
			//println("FILEMAP ===== ",k," =====")
			for i := v.Front(); i != nil; i = i.Next() {
				//println(i.Value.(string))
			}
		}


		for k,v := range this.RulesMap {
			//println("===== " + k + " =====")
			for i := v.Front(); i != nil; i = i.Next() {
				//println(i.Value.(*Rule).getHead(), i.Value.(*Rule).getRight().Front().Value.(string))

			}
		}

		for k,v := range this.wild {
			//println("===== " + k + " =====")
			for i := v.Front(); i != nil; i = i.Next() {
				//println(i.Value.(*Rule).getHead(), i.Value.(*Rule).getRight().Front().Value.(string))

			}
		}
	*/
	TRACE(3, "Grammar loaded", MOD_GRAMMAR)
	return &this
}
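NewGrammar, like the NER and locutions modules later on, drives its parser from a transition table indexed by current state and token class, with 0 as the error state. A toy table-driven loop with made-up states and tokens:

package main

import "fmt"

func main() {
	const (
		tkWord = iota
		tkDot
		nTokens
	)
	// trans[state][token] = next state; 0 means "unexpected token".
	var trans [3][nTokens]int
	trans[1][tkWord] = 2 // a rule starts with a word
	trans[2][tkWord] = 2 // words may repeat
	trans[2][tkDot] = 1  // a dot closes the rule

	stat := 1
	for _, tok := range []int{tkWord, tkWord, tkDot, tkWord, tkDot} {
		newstat := trans[stat][tok]
		if newstat == 0 {
			fmt.Println("unexpected token in state", stat)
			break
		}
		stat = newstat
	}
	fmt.Println("final state:", stat) // 1
}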
Code example #13
File: nlp.go Project: payfriendz/go-freeling
func (this *NLPEngine) Workflow(document *models.DocumentEntity, output chan *models.DocumentEntity) {
	defer func() {
		// On panic, signal failure downstream; the recovered error itself is discarded.
		if r := recover(); r != nil {
			output <- nil
		}
	}()
	document.Init()
	tokens := list.New()
	url := document.Url
	content := document.Content

	if url != "" && content == "" {
		crawler := NewDefaultCrawler()
		article := crawler.Analyze(url)
		document.Title = article.Title
		document.Description = article.MetaDescription
		document.Keywords = article.MetaKeywords
		document.TopImage = article.TopImage
		document.Content = article.CleanedText
	}

	body := StringsAppend(document.Title, document.Description, document.Keywords, document.Content)

	if this.tokenizer != nil {
		this.tokenizer.Tokenize(body, 0, tokens)
	}

	sentences := list.New()

	if this.splitter != nil {
		sid := this.splitter.OpenSession()
		this.splitter.Split(sid, tokens, true, sentences)
		this.splitter.CloseSession(sid)
	}

	for ss := sentences.Front(); ss != nil; ss = ss.Next() {
		s := ss.Value.(*Sentence)
		if this.morfo != nil {
			this.morfo.Analyze(s)
		}
		if this.sense != nil {
			this.sense.Analyze(s)
		}
		if this.tagger != nil {
			this.tagger.Analyze(s)
		}
		if this.shallowParser != nil {
			this.shallowParser.Analyze(s)
		}
	}

	if this.dsb != nil {
		this.dsb.Analyze(sentences)
	}

	entities := make(map[string]int64)

	for ss := sentences.Front(); ss != nil; ss = ss.Next() {
		se := models.NewSentenceEntity()
		body := ""
		s := ss.Value.(*Sentence)
		for ww := s.Front(); ww != nil; ww = ww.Next() {
			w := ww.Value.(*Word)
			a := w.Front().Value.(*Analysis)
			te := models.NewTokenEntity(w.getForm(), a.getLemma(), a.getTag(), a.getProb())
			if a.getTag() == TAG_NP {
				entities[w.getForm()]++
			}
			body += w.getForm() + " "
			se.AddTokenEntity(te)
		}
		body = strings.Trim(body, " ")
		se.SetBody(body)
		se.SetSentence(s)

		document.AddSentenceEntity(se)
	}

	tempEntities := set.New()

	mitieEntities := this.mitie.Process(body)
	for e := mitieEntities.Front(); e != nil; e = e.Next() {
		entity := e.Value.(*models.Entity)
		tempEntities.Add(entity.GetValue())
	}

	for name, frequency := range entities {
		name = strings.Replace(name, "_", " ", -1)
		if !tempEntities.Has(name) {
			document.AddUnknownEntity(name, frequency)
		}
	}

	document.Entities = mitieEntities
	output <- document
}
Code example #14
File: ner.go Project: payfriendz/go-freeling
func NewNP(npFile string) *NP {
	this := NP{
		fun:            set.New(),
		punct:          set.New(),
		names:          set.New(),
		ignoreTags:     make(map[string]int),
		ignoreWords:    make(map[string]int),
		prefixes:       set.New(),
		suffixes:       set.New(),
		RENounAdj:      regexp.MustCompile(NP_RE_NA),
		REClosed:       regexp.MustCompile(NP_RE_CLO),
		REDateNumPunct: regexp.MustCompile(NP_RE_DNP),
	}
	this.NERModule = NewNERModule(npFile)
	this.final = set.New()

	cfg := NewConfigFile(false, "##")
	cfg.AddSection("Type", NP_NER_TYPE)
	cfg.AddSection("FunctionWords", NP_FUNCTION)
	cfg.AddSection("SpecialPunct", NP_SPECIAL)
	cfg.AddSection("Names", NP_NAMES)
	cfg.AddSection("Ignore", NP_NE_IGNORE)
	cfg.AddSection("RE_NounAdj", NP_REX_NOUNADJ)
	cfg.AddSection("RE_Closed", NP_REX_CLOSED)
	cfg.AddSection("RE_DateNumPunct", NP_REX_DATNUMPUNT)
	cfg.AddSection("Affixes", NP_AFFIXES)
	cfg.skipUnknownSections = true

	if !cfg.Open(npFile) {
		CRASH("Error opening file "+npFile, MOD_NER)
	}

	line := ""

	for cfg.GetContentLine(&line) {
		items := Split(line, " ")
		switch cfg.GetSection() {
		case NP_NER_TYPE:
			{
				if strings.ToLower(line) != "basic" {
					CRASH("Invalid configuration file for 'basic' NER, "+npFile, MOD_NER)
				}
				break
			}

		case NP_FUNCTION:
			{
				this.fun.Add(line)
				break
			}

		case NP_SPECIAL:
			{
				this.punct.Add(line)
				break
			}

		case NP_NAMES:
			{
				this.names.Add(line)
				break
			}

		case NP_NE_IGNORE:
			{
				key := items[0]
				tpe, _ := strconv.Atoi(items[1])
				if IsCapitalized(key) {
					this.ignoreTags[key] = tpe + 1
				} else {
					this.ignoreWords[key] = tpe + 1
				}
				break
			}

		case NP_REX_NOUNADJ:
			{
				this.RENounAdj = regexp.MustCompile(line)
				break
			}

		case NP_REX_CLOSED:
			{
				this.REClosed = regexp.MustCompile(line)
				break
			}

		case NP_REX_DATNUMPUNT:
			{
				this.REDateNumPunct = regexp.MustCompile(line)
				break
			}

		case NP_AFFIXES:
			{
				word := items[0]
				tpe := items[1]
				if tpe == "SUF" {
					this.suffixes.Add(word)
				} else if tpe == "PRE" {
					this.prefixes.Add(word)
				} else {
					WARNING("Ignored affix with unknown type '"+tpe+"' in file", MOD_NER)
				}
				break
			}
		}
	}

	this.initialState = NP_ST_IN
	this.stopState = NP_ST_STOP
	this.final.Add(NP_ST_NP)
	this.final.Add(NP_ST_SUF)

	var s, t int
	for s = 0; s < AUTOMAT_MAX_STATES; s++ {
		for t = 0; t < AUTOMAT_MAX_TOKENS; t++ {
			this.trans[s][t] = NP_ST_STOP
		}
	}

	this.trans[NP_ST_IN][NP_TK_sUnkUpp] = NP_ST_NP
	this.trans[NP_ST_IN][NP_TK_sNounUpp] = NP_ST_NP
	this.trans[NP_ST_IN][NP_TK_mUpper] = NP_ST_NP
	this.trans[NP_ST_IN][NP_TK_mPref] = NP_ST_PREF

	this.trans[NP_ST_PREF][NP_TK_mPref] = NP_ST_PREF
	this.trans[NP_ST_PREF][NP_TK_mUpper] = NP_ST_NP

	this.trans[NP_ST_NP][NP_TK_mUpper] = NP_ST_NP
	this.trans[NP_ST_NP][NP_TK_mFun] = NP_ST_FUN
	this.trans[NP_ST_NP][NP_TK_mSuf] = NP_ST_SUF

	this.trans[NP_ST_FUN][NP_TK_mUpper] = NP_ST_NP
	this.trans[NP_ST_FUN][NP_TK_mFun] = NP_ST_FUN

	this.trans[NP_ST_SUF][NP_TK_mSuf] = NP_ST_SUF

	LOG.Trace("analyzer succesfully created")

	return &this
}
Code example #15
File: suffixes.go Project: payfriendz/go-freeling
func NewAffixes(sufFile string) *Affixes {
	this := Affixes{}

	filestr, err := ioutil.ReadFile(sufFile)
	if err != nil {
		CRASH("Error opening file "+sufFile, MOD_AFFIX)
		return nil
	}
	lines := strings.Split(string(filestr), "\n")

	this.Longest[SUF] = 0
	this.Longest[PREF] = 0

	kind := -1
	for _, line := range lines {
		if line != "" && !strings.HasPrefix(line, "#") {
			items := Split(line, "\t")
			if line == "<Suffixes>" {
				kind = SUF
			} else if line == "<Prefixes>" {
				kind = PREF
			} else if line == "</Suffixes>" {
				kind = -1
			} else if line == "</Prefixes>" {
				kind = -1
			} else if kind == SUF || kind == PREF {
				key := items[0]
				term := items[1]
				cond := items[2]
				output := items[3]
				acc := items[4]
				enc := items[5]
				nomore := items[6]
				lema := items[7]
				always := items[8]
				retok := items[9]

				suf := NewSufRuleFromRexEx(cond)
				suf.term = term
				suf.output = output
				suf.acc, _ = strconv.Atoi(acc)
				suf.enc, _ = strconv.Atoi(enc)
				suf.nomore, _ = strconv.Atoi(nomore)
				suf.lema = lema
				suf.always, _ = strconv.Atoi(always)
				suf.retok = retok

				if suf.retok == "-" {
					suf.retok = ""
				}

				if this.affix[kind] == nil {
					this.affix[kind] = make(map[string]*set.Set)
				}

				if this.affix[kind][key] == nil {
					this.affix[kind][key] = set.New()
				}

				this.affix[kind][key].Add(suf)
				if suf.always == 1 {
					if this.affixAlways[kind] == nil {
						this.affixAlways[kind] = make(map[string]*set.Set)
					}
					if this.affixAlways[kind][key] == nil {
						this.affixAlways[kind][key] = set.New()
					}
					this.affixAlways[kind][key].Add(suf)
				}

				if this.ExistingLength[kind] == nil {
					this.ExistingLength[kind] = set.New()
				}

				this.ExistingLength[kind].Add(len(key))

				if len(key) > this.Longest[kind] {
					this.Longest[kind] = len(key)
				}
			}

		}
	}

	TRACE(3, "analyzer succesfully created", MOD_AFFIX)

	return &this
}
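NewAffixes records every affix-key length in ExistingLength and the maximum in Longest; lookForCombinedAffixes (code example #17) then scans only lengths that actually occur. The bookkeeping in isolation, assuming fatih/set and made-up affix keys:

package main

import (
	"fmt"

	set "gopkg.in/fatih/set.v0"
)

func main() {
	existing := set.New()
	longest := 0
	for _, key := range []string{"s", "es", "mente"} {
		existing.Add(len(key))
		if len(key) > longest {
			longest = len(key)
		}
	}
	// Try only sizes for which at least one affix exists.
	for i := 1; i <= longest; i++ {
		if !existing.Has(i) {
			continue
		}
		fmt.Println("would look up affixes of length", i)
	}
}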
Code example #16
File: semdb.go Project: payfriendz/go-freeling
func NewSemanticDB(wsdFile string) *SemanticDB {
	this := SemanticDB{
		posMap: list.New(),
	}
	var formFile, dictFile, wnFile string

	path := wsdFile[0:strings.LastIndex(wsdFile, "/")]

	posset := set.New()
	cfg := NewConfigFile(true, "")
	cfg.AddSection("WNposMap", SEMDB_WN_POS_MAP)
	cfg.AddSection("DataFiles", SEMDB_DATA_FILES)

	if !cfg.Open(wsdFile) {
		LOG.Panic("Error opening configuration file " + wsdFile)
	}

	line := ""

	for cfg.GetContentLine(&line) {
		items := Split(line, " ")
		switch cfg.GetSection() {
		case SEMDB_WN_POS_MAP:
			{
				r := PosMapRule{}
				r.pos = items[0]
				r.wnpos = items[1]
				r.lemma = items[2]
				this.posMap.PushBack(r)
				if r.lemma != "L" && r.lemma != "F" {
					posset.Add(r.lemma)
				}
				break
			}
		case SEMDB_DATA_FILES:
			{
				key := items[0]
				fname := items[1]
				if key == "formDictFile" {
					formFile = path + "/" + strings.Replace(fname, "./", "", -1)
				} else if key == "senseDictFile" {
					dictFile = path + "/" + strings.Replace(fname, "./", "", -1)
				} else if key == "wnFile" {
					wnFile = path + "/" + strings.Replace(fname, "./", "", -1)
				}
				break
			}
		default:
			break
		}
	}

	if formFile == "" || posset.Size() == 0 {
		this.formDict = nil
	} else {
		fileString, err := ioutil.ReadFile(formFile)
		if err != nil {
			LOG.Panic("Error loading file " + formFile)
		}
		lines := strings.Split(string(fileString), "\n")
		this.formDict = NewDatabase(DB_MAP)
		for _, line := range lines {
			items := Split(line, " ")
			form := items[0]
			for i := 1; i < len(items); i = i + 2 {
				lemma := items[i]
				tag := items[i+1]
				if posset.Has(tag) {
					this.formDict.addDatabase(lemma+" "+tag, form)
				}
			}
		}
	}

	if dictFile == "" {
		this.senseDB = nil
	} else {
		fileString, err := ioutil.ReadFile(dictFile)
		if err != nil {
			LOG.Panic("Error loading file " + dictFile)
		}
		lines := strings.Split(string(fileString), "\n")
		this.senseDB = NewDatabase(DB_MAP)
		for _, line := range lines {
			items := Split(line, " ")
			sens := items[0]
			tag := sens[strings.Index(sens, "-")+1:]
			for i := 1; i < len(items); i++ {
				wd := items[i]
				this.senseDB.addDatabase("S:"+sens, wd)
				this.senseDB.addDatabase("W:"+wd+":"+tag, sens)
			}
		}
	}

	if wnFile == "" {
		this.wndb = nil
	} else {
		this.wndb = NewDatabaseFromFile(wnFile)
	}

	return &this
}
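NewSemanticDB indexes every sense line twice — by sense under an "S:" key and by word and tag under a "W:" key — so lookups run in both directions. The keying scheme alone, with plain maps standing in for the Database type (an assumption):

package main

import "fmt"

func main() {
	index := make(map[string][]string)
	addDatabase := func(key, value string) { index[key] = append(index[key], value) }

	// One sense-dictionary line: a sense id followed by its words.
	sens, tag := "00001740-n", "n"
	for _, wd := range []string{"entity", "thing"} {
		addDatabase("S:"+sens, wd)         // sense -> words
		addDatabase("W:"+wd+":"+tag, sens) // word+tag -> senses
	}

	fmt.Println(index["S:00001740-n"]) // [entity thing]
	fmt.Println(index["W:entity:n"])   // [00001740-n]
}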
Code example #17
File: suffixes.go Project: payfriendz/go-freeling
func (this *Affixes) lookForCombinedAffixes(suff map[string]*set.Set, pref map[string]*set.Set, w *Word, dic *Dictionary) {
	var i, j, ln int
	var lws, formSuf, formPref, formRoot string
	lws = w.getLCForm()
	ln = len(lws)

	var rulesS *set.Set
	var rulesP *set.Set

	var candidates, cand1 *set.Set

	for i = 1; i <= this.Longest[SUF] && i < ln; i++ {
		if this.ExistingLength[SUF].Has(i) == false {
			TRACE(4, "No suffixes  of size "+strconv.Itoa(i), MOD_AFFIX)
			continue
		}

		for j = 1; j <= this.Longest[PREF] && j <= ln-i; j++ {
			if this.ExistingLength[PREF].Has(j) == false {
				TRACE(4, "No prefixes  of size "+strconv.Itoa(i), MOD_AFFIX)
				continue
			}

			formSuf = lws[ln-i:]
			formPref = lws[0:j]

			rulesS = suff[formSuf]
			if rulesS == nil || rulesS.Size() == 0 {
				TRACE(3, "No rules for suffix "+formSuf+" (size "+strconv.Itoa(i)+")", MOD_AFFIX)
				continue
			}

			// Prefix rules come from the pref map, not the suffix one.
			rulesP = pref[formPref]
			if rulesP == nil || rulesP.Size() == 0 {
				TRACE(3, "No rules for prefix "+formPref+" (size "+strconv.Itoa(j)+")", MOD_AFFIX)
				continue
			}

			formRoot = lws[0 : ln-i][j:]
			TRACE(3, "Trying a decomposition: "+formPref+"+"+formRoot+"+"+formSuf, MOD_AFFIX)

			TRACE(3, "Found "+strconv.Itoa(rulesS.Size())+" rules for suffix "+formSuf+" (size "+strconv.Itoa(i), MOD_AFFIX)
			TRACE(3, "Found "+strconv.Itoa(rulesP.Size())+" rules for prefix "+formPref+" (size "+strconv.Itoa(i), MOD_AFFIX)

			//wfid := w.foundInDict()

			lrulesS := rulesS.List()
			lrulesP := rulesP.List()

			for s := 0; s < rulesS.Size(); s++ {
				sufit := lrulesS[s].(*sufrule)
				for p := 0; p < rulesP.Size(); p++ {
					prefit := lrulesP[p].(*sufrule)
					candidates = set.New()
					cand1 = this.GenerateRoots(SUF, sufit, formRoot)
					this.accen.FixAccentutation(cand1, sufit)
					lcand1 := cand1.List()
					for _, c := range lcand1 {
						cand2 := this.GenerateRoots(PREF, prefit, c.(string))
						this.accen.FixAccentutation(cand2, prefit)
						candidates.Add(cand2)
					}
				}
			}
		}
	}
}
Code example #18
File: tokenizer.go Project: payfriendz/go-freeling
func NewTokenizer(tokenizerFile string) *Tokenizer {
	this := Tokenizer{
		abrevs:  set.New(),
		rules:   list.New(),
		matches: make(map[string]int),
	}

	cfg := NewConfigFile(false, "##")
	cfg.AddSection("Macros", TOKENIZER_MACROS)
	cfg.AddSection("RegExps", TOKENIZER_REGEXPS)
	cfg.AddSection("Abbreviations", TOKENIZER_ABBREV)

	if !cfg.Open(tokenizerFile) {
		LOG.Panic("Error opening file " + tokenizerFile)
	}

	macros := list.New()
	rul := false
	var ci string
	line := ""
	for cfg.GetContentLine(&line) {
		items := Split(line, " ")
		switch cfg.GetSection() {
		case TOKENIZER_MACROS:
			{
				if rul {
					LOG.Panic("Error reading tokenizer configuration. Macros must be defined before rules.")
				}
				mname := items[0]
				mvalue := items[1]
				macros.PushBack(Pair{mname, mvalue})
				LOG.Trace("Read macro " + mname + ": " + mvalue)
				break
			}
		case TOKENIZER_REGEXPS:
			{
				var substr int
				comm := items[0]
				substr, _ = strconv.Atoi(items[1])
				re := items[2]
				rul = true

				for i := macros.Front(); i != nil; i = i.Next() {
					mname := "{" + i.Value.(Pair).first.(string) + "}"
					mvalue := i.Value.(Pair).second.(string)
					p := strings.Index(re, mname)
					for p > -1 {
						re = strings.Replace(re, mname, mvalue, -1)
						p = strings.Index(re[p:], mname)
					}
				}

				if len(items) > 3 {
					ci = items[3]
				}

				if ci == "CI" {
					newre := "(?i)" + re
					x, err := regexp.Compile(newre)
					if err == nil {
						this.rules.PushBack(Pair{comm, x})
					} else {
						LOG.Warn("Rule " + comm + " [" + newre + "] failed to be compiled")
					}
				} else {
					x, err := regexp.Compile(re)
					if err == nil {
						this.rules.PushBack(Pair{comm, x})
					} else {
						LOG.Warn("Rule " + comm + " [" + re + "] failed to be compiled")
					}
				}

				this.matches[comm] = substr
				LOG.Trace("Stored rule " + comm + " " + re + " " + strconv.Itoa(substr))
				break

			}
		case TOKENIZER_ABBREV:
			{
				this.abrevs.Add(line)
				break
			}
		default:
			break
		}
	}

	LOG.Trace("analyzer succesfully created")
	return &this
}
Code example #19
File: splitter.go Project: payfriendz/go-freeling
func NewSplitter(splitterFile string) *Splitter {
	this := Splitter{
		starters: set.New(),
		enders:   make(map[string]bool),
		markers:  make(map[string]int),
	}

	cfg := NewConfigFile(false, "##")
	cfg.AddSection("General", SPLITTER_GENERAL)
	cfg.AddSection("Markers", SPLITTER_MARKERS)
	cfg.AddSection("SentenceEnd", SPLITTER_SENT_END)
	cfg.AddSection("SentenceStart", SPLITTER_SENT_START)

	if !cfg.Open(splitterFile) {
		CRASH("Error opening file "+splitterFile, MOD_SPLITTER)
	}

	this.SPLIT_AllowBetweenMarkers = true
	this.SPLIT_MaxWords = 0

	nmk := 1
	line := ""

	for cfg.GetContentLine(&line) {
		items := Split(line, " ")
		switch cfg.GetSection() {
		case SPLITTER_GENERAL:
			{
				name := items[0]
				if name == "AllowBetweenMarkers" {
					this.SPLIT_AllowBetweenMarkers, _ = strconv.ParseBool(items[1])
				} else if name == "MaxWords" {
					this.SPLIT_MaxWords, _ = strconv.ParseInt(items[1], 10, 64)
				} else {
					LOG.Panic("Unexpected splitter option " + name)
				}
				break
			}
		case SPLITTER_MARKERS:
			{
				open := items[0]
				close := items[1]
				if open != close {
					this.markers[open] = nmk
					this.markers[close] = -nmk
				} else {
					this.markers[open] = SAME + nmk
					this.markers[close] = SAME + nmk
				}
				nmk++
				break
			}
		case SPLITTER_SENT_END:
			{
				name := items[0]
				value, _ := strconv.ParseBool(items[1])
				this.enders[name] = !value
				break
			}
		case SPLITTER_SENT_START:
			{
				this.starters.Add(line)
				break
			}
		default:
			break
		}
	}

	LOG.Trace("Analyzer succesfully created")
	return &this
}
Code example #20
File: locutions.go Project: payfriendz/go-freeling
func (this *Locutions) ComputeToken(state int, j *list.Element, se *Sentence) int {
	st := se.getProcessingStatus().(*LocutionStatus)
	if st.components == nil {
		st.components = make([]*Word, 0)
	}
	st.components = append(st.components, j.Value.(*Word))
	var form, lem, tag string
	form = j.Value.(*Word).getLCForm()

	token := LOCUTIONS_TK_other

	acc := set.New()
	mw := false
	pref := false

	if j.Value.(*Word).Len() == 0 {
		LOG.Trace("checking (" + form + ")")
		if st.accMW.Size() == 0 {
			this.check(form, acc, &mw, &pref, st)
		} else {
			for _, i := range st.accMW.List() {
				LOG.Trace("   acc_mw: [" + i.(string) + "]")
				this.check(i.(string)+"_"+form, acc, &mw, &pref, st)
			}
		}
	} else {
		first := j.Value.(*Word).Front()

		if this.onlySelected {
			first = j.Value.(*Word).selectedBegin(0).Element
			LOG.Trace("Only selected is set.")
		}
		for a := first; a != nil; a = a.Next() {
			bm := false
			bp := false
			lem = "<" + a.Value.(*Analysis).getLemma() + ">"
			tag = a.Value.(*Analysis).getTag()
			if this.Tags != nil {
				tag = this.Tags.GetShortTag(tag)
			}
			LOG.Trace("checking (" + form + "," + lem + "," + tag + ")")
			if st.accMW.Size() == 0 {
				this.check(form, acc, &bm, &bp, st)
				this.check(lem, acc, &bm, &bp, st)
				if this.check(tag, acc, &bm, &bp, st) {
					j.Value.(*Word).unselectAllAnalysis(0)
					a.Value.(*Analysis).markSelected(0)
				}

				mw = mw || bm
				pref = pref || bp
			} else {
				for _, i := range st.accMW.List() {
					LOG.Trace("   acc_mw: [" + i.(string) + "]")
					this.check(i.(string)+"_"+form, acc, &bm, &bp, st)
					this.check(i.(string)+"_"+lem, acc, &bm, &bp, st)
					if this.check(i.(string)+"_"+tag, acc, &bm, &bp, st) {
						j.Value.(*Word).unselectAllAnalysis(0)
						a.Value.(*Analysis).markSelected(0)
					}
					mw = mw || bm
					pref = pref || bp
				}
			}
		}
	}

	LOG.Trace("  fora :" + If(mw, "MW", "noMW").(string) + "," + If(pref, "PREF", "noPref").(string))
	if mw {
		token = LOCUTIONS_TK_mw
	} else if pref {
		token = LOCUTIONS_TK_pref
	}

	st.overLongest++
	st.accMW = acc

	LOG.Trace("Encoded word: [" + form + "," + lem + "," + tag + "] token=" + strconv.Itoa(token))
	return token
}
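ComputeToken extends multiword candidates by joining each accumulated prefix to the next form with an underscore before looking it up. The accumulation step alone, assuming fatih/set and made-up Spanish fragments:

package main

import (
	"fmt"

	set "gopkg.in/fatih/set.v0"
)

func main() {
	accMW := set.New("de", "de_la") // prefixes accumulated so far
	form := "mano"
	if accMW.Size() == 0 {
		fmt.Println("check:", form)
	} else {
		for _, i := range accMW.List() {
			fmt.Println("check:", i.(string)+"_"+form) // de_mano, de_la_mano
		}
	}
}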
Code example #21
File: locutions.go Project: payfriendz/go-freeling
func NewLocutions(locFile string) *Locutions {
	this := Locutions{
		locut:    make(map[string]string),
		prefixes: set.New(),
	}

	/*
		cfg := NewConfigFile(false, "##")
		cfg.AddSection("TagSetFile", LOCUTIONS_TAGSET)
		cfg.AddSection("Multiwords", LOCUTIONS_MULTIWORDS)
		cfg.AddSection("OnlySelected", LOCUTIONS_ONLYSELECTED)
	*/
	filestr, err := ioutil.ReadFile(locFile)
	if err != nil {
		LOG.Panic("Error opening file " + locFile)
	}
	lines := strings.Split(string(filestr), "\n")

	for _, line := range lines {
		this.addLocution(line)
	}

	/*
		if !cfg.Open(locFile) {
			CRASH("Error opening file " + locFile, MOD_LOCUTIONS)
		}

		line := ""
		for cfg.GetContentLine(&line) {
			switch cfg.GetSection() {
				case LOCUTIONS_MULTIWORDS: {
					this.addLocution(line)
					break
				}
				case LOCUTIONS_TAGSET: {
					path := locFile[0:strings.LastIndex(locFile, "/")]
					this.Tags = NewTagset(path + "/" + strings.Replace(line, "./", "", -1))
					break
				}
				case LOCUTIONS_ONLYSELECTED: {
					this.onlySelected = (line == "yes" || line == "true")
					break
				}
			default:
				break
			}
		}
	*/

	this.initialState = LOCUTIONS_ST_P
	this.stopState = LOCUTIONS_ST_STOP
	if this.final == nil {
		this.final = set.New()
	}
	this.final.Add(LOCUTIONS_ST_M)
	var s, t int
	for s = 0; s < AUTOMAT_MAX_STATES; s++ {
		for t = 0; t < AUTOMAT_MAX_TOKENS; t++ {
			this.trans[s][t] = LOCUTIONS_ST_STOP
		}
	}

	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_pref] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_prefL] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_prefP] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_mw] = LOCUTIONS_ST_M
	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_mwL] = LOCUTIONS_ST_M
	this.trans[LOCUTIONS_ST_P][LOCUTIONS_TK_mwP] = LOCUTIONS_ST_M

	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_pref] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_prefL] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_prefP] = LOCUTIONS_ST_P
	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_mw] = LOCUTIONS_ST_M
	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_mwL] = LOCUTIONS_ST_M
	this.trans[LOCUTIONS_ST_M][LOCUTIONS_TK_mwP] = LOCUTIONS_ST_M

	LOG.Trace("analyzer succesfully created")

	return &this
}
Code example #22
File: tag-set.go Project: payfriendz/go-freeling
func NewTagset(ftagset string) *TagSet {
	this := &TagSet{
		PAIR_SEP:  "=",
		MSD_SEP:   "|",
		feat:      make(map[string]string),
		val:       make(map[string]string),
		name:      make(map[string]string),
		nameInv:   make(map[string]string),
		direct:    make(map[string]*Pair),
		directInv: make(map[*set.Set]string),
		valInv:    make(map[string]string),
		shtagSize: make(map[string]*list.List),
	}
	cfg := NewConfigFile(false, "##")
	cfg.AddSection("DirectTranslations", DIRECT_TRANSLATIONS)
	cfg.AddSection("DecompositionRules", DECOMPOSITION_RULES)

	if !cfg.Open(ftagset) {
		CRASH("Error opening file "+ftagset, MOD_TAG_SET)
	}

	line := ""
	for cfg.GetContentLine(&line) {
		items := Split(line, " ")
		switch cfg.section {
		case DIRECT_TRANSLATIONS:
			{
				tag := items[0]
				shtag := items[1]
				msd := ""
				if len(items) > 2 {
					msd = items[2]
				}
				this.direct[tag] = &Pair{shtag, msd}
				this.directInv[set.New(msd, this.MSD_SEP)] = tag
				break
			}
		case DECOMPOSITION_RULES:
			{
				cat := items[0]
				shsz := items[1]
				if len(items) > 2 {
					pos := items[2]
					this.name[cat] = pos
					this.nameInv[pos] = cat
				}
				this.shtagSize[cat] = list.New()
				tokens := strings.Split(shsz, ",")
				for _, sitem := range tokens {
					item, _ := strconv.Atoi(sitem)
					this.shtagSize[cat].PushBack(item)
				}
				//TRACE(3, fmt.Sprintf("Read short tag size for %s (%s) %s\n", cat, pos, shsz), MOD_TAG_SET)
				i := 1
				if len(items) > 4 {
					msd := items[4]
					key := cat + "#" + strconv.Itoa(i)
					k := strings.Split(msd, "/")
					this.feat[key] = k[0]
					this.feat[cat+"#"+k[0]] = strconv.Itoa(i)
					v := strings.Split(k[1], ";")
					for j := 0; j < len(v); j++ {
						t := strings.Split(v[j], ":")
						this.val[key+"#"+strings.ToUpper(t[0])] = t[1]
						this.valInv[key+"#"+t[1]] = strings.ToUpper(t[0])
					}

					i++
				}
				break
			}
		default:
			break
		}
	}

	TRACE(1, "Module created successfully", MOD_HMM)

	return this
}