コード例 #1
0
ファイル: automaton.go プロジェクト: kiskovacs/golucene
// newEmptyAutomaton allocates an Automaton whose deterministic flag is set,
// whose curState is -1, and whose isAccept set is a fresh, empty OpenBitSet.
func newEmptyAutomaton() *Automaton {
	a := new(Automaton)
	a.deterministic = true
	a.curState = -1
	a.isAccept = util.NewOpenBitSet()
	return a
}
コード例 #2
0
// minimizeHopcroft minimizes the given automaton using Hopcroft's algorithm.
//
// The automaton is first passed through determinize and totalize. Its states
// are then partitioned into blocks, starting from a two-block split driven by
// IsAccept, and the blocks are refined until the pending work list is empty.
// Finally one state is created per block, transitions are copied from one
// representative state of each block, and the result is returned through
// removeDeadStates.
//
// Two shortcuts return early: an automaton with no states (or with a
// non-accepting state 0 that has no transitions) yields newEmptyAutomaton(),
// and a determinized automaton whose state 0 has a single self-loop covering
// MIN_CODE_POINT..unicode.MaxRune is returned as is.
func minimizeHopcroft(a *Automaton) *Automaton {
	if a.numStates() == 0 || !a.IsAccept(0) && a.numTransitions(0) == 0 {
		// fast path for the common case
		return newEmptyAutomaton()
	}
	a = determinize(a)
	if a.numTransitions(0) == 1 {
		t := newTransition()
		a.transition(0, 0, t)
		if t.dest == 0 && t.min == MIN_CODE_POINT &&
			t.max == unicode.MaxRune {
			// accepts all strings
			return a
		}
	}
	a = totalize(a)

	// initialize data structures
	sigma := a.startPoints()
	sigmaLen, statesLen := len(sigma), a.numStates()

	// reverse[n][x] collects every state q with a.step(q, sigma[x]) == n.
	reverse := make([][][]int, statesLen)
	for i := range reverse {
		reverse[i] = make([][]int, sigmaLen)
	}
	// partition[j] is the set of states in block j; block[q] is the index of
	// the block holding q; splitblock[j] accumulates the states of block j
	// that must be split off during the current round.
	partition := make([]map[int]bool, statesLen)
	splitblock := make([][]int, statesLen)
	block := make([]int, statesLen)
	// active[j][x] lists the states of block j that have at least one
	// incoming edge on sigma[x]; active2[q][x] is q's node in that list.
	active := make([][]*StateList, statesLen)
	for i := range active {
		active[i] = make([]*StateList, sigmaLen)
	}
	active2 := make([][]*StateListNode, statesLen)
	for i := range active2 {
		active2[i] = make([]*StateListNode, sigmaLen)
	}
	pending := list.New()
	pending2 := util.NewOpenBitSet() // sigmaLen * statesLen bits
	split := util.NewOpenBitSet()    // statesLen bits
	refine := util.NewOpenBitSet()   // statesLen bits
	refine2 := util.NewOpenBitSet()  // statesLen bits
	for q := range splitblock {
		partition[q] = make(map[int]bool)
		for x := range active[q] {
			active[q][x] = new(StateList)
		}
	}
	// find initial partition and reverse edges
	for q := 0; q < statesLen; q++ {
		// NOTE(review): or is defined elsewhere; presumably it yields the
		// second argument when the condition holds and the third otherwise,
		// i.e. accepting states start in block 0 - confirm.
		j := or(a.IsAccept(q), 0, 1).(int)
		partition[j][q] = true
		block[q] = j
		for x, v := range sigma {
			n := a.step(q, v)
			assert2(n >= 0 && n < len(reverse), "%v", n)
			// reuse the validated target instead of stepping a second time
			r := reverse[n]
			r[x] = append(r[x], q)
		}
	}
	// initialize active sets
	for j := 0; j <= 1; j++ {
		for x := 0; x < sigmaLen; x++ {
			for q := range partition[j] {
				if reverse[q][x] != nil {
					active2[q][x] = active[j][x].add(q)
				}
			}
		}
	}
	// initialize pending: one (block, symbol) pair per symbol, chosen by
	// comparing the sizes of the two initial active lists
	for x := 0; x < sigmaLen; x++ {
		j := or(active[0][x].size <= active[1][x].size, 0, 1).(int)
		pending.PushBack(&IntPair{j, x})
		pending2.Set(int64(x*statesLen + j))
	}
	// process pending until fixed point; k is the index of the next new block
	k := 2
	for pending.Len() > 0 {
		ip := pending.Remove(pending.Front()).(*IntPair)
		p, x := ip.n1, ip.n2
		pending2.Clear(int64(x*statesLen + p))
		// find states that need to be split off their blocks
		for m := active[p][x].first; m != nil; m = m.next {
			if r := reverse[m.q][x]; r != nil {
				for _, i := range r {
					if !split.Get(int64(i)) {
						split.Set(int64(i))
						j := block[i]
						splitblock[j] = append(splitblock[j], i)
						if !refine2.Get(int64(j)) {
							refine2.Set(int64(j))
							refine.Set(int64(j))
						}
					}
				}
			}
		}
		// refine blocks
		for j := int(refine.NextSetBit(0)); j >= 0; j = int(refine.NextSetBit(int64(j) + 1)) {
			sb := splitblock[j]
			// only a proper subset of block j is moved into a new block k
			if len(sb) < len(partition[j]) {
				b1, b2 := partition[j], partition[k]
				for _, s := range sb {
					delete(b1, s)
					b2[s] = true
					block[s] = k
					for c, sn := range active2[s] {
						if sn != nil && sn.sl == active[j][c] {
							sn.remove()
							active2[s][c] = active[k][c].add(s)
						}
					}
				}
				// update pending
				for c := range active[j] {
					aj := active[j][c].size
					ak := active[k][c].size
					ofs := int64(c * statesLen)
					if !pending2.Get(ofs+int64(j)) && 0 < aj && aj <= ak {
						pending2.Set(ofs + int64(j))
						pending.PushBack(&IntPair{j, c})
					} else {
						pending2.Set(ofs + int64(k))
						pending.PushBack(&IntPair{k, c})
					}
				}
				k++
			}
			refine2.Clear(int64(j))
			for _, s := range sb {
				split.Clear(int64(s))
			}
			splitblock[j] = nil // clear sb
		}
		refine = util.NewOpenBitSet() // not quite efficient
	}

	ans := newEmptyAutomaton()
	t := newTransition()

	// make a new state for each equivalence class, set initial state
	stateMap := make([]int, statesLen) // old state -> state in ans
	stateRep := make([]int, k)         // state in ans -> one old state of its class

	// this first state is the one reused for the class that contains old state 0
	ans.createState()

	for n := 0; n < k; n++ {
		// Entries of partition[n] are only ever set to true or deleted, so a
		// direct lookup tells whether old state 0 belongs to this class.
		newState := 0
		if !partition[n][0] {
			newState = ans.createState()
		}

		for q := range partition[n] {
			stateMap[q] = newState
			ans.setAccept(newState, a.IsAccept(q))
			stateRep[newState] = q // select representative
		}
	}

	// build transitions from the representative of each class
	for n := 0; n < k; n++ {
		numTransitions := a.initTransition(stateRep[n], t)
		for i := 0; i < numTransitions; i++ {
			a.nextTransition(t)
			ans.addTransitionRange(n, stateMap[t.dest], t.min, t.max)
		}
	}
	ans.finishState()

	return removeDeadStates(ans)
}
コード例 #3
0
ファイル: automaton_test.go プロジェクト: kiskovacs/golucene
// determinizeSimple is the simple, original brics implementation of
// determinize(). It determinizes the given automaton by subset construction,
// starting from the given set of initial states.
//
// An automaton without states is returned unchanged; otherwise the result of
// the builder is passed through removeDeadStates.
func determinizeSimple(a *Automaton, initialset map[int]bool) *Automaton {
	if a.numStates() == 0 {
		return a
	}
	points := a.startPoints()

	// keyOf turns a set of states into a string usable as a map key by
	// setting one bit per member and rendering the bit set.
	keyOf := func(states map[int]bool) string {
		bits := util.NewOpenBitSet()
		for s := range states {
			bits.Set(int64(s))
		}
		return bits.String()
	}

	builder := newAutomatonBuilder()
	builder.createState()

	// stateOf maps every subset discovered so far to its state in the result;
	// the initial subset is state 0, created just above.
	stateOf := map[string]int{keyOf(initialset): 0}
	queue := list.New()
	queue.PushBack(initialset)

	t := newTransition()
	for queue.Len() > 0 {
		cur := queue.Remove(queue.Front()).(map[int]bool)
		src := stateOf[keyOf(cur)]

		// a subset is accepting as soon as any member is accepting
		for q := range cur {
			if a.IsAccept(q) {
				builder.setAccept(src, true)
				break
			}
		}

		for idx, lo := range points {
			// gather every destination reachable from cur on code point lo
			next := make(map[int]bool)
			for q := range cur {
				remaining := a.initTransition(q, t)
				for ; remaining > 0; remaining-- {
					a.nextTransition(t)
					if lo < t.min || t.max < lo {
						continue
					}
					next[t.dest] = true
				}
			}

			key := keyOf(next)
			dst, known := stateOf[key]
			if !known {
				queue.PushBack(next)
				dst = builder.createState()
				stateOf[key] = dst
			}

			// the range ends right before the next start point, or at the
			// last code point for the final interval
			var hi int = unicode.MaxRune
			if idx+1 < len(points) {
				hi = points[idx+1] - 1
			}
			builder.addTransitionRange(src, dst, lo, hi)
		}
	}

	return removeDeadStates(builder.finish())
}