Пример #1
0
// testBytesFind repeatedly calls tekstus.BytesFind on `line` with `token`,
// starting at index `startat`, and records every non-negative index that is
// returned. After each hit the next search starts len(token) bytes after the
// hit. The loop stops at the first negative result, and the recorded indices
// are passed to assert together with `exp`.
func testBytesFind(t *testing.T, line, token []byte, startat int, exp []int) {
	step := len(token)

	// Kept as an empty, non-nil slice: it is what gets handed to assert
	// when nothing is found.
	got := make([]int, 0)

	at := startat
	for {
		at = tekstus.BytesFind(line, token, at)
		if at < 0 {
			break
		}

		got = append(got, at)
		at += step
	}

	assert(t, exp, got, true)
}
Пример #2
0
// searchForward scans the two left-over slices for a token of one that also
// occurs in the other, and cuts the leading, unmatched part of both.
//
// For each slice, a window of at most DefMatchLen bytes (shrunk to the slice
// length when the slice is shorter) slides forward starting at index 1, and
// each window is searched in the other slice with tekstus.BytesFind.
//
// When neither scan ends with a non-negative result, the whole of `*oldleft`
// is returned as a deletion and the whole of `*newleft` as an addition, and
// both slices are reset to empty; `*x` and `*y` are left untouched.
//
// Otherwise the bytes of `*oldleft` before the old-side scan position are
// returned as a deletion and the bytes of `*newleft` before the new-side scan
// position as an addition; both slices are advanced past the cut and `*x` and
// `*y` are increased by the number of bytes cut.
//
// Chunk positions are `atx + *x` for deletions and `aty + *y` for additions,
// using the values of `*x` and `*y` on entry.
func searchForward(atx, aty int, x, y *int, oldleft, newleft *[]byte) (
	adds, dels tekstus.Chunks,
) {
	olds, news := *oldleft, *newleft

	// scan slides a token window over `from`, starting at index 1, and
	// looks each token up in `in`. It returns the index in `from` where
	// the scan stopped and the last result of the lookup (-1 when no
	// lookup was done at all).
	scan := func(from, in []byte) (cut, at int) {
		fromlen := len(from)

		toklen := DefMatchLen
		if fromlen < toklen {
			toklen = fromlen
		}

		at = -1
		for cut = 1; cut < fromlen-toklen; cut++ {
			at = tekstus.BytesFind(in, from[cut:cut+toklen], 0)
			if at > 0 {
				break
			}
		}

		return cut, at
	}

	xcut, xaty := scan(olds, news)
	ycut, yatx := scan(news, olds)

	if xaty < 0 && yatx < 0 {
		// No token found in either direction: the whole left-over has
		// been replaced.
		dels = append(dels, tekstus.Chunk{StartAt: atx + *x, V: olds})
		adds = append(adds, tekstus.Chunk{StartAt: aty + *y, V: news})
		*oldleft = []byte{}
		*newleft = []byte{}

		return adds, dels
	}

	// Only the leading part has been replaced; cut it from both sides.
	dels = append(dels, tekstus.Chunk{StartAt: atx + *x, V: olds[:xcut]})
	*oldleft = olds[xcut:]
	*x += xcut

	adds = append(adds, tekstus.Chunk{StartAt: aty + *y, V: news[:ycut]})
	*newleft = news[ycut:]
	*y += ycut

	return adds, dels
}
Пример #3
0
/*
Lines given two similar lines, find and return the differences (additions and
deletions) between them.

`atx` and `aty` are the offsets of `old` and `new` inside the lines they were
cut from (0 for a whole line). Every deletion chunk has its StartAt set to
`atx` plus its index in `old`, and every addition chunk has its StartAt set to
`aty` plus its index in `new`. Chunk values are sub-slices of `old` and `new`,
not copies.

If `old` and `new` are identical, no chunk is returned.

Case 1: addition on new or deletion on old, at the end of line.

	old: 00000
	new: 00000111

or

	old: 00000111
	new: 00000

Case 2: addition on new line

	old: 000000
	new: 0001000

Case 3: deletion on old line (reverse of case 2)

	old: 0001000
	new: 000000

Case 4: change happened in the beginning

	old: 11000
	new: 22000

Case 5: both changed

	old: 0001000
	new: 0002000
*/
func Lines(old, new []byte, atx, aty int) (adds, dels tekstus.Chunks) {
	oldlen := len(old)
	newlen := len(new)

	minlen := newlen
	if oldlen < newlen {
		minlen = oldlen
	}

	// Find the position of the first unmatched byte from the beginning.
	x := 0
	for x < minlen && old[x] == new[x] {
		x++
	}
	y := x

	// Case 1: the shorter line is a prefix of the longer one, so the
	// difference is at the end. Equal lines produce no chunk at all.
	if x == minlen {
		switch {
		case oldlen < newlen:
			adds = append(adds, tekstus.Chunk{StartAt: aty + y, V: new[y:]})
		case oldlen > newlen:
			dels = append(dels, tekstus.Chunk{StartAt: atx + x, V: old[x:]})
		}
		return adds, dels
	}

	// Find the position of the first unmatched byte from the end, without
	// walking back into the prefix that has been matched above.
	xend := oldlen - 1
	yend := newlen - 1

	for xend >= x && yend >= y {
		if old[xend] != new[yend] {
			break
		}
		xend--
		yend--
	}

	// Case 2: addition in new line; nothing is left in old between the
	// matching prefix and suffix.
	if x == xend+1 {
		v := new[y : yend+1]
		adds = append(adds, tekstus.Chunk{StartAt: aty + y, V: v})
		return adds, dels
	}

	// Case 3: deletion in old line; nothing is left in new between the
	// matching prefix and suffix.
	if y == yend+1 {
		v := old[x : xend+1]
		dels = append(dels, tekstus.Chunk{StartAt: atx + x, V: v})
		return adds, dels
	}

	// After removing the similar bytes at the beginning and the end of
	// line, we have `n` number of bytes left in old and new.
	oldleft := old[x : xend+1]
	newleft := new[y : yend+1]
	oldleftlen := len(oldleft)
	newleftlen := len(newleft)

	// Get the minimal token of old left-over and search it in the new
	// left-over.
	minlen = DefMatchLen
	if oldleftlen < DefMatchLen {
		minlen = oldleftlen
	}
	xtoken := oldleft[:minlen]

	xaty := tekstus.BytesFind(newleft, xtoken, 0)

	// Get the minimal token of new left-over and search it in the old
	// left-over.
	minlen = DefMatchLen
	if newleftlen < DefMatchLen {
		minlen = newleftlen
	}
	ytoken := newleft[:minlen]

	yatx := tekstus.BytesFind(oldleft, ytoken, 0)

	// Case 4:
	// Neither leading token was found on the other side, which means some
	// chunk at the beginning of both left-overs has been replaced.
	// searchForward cuts the replaced part from oldleft and newleft and
	// advances x and y by the number of bytes it cut.
	if xaty < 0 && yatx < 0 {
		addsleft, delsleft := searchForward(atx, aty, &x, &y, &oldleft,
			&newleft)

		adds = append(adds, addsleft...)
		dels = append(dels, delsleft...)

		// If one side is now empty, whatever is left on the other side
		// is a plain addition or deletion.
		if len(oldleft) == 0 {
			if len(newleft) > 0 {
				adds = append(adds, tekstus.Chunk{
					StartAt: aty + y,
					V:       newleft,
				})
			}
			return adds, dels
		}
		if len(newleft) == 0 {
			dels = append(dels, tekstus.Chunk{
				StartAt: atx + x,
				V:       oldleft,
			})
			return adds, dels
		}
	}

	// Case 5: is combination of case 2 and 3.
	switch {
	case xaty >= 0:
		// Case 2: We found x token at y: xaty. Bytes before that must
		// be an addition.
		v := new[y : y+xaty]
		adds = append(adds, tekstus.Chunk{StartAt: aty + y, V: v})
		newleft = new[y+xaty : yend+1]
		// Keep y in sync with the start of newleft, so the offset
		// passed to the recursive call below is correct.
		y += xaty
	case yatx >= 0:
		// Case 3: We found y token at x: yatx. Bytes before that must
		// be a deletion.
		v := old[x : x+yatx]
		dels = append(dels, tekstus.Chunk{StartAt: atx + x, V: v})
		oldleft = old[x+yatx : xend+1]
		// Keep x in sync with the start of oldleft.
		x += yatx
	}

	// Continue with what is left; at this point oldleft starts at old[x]
	// and newleft starts at new[y].
	addsleft, delsleft := Lines(oldleft, newleft, atx+x, aty+y)

	adds = append(adds, addsleft...)
	dels = append(dels, delsleft...)

	return adds, dels
}
Пример #4
0
/*
BytesRatio compare two slices of bytes and return the ratio of matching bytes.

The ratio is in range of 0.0 to 1.0, where 1.0 means both slices are equal
(two empty slices included), and 0.0 means no match was found.
It is computed as `m / maxlen`, where `m` is the number of matching bytes that
were counted and `maxlen` is the length of the longer slice; both are returned
along with the ratio.

`minTokenLen` defines the minimum length of token for searching in both
slices. A negative value selects DefMatchLen. With a zero value no token is
searched, only the common beginning and end of the slices are counted.
*/
func BytesRatio(old, new []byte, minTokenLen int) (
	ratio float32, m int, maxlen int,
) {
	oldlen := len(old)
	newlen := len(new)
	minlen := oldlen
	maxlen = newlen
	if newlen < oldlen {
		minlen = newlen
		maxlen = oldlen
	}

	// Both slices are empty, hence equal. Return early, otherwise the
	// ratio below is 0/0, which is NaN.
	if maxlen == 0 {
		return 1, 0, 0
	}

	if minTokenLen < 0 {
		minTokenLen = DefMatchLen
	}

	x, y := 0, 0

	for {
		// Count matching bytes from beginning of slice.
		for x < minlen && old[x] == new[y] {
			m++
			x++
			y++
		}

		if x == minlen {
			// All bytes are matched, but probably there are some
			// trailing bytes in one of them.
			break
		}

		// Count matching bytes from end of slice, without walking back
		// into the bytes counted above.
		xend := oldlen - 1
		yend := newlen - 1

		for xend >= x && yend >= y && old[xend] == new[yend] {
			m++
			xend--
			yend--
		}

		// Only one byte left on one side: the change is in the
		// middle, nothing more to match.
		if xend == x || yend == y {
			break
		}

		// Cut the matching bytes.
		old = old[x : xend+1]
		new = new[y : yend+1]
		oldlen = len(old)

		// Get minimal token to search in the new left over.
		minlen = minTokenLen
		if oldlen < minlen {
			minlen = oldlen
		}
		if minlen <= 0 {
			// Nothing left in old, or no token length to search
			// with.
			break
		}

		// Search old token in new, chunk by chunk.
		x = 0
		y = -1
		last := oldlen - minlen
		for ; x < last; x++ {
			token := old[x : x+minlen]

			y = tekstus.BytesFind(new, token, 0)
			if y >= 0 {
				// Index 0 is a valid match too; it happens
				// when the leading bytes of old were removed.
				break
			}
		}

		if y < 0 {
			// We did not find anything.
			break
		}

		// Cut the changes, so both slices start with the token.
		old = old[x:]
		new = new[y:]
		oldlen = len(old)
		newlen = len(new)

		minlen = oldlen
		if newlen < minlen {
			minlen = newlen
		}

		// Start again from the beginning.
		x, y = 0, 0
	}

	ratio = float32(m) / float32(maxlen)

	return ratio, m, maxlen
}