Example #1
0
// computeInverse8 runs one pass of the 8-point inverse transform over a block
// of 64 values.
//
// For every col in 0..7 it gathers the eight values input[col], input[col+8],
// ..., input[col+56], combines them with the W8_* weights (declared elsewhere
// in the package), adds half of 1<<shift for rounding, shifts right by shift
// and stores the eight results in output[8*col] .. output[8*col+7]. The
// result is therefore written transposed with respect to the order in which
// it was read. Every stored value is passed through kanzi.Clamp together with
// MIN_VAL8 and MAX_VAL8 (presumably limiting it to that range; kanzi.Clamp is
// not visible here).
//
// Both slices must hold at least 64 elements; shorter slices cause an index
// out of range panic.
func computeInverse8(input []int, output []int, shift uint) {
	half := (1 << shift) >> 1

	for col := 0; col < 8; col++ {
		// Gather one column of the block, top to bottom.
		s0 := input[col]
		s1 := input[col+8]
		s2 := input[col+16]
		s3 := input[col+24]
		s4 := input[col+32]
		s5 := input[col+40]
		s6 := input[col+48]
		s7 := input[col+56]

		// Contribution of the odd rows (1, 3, 5, 7). It is added for the
		// first four results and subtracted for the mirrored last four.
		odd0 := W8_8*s1 + W8_24*s3 + W8_40*s5 + W8_56*s7
		odd1 := W8_9*s1 + W8_25*s3 + W8_41*s5 + W8_57*s7
		odd2 := W8_10*s1 + W8_26*s3 + W8_42*s5 + W8_58*s7
		odd3 := W8_11*s1 + W8_27*s3 + W8_43*s5 + W8_59*s7

		// Contribution of rows 2 and 6, then of rows 0 and 4.
		mid0 := W8_16*s2 + W8_48*s6
		mid1 := W8_17*s2 + W8_49*s6
		base0 := W8_0*s0 + W8_32*s4
		base1 := W8_1*s0 + W8_33*s4

		// Even part, listed in the order in which the results consume it.
		even0 := base0 + mid0
		even1 := base1 + mid1
		even2 := base1 - mid1
		even3 := base0 - mid0

		// Round and scale all eight results before anything is stored.
		res := [8]int{
			(even0 + odd0 + half) >> shift,
			(even1 + odd1 + half) >> shift,
			(even2 + odd2 + half) >> shift,
			(even3 + odd3 + half) >> shift,
			(even3 - odd3 + half) >> shift,
			(even2 - odd2 + half) >> shift,
			(even1 - odd1 + half) >> shift,
			(even0 - odd0 + half) >> shift,
		}

		dst := col * 8
		for k, v := range res {
			output[dst+k] = kanzi.Clamp(v, MIN_VAL8, MAX_VAL8)
		}
	}
}
Example #2
0
// computeInverse4 runs one pass of the 4-point inverse transform over a block
// of 16 values.
//
// Column col of the block is input[col], input[col+4], input[col+8] and
// input[col+12]. Its four results are built from the W4_* weights (declared
// elsewhere in the package), rounded by adding half of 1<<shift, shifted
// right by shift and stored in output[4*col] .. output[4*col+3], i.e. the
// result is written transposed. Every stored value is passed through
// kanzi.Clamp together with MIN_VAL4 and MAX_VAL4 (presumably limiting it to
// that range; kanzi.Clamp is not visible here).
//
// All 16 inputs are fetched before the first output element is written.
// Both slices must hold at least 16 elements; shorter slices cause an index
// out of range panic.
func computeInverse4(input, output []int, shift uint) {
	half := (1 << shift) >> 1

	// Fetch the whole block first, in index order, so that no output is
	// written while input values are still pending.
	var src [16]int
	for k := range src {
		src[k] = input[k]
	}

	var res [16]int
	for col := 0; col < 4; col++ {
		// Rows 1 and 3 form the odd part, rows 0 and 2 the even part.
		odd0 := W4_4*src[col+4] + W4_12*src[col+12]
		odd1 := W4_5*src[col+4] + W4_13*src[col+12]
		even0 := W4_0*src[col] + W4_8*src[col+8]
		even1 := W4_1*src[col] + W4_9*src[col+8]

		// The last two results mirror the first two with the odd part
		// subtracted instead of added.
		row := res[4*col : 4*col+4]
		row[0] = (even0 + odd0 + half) >> shift
		row[1] = (even1 + odd1 + half) >> shift
		row[2] = (even1 - odd1 + half) >> shift
		row[3] = (even0 - odd0 + half) >> shift
	}

	for k, v := range res {
		output[k] = kanzi.Clamp(v, MIN_VAL4, MAX_VAL4)
	}
}
Example #3
0
// computeInverse16 runs one pass of the 16-point inverse transform over a
// block of 256 values.
//
// For every col in 0..15 it gathers the sixteen values input[col],
// input[col+16], ..., input[col+240], combines them with the W16_* weights
// (declared elsewhere in the package), adds half of 1<<shift for rounding,
// shifts right by shift and stores the sixteen results in output[16*col] ..
// output[16*col+15], i.e. the result is written transposed. Every stored
// value is passed through kanzi.Clamp together with MIN_VAL16 and MAX_VAL16
// (presumably limiting it to that range; kanzi.Clamp is not visible here).
//
// Both slices must hold at least 256 elements; shorter slices cause an index
// out of range panic.
func computeInverse16(input, output []int, shift uint) {
	half := (1 << shift) >> 1

	for col := 0; col < 16; col++ {
		// Gather one column of the block, top to bottom.
		var r [16]int
		for k := range r {
			r[k] = input[col+16*k]
		}

		// Contribution of the odd rows. It is added for results 0..7 and
		// subtracted for the mirrored results 15..8.
		odd := [8]int{
			W16_16*r[1] + W16_48*r[3] + W16_80*r[5] + W16_112*r[7] +
				W16_144*r[9] + W16_176*r[11] + W16_208*r[13] + W16_240*r[15],
			W16_17*r[1] + W16_49*r[3] + W16_81*r[5] + W16_113*r[7] +
				W16_145*r[9] + W16_177*r[11] + W16_209*r[13] + W16_241*r[15],
			W16_18*r[1] + W16_50*r[3] + W16_82*r[5] + W16_114*r[7] +
				W16_146*r[9] + W16_178*r[11] + W16_210*r[13] + W16_242*r[15],
			W16_19*r[1] + W16_51*r[3] + W16_83*r[5] + W16_115*r[7] +
				W16_147*r[9] + W16_179*r[11] + W16_211*r[13] + W16_243*r[15],
			W16_20*r[1] + W16_52*r[3] + W16_84*r[5] + W16_116*r[7] +
				W16_148*r[9] + W16_180*r[11] + W16_212*r[13] + W16_244*r[15],
			W16_21*r[1] + W16_53*r[3] + W16_85*r[5] + W16_117*r[7] +
				W16_149*r[9] + W16_181*r[11] + W16_213*r[13] + W16_245*r[15],
			W16_22*r[1] + W16_54*r[3] + W16_86*r[5] + W16_118*r[7] +
				W16_150*r[9] + W16_182*r[11] + W16_214*r[13] + W16_246*r[15],
			W16_23*r[1] + W16_55*r[3] + W16_87*r[5] + W16_119*r[7] +
				W16_151*r[9] + W16_183*r[11] + W16_215*r[13] + W16_247*r[15],
		}

		// Contribution of rows 2, 6, 10 and 14.
		q0 := W16_32*r[2] + W16_96*r[6] + W16_160*r[10] + W16_224*r[14]
		q1 := W16_33*r[2] + W16_97*r[6] + W16_161*r[10] + W16_225*r[14]
		q2 := W16_34*r[2] + W16_98*r[6] + W16_162*r[10] + W16_226*r[14]
		q3 := W16_35*r[2] + W16_99*r[6] + W16_163*r[10] + W16_227*r[14]

		// Rows 0 and 8 (lo) and rows 4 and 12 (hi) only ever appear as a
		// sum or a difference, so each partial sum is computed once.
		lo0 := W16_0*r[0] + W16_128*r[8]
		hi0 := W16_64*r[4] + W16_192*r[12]
		lo1 := W16_1*r[0] + W16_129*r[8]
		hi1 := W16_65*r[4] + W16_193*r[12]
		sum0, diff0 := lo0+hi0, lo0-hi0
		sum1, diff1 := lo1+hi1, lo1-hi1

		// Even part, in the order in which the results consume it.
		even := [8]int{
			sum0 + q0,
			sum1 + q1,
			diff1 + q2,
			diff0 + q3,
			diff0 - q3,
			diff1 - q2,
			sum1 - q1,
			sum0 - q0,
		}

		// Round and scale all sixteen results before anything is stored.
		var res [16]int
		for k := 0; k < 8; k++ {
			res[k] = (even[k] + odd[k] + half) >> shift
			res[15-k] = (even[k] - odd[k] + half) >> shift
		}

		dst := col * 16
		for k, v := range res {
			output[dst+k] = kanzi.Clamp(v, MIN_VAL16, MAX_VAL16)
		}
	}
}
Example #4
0
// computeInverse32 runs one pass of the 32-point inverse transform over a
// block of 1024 values.
//
// For every i in 0..31 it gathers the 32 values input[i], input[i+32], ...,
// input[i+992], combines them with entries of the weight table w (declared
// outside this function), adds round = (1<<shift)>>1, shifts right by shift
// and stores the 32 results in output[32*i] .. output[32*i+31], i.e. the
// result is written transposed with respect to the order in which it was
// read. Every stored value is passed through kanzi.Clamp together with
// MIN_VAL32 and MAX_VAL32 (presumably limiting it to that range; kanzi.Clamp
// is not visible here).
//
// Both slices must hold at least 1024 elements; shorter slices cause an index
// out of range panic.
func computeInverse32(input, output []int, shift uint) {
	oIdx := 0
	round := (1 << shift) >> 1

	for i := 0; i < 32; i++ {
		// Gather column i of the block: xk is the value in row k.
		x0 := input[i]
		x1 := input[i+32]
		x2 := input[i+64]
		x3 := input[i+96]
		x4 := input[i+128]
		x5 := input[i+160]
		x6 := input[i+192]
		x7 := input[i+224]
		x8 := input[i+256]
		x9 := input[i+288]
		x10 := input[i+320]
		x11 := input[i+352]
		x12 := input[i+384]
		x13 := input[i+416]
		x14 := input[i+448]
		x15 := input[i+480]
		x16 := input[i+512]
		x17 := input[i+544]
		x18 := input[i+576]
		x19 := input[i+608]
		x20 := input[i+640]
		x21 := input[i+672]
		x22 := input[i+704]
		x23 := input[i+736]
		x24 := input[i+768]
		x25 := input[i+800]
		x26 := input[i+832]
		x27 := input[i+864]
		x28 := input[i+896]
		x29 := input[i+928]
		x30 := input[i+960]
		x31 := input[i+992]

		// a0..a15: contribution of the odd rows x1, x3, ..., x31.
		// ak uses the weights w[32*row+k] for each odd row.
		a0 := (w[32] * x1) + (w[96] * x3) + (w[160] * x5) + (w[224] * x7) +
			(w[288] * x9) + (w[352] * x11) + (w[416] * x13) + (w[480] * x15) +
			(w[544] * x17) + (w[608] * x19) + (w[672] * x21) + (w[736] * x23) +
			(w[800] * x25) + (w[864] * x27) + (w[928] * x29) + (w[992] * x31)
		a1 := (w[33] * x1) + (w[97] * x3) + (w[161] * x5) + (w[225] * x7) +
			(w[289] * x9) + (w[353] * x11) + (w[417] * x13) + (w[481] * x15) +
			(w[545] * x17) + (w[609] * x19) + (w[673] * x21) + (w[737] * x23) +
			(w[801] * x25) + (w[865] * x27) + (w[929] * x29) + (w[993] * x31)
		a2 := (w[34] * x1) + (w[98] * x3) + (w[162] * x5) + (w[226] * x7) +
			(w[290] * x9) + (w[354] * x11) + (w[418] * x13) + (w[482] * x15) +
			(w[546] * x17) + (w[610] * x19) + (w[674] * x21) + (w[738] * x23) +
			(w[802] * x25) + (w[866] * x27) + (w[930] * x29) + (w[994] * x31)
		a3 := (w[35] * x1) + (w[99] * x3) + (w[163] * x5) + (w[227] * x7) +
			(w[291] * x9) + (w[355] * x11) + (w[419] * x13) + (w[483] * x15) +
			(w[547] * x17) + (w[611] * x19) + (w[675] * x21) + (w[739] * x23) +
			(w[803] * x25) + (w[867] * x27) + (w[931] * x29) + (w[995] * x31)
		a4 := (w[36] * x1) + (w[100] * x3) + (w[164] * x5) + (w[228] * x7) +
			(w[292] * x9) + (w[356] * x11) + (w[420] * x13) + (w[484] * x15) +
			(w[548] * x17) + (w[612] * x19) + (w[676] * x21) + (w[740] * x23) +
			(w[804] * x25) + (w[868] * x27) + (w[932] * x29) + (w[996] * x31)
		a5 := (w[37] * x1) + (w[101] * x3) + (w[165] * x5) + (w[229] * x7) +
			(w[293] * x9) + (w[357] * x11) + (w[421] * x13) + (w[485] * x15) +
			(w[549] * x17) + (w[613] * x19) + (w[677] * x21) + (w[741] * x23) +
			(w[805] * x25) + (w[869] * x27) + (w[933] * x29) + (w[997] * x31)
		a6 := (w[38] * x1) + (w[102] * x3) + (w[166] * x5) + (w[230] * x7) +
			(w[294] * x9) + (w[358] * x11) + (w[422] * x13) + (w[486] * x15) +
			(w[550] * x17) + (w[614] * x19) + (w[678] * x21) + (w[742] * x23) +
			(w[806] * x25) + (w[870] * x27) + (w[934] * x29) + (w[998] * x31)
		a7 := (w[39] * x1) + (w[103] * x3) + (w[167] * x5) + (w[231] * x7) +
			(w[295] * x9) + (w[359] * x11) + (w[423] * x13) + (w[487] * x15) +
			(w[551] * x17) + (w[615] * x19) + (w[679] * x21) + (w[743] * x23) +
			(w[807] * x25) + (w[871] * x27) + (w[935] * x29) + (w[999] * x31)
		a8 := (w[40] * x1) + (w[104] * x3) + (w[168] * x5) + (w[232] * x7) +
			(w[296] * x9) + (w[360] * x11) + (w[424] * x13) + (w[488] * x15) +
			(w[552] * x17) + (w[616] * x19) + (w[680] * x21) + (w[744] * x23) +
			(w[808] * x25) + (w[872] * x27) + (w[936] * x29) + (w[1000] * x31)
		a9 := (w[41] * x1) + (w[105] * x3) + (w[169] * x5) + (w[233] * x7) +
			(w[297] * x9) + (w[361] * x11) + (w[425] * x13) + (w[489] * x15) +
			(w[553] * x17) + (w[617] * x19) + (w[681] * x21) + (w[745] * x23) +
			(w[809] * x25) + (w[873] * x27) + (w[937] * x29) + (w[1001] * x31)
		a10 := (w[42] * x1) + (w[106] * x3) + (w[170] * x5) + (w[234] * x7) +
			(w[298] * x9) + (w[362] * x11) + (w[426] * x13) + (w[490] * x15) +
			(w[554] * x17) + (w[618] * x19) + (w[682] * x21) + (w[746] * x23) +
			(w[810] * x25) + (w[874] * x27) + (w[938] * x29) + (w[1002] * x31)
		a11 := (w[43] * x1) + (w[107] * x3) + (w[171] * x5) + (w[235] * x7) +
			(w[299] * x9) + (w[363] * x11) + (w[427] * x13) + (w[491] * x15) +
			(w[555] * x17) + (w[619] * x19) + (w[683] * x21) + (w[747] * x23) +
			(w[811] * x25) + (w[875] * x27) + (w[939] * x29) + (w[1003] * x31)
		a12 := (w[44] * x1) + (w[108] * x3) + (w[172] * x5) + (w[236] * x7) +
			(w[300] * x9) + (w[364] * x11) + (w[428] * x13) + (w[492] * x15) +
			(w[556] * x17) + (w[620] * x19) + (w[684] * x21) + (w[748] * x23) +
			(w[812] * x25) + (w[876] * x27) + (w[940] * x29) + (w[1004] * x31)
		a13 := (w[45] * x1) + (w[109] * x3) + (w[173] * x5) + (w[237] * x7) +
			(w[301] * x9) + (w[365] * x11) + (w[429] * x13) + (w[493] * x15) +
			(w[557] * x17) + (w[621] * x19) + (w[685] * x21) + (w[749] * x23) +
			(w[813] * x25) + (w[877] * x27) + (w[941] * x29) + (w[1005] * x31)
		a14 := (w[46] * x1) + (w[110] * x3) + (w[174] * x5) + (w[238] * x7) +
			(w[302] * x9) + (w[366] * x11) + (w[430] * x13) + (w[494] * x15) +
			(w[558] * x17) + (w[622] * x19) + (w[686] * x21) + (w[750] * x23) +
			(w[814] * x25) + (w[878] * x27) + (w[942] * x29) + (w[1006] * x31)
		a15 := (w[47] * x1) + (w[111] * x3) + (w[175] * x5) + (w[239] * x7) +
			(w[303] * x9) + (w[367] * x11) + (w[431] * x13) + (w[495] * x15) +
			(w[559] * x17) + (w[623] * x19) + (w[687] * x21) + (w[751] * x23) +
			(w[815] * x25) + (w[879] * x27) + (w[943] * x29) + (w[1007] * x31)

		// b0..b7: contribution of rows 2, 6, 10, ..., 30.
		b0 := (w[64] * x2) + (w[192] * x6) + (w[320] * x10) + (w[448] * x14) +
			(w[576] * x18) + (w[704] * x22) + (w[832] * x26) + (w[960] * x30)
		b1 := (w[65] * x2) + (w[193] * x6) + (w[321] * x10) + (w[449] * x14) +
			(w[577] * x18) + (w[705] * x22) + (w[833] * x26) + (w[961] * x30)
		b2 := (w[66] * x2) + (w[194] * x6) + (w[322] * x10) + (w[450] * x14) +
			(w[578] * x18) + (w[706] * x22) + (w[834] * x26) + (w[962] * x30)
		b3 := (w[67] * x2) + (w[195] * x6) + (w[323] * x10) + (w[451] * x14) +
			(w[579] * x18) + (w[707] * x22) + (w[835] * x26) + (w[963] * x30)
		b4 := (w[68] * x2) + (w[196] * x6) + (w[324] * x10) + (w[452] * x14) +
			(w[580] * x18) + (w[708] * x22) + (w[836] * x26) + (w[964] * x30)
		b5 := (w[69] * x2) + (w[197] * x6) + (w[325] * x10) + (w[453] * x14) +
			(w[581] * x18) + (w[709] * x22) + (w[837] * x26) + (w[965] * x30)
		b6 := (w[70] * x2) + (w[198] * x6) + (w[326] * x10) + (w[454] * x14) +
			(w[582] * x18) + (w[710] * x22) + (w[838] * x26) + (w[966] * x30)
		b7 := (w[71] * x2) + (w[199] * x6) + (w[327] * x10) + (w[455] * x14) +
			(w[583] * x18) + (w[711] * x22) + (w[839] * x26) + (w[967] * x30)

		// c0..c3: rows 4, 12, 20, 28; c4/c5: rows 8 and 24; c6/c7: rows 0
		// and 16. c8..c11 merge the last two groups with a butterfly.
		c0 := (w[128] * x4) + (w[384] * x12) + (w[640] * x20) + (w[896] * x28)
		c1 := (w[129] * x4) + (w[385] * x12) + (w[641] * x20) + (w[897] * x28)
		c2 := (w[130] * x4) + (w[386] * x12) + (w[642] * x20) + (w[898] * x28)
		c3 := (w[131] * x4) + (w[387] * x12) + (w[643] * x20) + (w[899] * x28)
		c4 := (w[256] * x8) + (w[768] * x24)
		c5 := (w[257] * x8) + (w[769] * x24)
		c6 := (w[0] * x0) + (w[512] * x16)
		c7 := (w[1] * x0) + (w[513] * x16)
		c8 := c6 + c4
		c9 := c7 + c5
		c10 := c7 - c5
		c11 := c6 - c4

		// d0..d7: butterfly of c8..c11 with c0..c3; d(7-k) mirrors dk with
		// the second operand subtracted.
		d0 := c8 + c0
		d1 := c9 + c1
		d2 := c10 + c2
		d3 := c11 + c3
		d4 := c11 - c3
		d5 := c10 - c2
		d6 := c9 - c1
		d7 := c8 - c0

		// e0..e15: butterfly of d0..d7 with b0..b7; e(15-k) = dk - bk.
		e0 := d0 + b0
		e1 := d1 + b1
		e2 := d2 + b2
		e3 := d3 + b3
		e4 := d4 + b4
		e5 := d5 + b5
		e6 := d6 + b6
		e7 := d7 + b7
		e8 := d7 - b7
		e9 := d6 - b6
		e10 := d5 - b5
		e11 := d4 - b4
		e12 := d3 - b3
		e13 := d2 - b2
		e14 := d1 - b1
		e15 := d0 - b0

		// Final butterfly with the odd part, rounded and scaled:
		// rk = ek + ak for k in 0..15 and r(31-k) = ek - ak.
		r0 := (e0 + a0 + round) >> shift
		r1 := (e1 + a1 + round) >> shift
		r2 := (e2 + a2 + round) >> shift
		r3 := (e3 + a3 + round) >> shift
		r4 := (e4 + a4 + round) >> shift
		r5 := (e5 + a5 + round) >> shift
		r6 := (e6 + a6 + round) >> shift
		r7 := (e7 + a7 + round) >> shift
		r8 := (e8 + a8 + round) >> shift
		r9 := (e9 + a9 + round) >> shift
		r10 := (e10 + a10 + round) >> shift
		r11 := (e11 + a11 + round) >> shift
		r12 := (e12 + a12 + round) >> shift
		r13 := (e13 + a13 + round) >> shift
		r14 := (e14 + a14 + round) >> shift
		r15 := (e15 + a15 + round) >> shift
		r16 := (e15 - a15 + round) >> shift
		r17 := (e14 - a14 + round) >> shift
		r18 := (e13 - a13 + round) >> shift
		r19 := (e12 - a12 + round) >> shift
		r20 := (e11 - a11 + round) >> shift
		r21 := (e10 - a10 + round) >> shift
		r22 := (e9 - a9 + round) >> shift
		r23 := (e8 - a8 + round) >> shift
		r24 := (e7 - a7 + round) >> shift
		r25 := (e6 - a6 + round) >> shift
		r26 := (e5 - a5 + round) >> shift
		r27 := (e4 - a4 + round) >> shift
		r28 := (e3 - a3 + round) >> shift
		r29 := (e2 - a2 + round) >> shift
		r30 := (e1 - a1 + round) >> shift
		r31 := (e0 - a0 + round) >> shift

		// Store the 32 results contiguously, starting at oIdx.
		output[oIdx] = kanzi.Clamp(r0, MIN_VAL32, MAX_VAL32)
		output[oIdx+1] = kanzi.Clamp(r1, MIN_VAL32, MAX_VAL32)
		output[oIdx+2] = kanzi.Clamp(r2, MIN_VAL32, MAX_VAL32)
		output[oIdx+3] = kanzi.Clamp(r3, MIN_VAL32, MAX_VAL32)
		output[oIdx+4] = kanzi.Clamp(r4, MIN_VAL32, MAX_VAL32)
		output[oIdx+5] = kanzi.Clamp(r5, MIN_VAL32, MAX_VAL32)
		output[oIdx+6] = kanzi.Clamp(r6, MIN_VAL32, MAX_VAL32)
		output[oIdx+7] = kanzi.Clamp(r7, MIN_VAL32, MAX_VAL32)
		output[oIdx+8] = kanzi.Clamp(r8, MIN_VAL32, MAX_VAL32)
		output[oIdx+9] = kanzi.Clamp(r9, MIN_VAL32, MAX_VAL32)
		output[oIdx+10] = kanzi.Clamp(r10, MIN_VAL32, MAX_VAL32)
		output[oIdx+11] = kanzi.Clamp(r11, MIN_VAL32, MAX_VAL32)
		output[oIdx+12] = kanzi.Clamp(r12, MIN_VAL32, MAX_VAL32)
		output[oIdx+13] = kanzi.Clamp(r13, MIN_VAL32, MAX_VAL32)
		output[oIdx+14] = kanzi.Clamp(r14, MIN_VAL32, MAX_VAL32)
		output[oIdx+15] = kanzi.Clamp(r15, MIN_VAL32, MAX_VAL32)
		output[oIdx+16] = kanzi.Clamp(r16, MIN_VAL32, MAX_VAL32)
		output[oIdx+17] = kanzi.Clamp(r17, MIN_VAL32, MAX_VAL32)
		output[oIdx+18] = kanzi.Clamp(r18, MIN_VAL32, MAX_VAL32)
		output[oIdx+19] = kanzi.Clamp(r19, MIN_VAL32, MAX_VAL32)
		output[oIdx+20] = kanzi.Clamp(r20, MIN_VAL32, MAX_VAL32)
		output[oIdx+21] = kanzi.Clamp(r21, MIN_VAL32, MAX_VAL32)
		output[oIdx+22] = kanzi.Clamp(r22, MIN_VAL32, MAX_VAL32)
		output[oIdx+23] = kanzi.Clamp(r23, MIN_VAL32, MAX_VAL32)
		output[oIdx+24] = kanzi.Clamp(r24, MIN_VAL32, MAX_VAL32)
		output[oIdx+25] = kanzi.Clamp(r25, MIN_VAL32, MAX_VAL32)
		output[oIdx+26] = kanzi.Clamp(r26, MIN_VAL32, MAX_VAL32)
		output[oIdx+27] = kanzi.Clamp(r27, MIN_VAL32, MAX_VAL32)
		output[oIdx+28] = kanzi.Clamp(r28, MIN_VAL32, MAX_VAL32)
		output[oIdx+29] = kanzi.Clamp(r29, MIN_VAL32, MAX_VAL32)
		output[oIdx+30] = kanzi.Clamp(r30, MIN_VAL32, MAX_VAL32)
		output[oIdx+31] = kanzi.Clamp(r31, MIN_VAL32, MAX_VAL32)

		oIdx += 32
	}
}