// M256MaskzLzcntEpi64: Counts the number of leading zero bits in each packed // 64-bit integer in 'a', and store the results in 'dst' using zeromask 'k' // (elements are zeroed out when the corresponding mask bit is not set). // // FOR j := 0 to 3 // i := j*64 // IF k[j] // tmp := 63 // dst[i+63:i] := 0 // DO WHILE (tmp >= 0 AND a[i+tmp] == 0) // tmp := tmp - 1 // dst[i+63:i] := dst[i+63:i] + 1 // OD // ELSE // dst[i+63:i] := 0 // FI // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPLZCNTQ'. Intrinsic: '_mm256_maskz_lzcnt_epi64'. // Requires AVX512CD. func M256MaskzLzcntEpi64(k x86.Mmask8, a x86.M256i) (dst x86.M256i) { return x86.M256i(m256MaskzLzcntEpi64(uint8(k), [32]byte(a))) }
// M256BroadcastmwEpi32: Broadcast the low 16-bits from input mask 'k' to all // 32-bit elements of 'dst'. // // FOR j := 0 to 7 // i := j*32 // dst[i+31:i] := ZeroExtend(k[15:0]) // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPBROADCASTMW2D'. Intrinsic: '_mm256_broadcastmw_epi32'. // Requires AVX512CD. func M256BroadcastmwEpi32(k x86.Mmask16) (dst x86.M256i) { return x86.M256i(m256BroadcastmwEpi32(uint16(k))) }
// M256MaskLzcntEpi64: Counts the number of leading zero bits in each packed // 64-bit integer in 'a', and store the results in 'dst' using writemask 'k' // (elements are copied from 'src' when the corresponding mask bit is not set). // // FOR j := 0 to 3 // i := j*64 // IF k[j] // tmp := 63 // dst[i+63:i] := 0 // DO WHILE (tmp >= 0 AND a[i+tmp] == 0) // tmp := tmp - 1 // dst[i+63:i] := dst[i+63:i] + 1 // OD // ELSE // dst[i+63:i] := src[i+63:i] // FI // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPLZCNTQ'. Intrinsic: '_mm256_mask_lzcnt_epi64'. // Requires AVX512CD. func M256MaskLzcntEpi64(src x86.M256i, k x86.Mmask8, a x86.M256i) (dst x86.M256i) { return x86.M256i(m256MaskLzcntEpi64([32]byte(src), uint8(k), [32]byte(a))) }
// M256LzcntEpi64: Counts the number of leading zero bits in each packed 64-bit // integer in 'a', and store the results in 'dst'. // // FOR j := 0 to 3 // i := j*64 // tmp := 63 // dst[i+63:i] := 0 // DO WHILE (tmp >= 0 AND a[i+tmp] == 0) // tmp := tmp - 1 // dst[i+63:i] := dst[i+63:i] + 1 // OD // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPLZCNTQ'. Intrinsic: '_mm256_lzcnt_epi64'. // Requires AVX512CD. func M256LzcntEpi64(a x86.M256i) (dst x86.M256i) { return x86.M256i(m256LzcntEpi64([32]byte(a))) }
// M256ConflictEpi64: Test each 64-bit element of 'a' for equality with all // other elements in 'a' closer to the least significant bit. Each element's // comparison forms a zero extended bit vector in 'dst'. // // FOR j := 0 to 3 // i := j*64 // FOR k := 0 to j-1 // m := k*64 // dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0 // ENDFOR // dst[i+63:i+j] := 0 // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPCONFLICTQ'. Intrinsic: '_mm256_conflict_epi64'. // Requires AVX512CD. func M256ConflictEpi64(a x86.M256i) (dst x86.M256i) { return x86.M256i(m256ConflictEpi64([32]byte(a))) }
// M256BroadcastmbEpi64: Broadcast the low 8-bits from input mask 'k' to all // 64-bit elements of 'dst'. // // FOR j := 0 to 3 // i := j*64 // dst[i+63:i] := ZeroExtend(k[7:0]) // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPBROADCASTMB2Q'. Intrinsic: '_mm256_broadcastmb_epi64'. // Requires AVX512CD. func M256BroadcastmbEpi64(k x86.Mmask8) (dst x86.M256i) { return x86.M256i(m256BroadcastmbEpi64(uint8(k))) }
// M256MaskzConflictEpi32: Test each 32-bit element of 'a' for equality with // all other elements in 'a' closer to the least significant bit using zeromask // 'k' (elements are zeroed out when the corresponding mask bit is not set). // Each element's comparison forms a zero extended bit vector in 'dst'. // // FOR j := 0 to 7 // i := j*32 // IF k[i] // FOR l := 0 to j-1 // m := l*32 // dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 // ENDFOR // dst[i+31:i+j] := 0 // ELSE // dst[i+31:i] := 0 // FI // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPCONFLICTD'. Intrinsic: '_mm256_maskz_conflict_epi32'. // Requires AVX512CD. func M256MaskzConflictEpi32(k x86.Mmask8, a x86.M256i) (dst x86.M256i) { return x86.M256i(m256MaskzConflictEpi32(uint8(k), [32]byte(a))) }
// M256MaskConflictEpi32: Test each 32-bit element of 'a' for equality with all // other elements in 'a' closer to the least significant bit using writemask // 'k' (elements are copied from 'src' when the corresponding mask bit is not // set). Each element's comparison forms a zero extended bit vector in 'dst'. // // FOR j := 0 to 7 // i := j*32 // IF k[i] // FOR l := 0 to j-1 // m := l*32 // dst[i+l] := (a[i+31:i] == a[m+31:m]) ? 1 : 0 // ENDFOR // dst[i+31:i+j] := 0 // ELSE // dst[i+31:i] := src[i+31:i] // FI // ENDFOR // dst[MAX:256] := 0 // // Instruction: 'VPCONFLICTD'. Intrinsic: '_mm256_mask_conflict_epi32'. // Requires AVX512CD. func M256MaskConflictEpi32(src x86.M256i, k x86.Mmask8, a x86.M256i) (dst x86.M256i) { return x86.M256i(m256MaskConflictEpi32([32]byte(src), uint8(k), [32]byte(a))) }