/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* Autogenerated file, do not edit */

#include "compiler.h"

/* Exchange two packer locals through an unsigned temporary. */
#ifndef BI_PACK_SWAP
#define BI_PACK_SWAP(a, b) \
    do { unsigned bi_pack_swap_tmp = (a); (a) = (b); (b) = bi_pack_swap_tmp; } while (0)
#endif

/*
 * fma_arshift_i32: src1 is asserted to be packed source 3; the lane2 field
 * (bit 9) is looked up from src[2].swizzle.
 */
static inline unsigned
bi_pack_fma_arshift_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane2_lut[] = {
        ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src1) & 0x8);
    assert(I->src[2].swizzle < 13);
    unsigned lane2 = lane2_lut[I->src[2].swizzle];
    assert(lane2 < 4);

    return 0x335018 | src0 | (src1 << 3) | (src2 << 6) | (lane2 << 9);
}

/*
 * fma_arshift_v2i16: two encodings are selected by the translated lanes2
 * value (0..3 use base 0x334818, 4..6 use base 0x335818).
 */
static inline unsigned
bi_pack_fma_arshift_v2i16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lanes2_lut[] = {
        ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6
    };

    assert((1 << src1) & 0x8);
    assert(I->src[2].swizzle < 13);
    unsigned lanes2 = lanes2_lut[I->src[2].swizzle];
    assert(lanes2 < 8);

    if (lanes2 <= 3) {
        unsigned field_9 = 0;

        switch (lanes2) {
        case 0: field_9 = 0; break;
        case 1: field_9 = 1; break;
        case 2: field_9 = 2; break;
        case 3: field_9 = 3; break;
        default: unreachable("No pattern match at pos 9");
        }

        return 0x334818 | src0 | (src1 << 3) | (src2 << 6) | (field_9 << 9);
    }

    if (lanes2 >= 4 && lanes2 <= 6) {
        unsigned field_9 = 0;

        switch (lanes2) {
        case 4: field_9 = 1; break;
        case 5: field_9 = 2; break;
        case 6: field_9 = 3; break;
        default: unreachable("No pattern match at pos 9");
        }

        return 0x335818 | src0 | (src1 << 3) | (src2 << 6) | (field_9 << 9);
    }

    unreachable("No matching state found in fma_arshift_v2i16");
}

/*
 * fma_arshift_v4i8: a translated lanes2 of 0 uses base 0x335818 with no
 * lane field; values 1..4 use base 0x334018 with the field at bit 9.
 */
static inline unsigned
bi_pack_fma_arshift_v4i8(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lanes2_lut[] = {
        ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src1) & 0x8);
    assert(I->src[2].swizzle < 13);
    unsigned lanes2 = lanes2_lut[I->src[2].swizzle];
    assert(lanes2 < 8);

    if (lanes2 != 0) {
        unsigned field_9 = 0;

        switch (lanes2) {
        case 1: field_9 = 0; break;
        case 2: field_9 = 1; break;
        case 3: field_9 = 2; break;
        case 4: field_9 = 3; break;
        default: unreachable("No pattern match at pos 9");
        }

        return 0x334018 | src0 | (src1 << 3) | (src2 << 6) | (field_9 << 9);
    }

    if (lanes2 == 0)
        return 0x335818 | src0 | (src1 << 3) | (src2 << 6);

    unreachable("No matching state found in fma_arshift_v4i8");
}

/* fma_arshift_double_i32: bytes2 at bit 9, lane2 at bit 10, result_word at bit 11. */
static inline unsigned
bi_pack_fma_arshift_double_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane2_lut[] = {
        ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned bytes2 = I->bytes2;
    assert(bytes2 < 2);

    assert(I->src[2].swizzle < 13);
    unsigned lane2 = lane2_lut[I->src[2].swizzle];
    assert(lane2 < 2);

    unsigned result_word = I->result_word;
    assert(result_word < 2);

    return 0x33e000 | src0 | (src1 << 3) | (src2 << 6) |
           (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
}

/*
 * fma_atom_c_i32: src0/src1 are checked against mask 0xf3 and src2 against
 * 0xf7; the translated atom_opc is placed at bit 9.
 */
static inline unsigned
bi_pack_fma_atom_c_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);
    assert((1 << src2) & 0xf7);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x2f4000 | src0 | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* fma_atom_c_i64: same layout as the i32 form with its own opcode table and base. */
static inline unsigned
bi_pack_fma_atom_c_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);
    assert((1 << src2) & 0xf7);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x2f0000 | src0 | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* fma_atom_c1_i32: two sources only; the 3-bit translated atom_opc sits at bit 6. */
static inline unsigned
bi_pack_fma_atom_c1_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 8);

    return 0x2f5e00 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_c1_i64: as fma_atom_c1_i32 with base 0x2f1e00. */
static inline unsigned
bi_pack_fma_atom_c1_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 8);

    return 0x2f1e00 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_c1_return_i32: as fma_atom_c1_i32 with base 0x2f7e00. */
static inline unsigned
bi_pack_fma_atom_c1_return_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 8);

    return 0x2f7e00 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_c1_return_i64: as fma_atom_c1_i32 with base 0x2f3e00. */
static inline unsigned
bi_pack_fma_atom_c1_return_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 8);

    return 0x2f3e00 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_c_return_i32: as fma_atom_c_i32 with base 0x2f6000. */
static inline unsigned
bi_pack_fma_atom_c_return_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);
    assert((1 << src2) & 0xf7);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x2f6000 | src0 | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* fma_atom_c_return_i64: as fma_atom_c_i64 with base 0x2f2000. */
static inline unsigned
bi_pack_fma_atom_c_return_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert((1 << src0) & 0xf3);
    assert((1 << src1) & 0xf3);
    assert((1 << src2) & 0xf7);

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x2f2000 | src0 | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* fma_atom_post_i32: src0, src1 and the translated atom_opc (bit 6); no source checks. */
static inline unsigned
bi_pack_fma_atom_post_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x6ee400 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_post_i64: as fma_atom_post_i32 with the 64-bit opcode table and base 0x6ee000. */
static inline unsigned
bi_pack_fma_atom_post_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x6ee000 | src0 | (src1 << 3) | (atom_opc << 6);
}

/* fma_atom_pre_i64: three sources with the translated atom_opc at bit 9. */
static inline unsigned
bi_pack_fma_atom_pre_i64(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t atom_opc_lut[] = {
        3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->atom_opc < 16);
    unsigned atom_opc = atom_opc_lut[I->atom_opc];
    assert(atom_opc < 16);

    return 0x6ec000 | src0 | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* fma_bitrev_i32: only src0 is encoded. */
static inline unsigned
bi_pack_fma_bitrev_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    return 0x701fc0 | src0;
}

/* fma_clz_u32: src0 plus the one-bit mask flag at bit 3. */
static inline unsigned
bi_pack_fma_clz_u32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned mask = I->mask;
    assert(mask < 2);

    return 0x701fd0 | src0 | (mask << 3);
}

/* fma_clz_v2u16: src0, mask at bit 3 and the translated src[0] swizzle at bit 4. */
static inline unsigned
bi_pack_fma_clz_v2u16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz0_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned mask = I->mask;
    assert(mask < 2);

    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_lut[I->src[0].swizzle];
    assert(swz0 < 4);

    return 0x701ec0 | src0 | (mask << 3) | (swz0 << 4);
}

/* fma_clz_v4u8: src0 plus the one-bit mask flag at bit 3. */
static inline unsigned
bi_pack_fma_clz_v4u8(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned mask = I->mask;
    assert(mask < 2);

    return 0x701f90 | src0 | (mask << 3);
}

/*
 * fma_csel_f32: only comparison codes 0..2 are encodable. Codes 4/5 become
 * 1/2 with src0 and src1 exchanged; code 3 becomes 0 with src2 and src3
 * exchanged.
 */
static inline unsigned
bi_pack_fma_csel_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);

    if (cmpf == 4 || cmpf == 5) {
        BI_PACK_SWAP(src0, src1);
        cmpf = (cmpf == 4) ? 1 : 2;
    }

    if (cmpf == 3) {
        BI_PACK_SWAP(src2, src3);
        cmpf = 0;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    case 2: field_12 = 2; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x2e0000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/*
 * fma_csel_i32: translated code 1 is folded onto code 0 by exchanging src2
 * and src3; the field at bit 12 is then always 3.
 */
static inline unsigned
bi_pack_fma_csel_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 2);

    if (cmpf == 1) {
        BI_PACK_SWAP(src2, src3);
        cmpf = 0;
    }

    unsigned field_12 = 0;

    if (cmpf == 0)
        field_12 = 3;
    else
        unreachable("No pattern match at pos 12");

    return 0x2e0000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/*
 * fma_csel_s32: translated codes 2/3 are folded onto 0/1 by exchanging src0
 * and src1; the resulting code goes to bit 12.
 */
static inline unsigned
bi_pack_fma_csel_s32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    if (cmpf == 2 || cmpf == 3) {
        BI_PACK_SWAP(src0, src1);
        cmpf -= 2;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x2e4000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/* fma_csel_u32: same scheme as fma_csel_s32 with base 0x2e6000. */
static inline unsigned
bi_pack_fma_csel_u32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    if (cmpf == 2 || cmpf == 3) {
        BI_PACK_SWAP(src0, src1);
        cmpf -= 2;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x2e6000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/* fma_csel_v2f16: same scheme as fma_csel_f32 with base 0x6e0000. */
static inline unsigned
bi_pack_fma_csel_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);

    if (cmpf == 4 || cmpf == 5) {
        BI_PACK_SWAP(src0, src1);
        cmpf = (cmpf == 4) ? 1 : 2;
    }

    if (cmpf == 3) {
        BI_PACK_SWAP(src2, src3);
        cmpf = 0;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    case 2: field_12 = 2; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x6e0000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/* fma_csel_v2i16: same scheme as fma_csel_i32 with base 0x6e0000. */
static inline unsigned
bi_pack_fma_csel_v2i16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 2);

    if (cmpf == 1) {
        BI_PACK_SWAP(src2, src3);
        cmpf = 0;
    }

    unsigned field_12 = 0;

    if (cmpf == 0)
        field_12 = 3;
    else
        unreachable("No pattern match at pos 12");

    return 0x6e0000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}
/* Exchange two packer locals through an unsigned temporary. */
#ifndef BI_PACK_SWAP
#define BI_PACK_SWAP(a, b) \
    do { unsigned bi_pack_swap_tmp = (a); (a) = (b); (b) = bi_pack_swap_tmp; } while (0)
#endif

/*
 * Look up the 3-bit code for a (widen0, widen1) pair of translated widen
 * values. Returns a value above 7 when the pair has no code: (2, 1), or any
 * operand outside 0..2.
 */
static inline unsigned
bi_pack_fma_widen_pair_bits(unsigned widen0, unsigned widen1)
{
    static const uint8_t pair_lut[3][3] = {
        { 0, 1, 2 },
        { 6, 3, 4 },
        { 7, 0xff, 5 },
    };

    if (widen0 > 2 || widen1 > 2)
        return 0xff;

    return pair_lut[widen0][widen1];
}

/*
 * fma_csel_v2s16: translated codes 2/3 are folded onto 0/1 by exchanging
 * src0 and src1; the resulting code goes to bit 12.
 */
static inline unsigned
bi_pack_fma_csel_v2s16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    if (cmpf == 2 || cmpf == 3) {
        BI_PACK_SWAP(src0, src1);
        cmpf -= 2;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x6e4000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/* fma_csel_v2u16: same scheme as fma_csel_v2s16 with base 0x6e6000. */
static inline unsigned
bi_pack_fma_csel_v2u16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0
    };

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    if (cmpf == 2 || cmpf == 3) {
        BI_PACK_SWAP(src0, src1);
        cmpf -= 2;
    }

    unsigned field_12 = 0;

    switch (cmpf) {
    case 0: field_12 = 0; break;
    case 1: field_12 = 1; break;
    default: unreachable("No pattern match at pos 12");
    }

    return 0x6e6000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12);
}

/*
 * fma_cubeface1: the single bit at position 9 is 0 when all three neg flags
 * are clear and 1 when all three are set; mixed combinations do not match.
 */
static inline unsigned
bi_pack_fma_cubeface1(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);

    unsigned field_9 = 0;

    if (neg0 == 0 && neg1 == 0 && neg2 == 0)
        field_9 = 0;
    else if (neg0 == 1 && neg1 == 1 && neg2 == 1)
        field_9 = 1;
    else
        unreachable("No pattern match at pos 9");

    return 0x706800 | src0 | (src1 << 3) | (src2 << 6) | (field_9 << 9);
}

/* fma_dtsel_imm: src0 plus the translated table selector at bit 3. */
static inline unsigned
bi_pack_fma_dtsel_imm(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t table_lut[] = { 2, 1, 0, 3 };

    assert(I->table < 4);
    unsigned table = table_lut[I->table];
    assert(table < 4);

    return 0x70f3e0 | src0 | (table << 3);
}

/* fma_f16_to_f32: src0 plus the lane bit (from src[0].swizzle) at bit 3. */
static inline unsigned
bi_pack_fma_f16_to_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane0_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane0_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    return 0x700d10 | src0 | (lane0 << 3);
}

/*
 * fma_fadd_f32: the widen pair (2, 1) has no code of its own, so both
 * operands (source, neg, abs, widen) are exchanged to turn it into (1, 2)
 * before the pair is encoded at bit 9.
 */
static inline unsigned
bi_pack_fma_fadd_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen0_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t widen1_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned round = I->round;
    assert(round < 4);
    unsigned clamp = I->clamp;
    assert(clamp < 4);

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_lut[I->src[0].swizzle];
    assert(widen0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_lut[I->src[1].swizzle];
    assert(widen1 < 4);

    if (widen0 == 2 && widen1 == 1) {
        BI_PACK_SWAP(src0, src1);
        BI_PACK_SWAP(neg0, neg1);
        BI_PACK_SWAP(abs0, abs1);
        BI_PACK_SWAP(widen0, widen1);
    }

    unsigned field_9 = 0;
    unsigned pair_bits = bi_pack_fma_widen_pair_bits(widen0, widen1);

    if (pair_bits <= 7)
        field_9 = pair_bits;
    else
        unreachable("No pattern match at pos 9");

    return 0x2c0000 | src0 | (src1 << 3) | (abs1 << 6) | (neg0 << 7) |
           (neg1 << 8) | (field_9 << 9) | (abs0 << 12) | (round << 13) |
           (clamp << 15);
}

/*
 * fma_fadd_v2f16: the abs modifiers share one bit (position 6) whose meaning
 * depends on whether src0 > src1, so the operands may be exchanged first to
 * reach an encodable combination.
 */
static inline unsigned
bi_pack_fma_fadd_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz0_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t swz1_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_lut[I->src[0].swizzle];
    assert(swz0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz1_lut[I->src[1].swizzle];
    assert(swz1 < 4);

    unsigned round = I->round;
    assert(round < 4);
    unsigned clamp = I->clamp;
    assert(clamp < 4);

    if ((abs0 == 0 && src0 > src1) || (abs1 == 1 && src0 <= src1)) {
        BI_PACK_SWAP(src0, src1);
        BI_PACK_SWAP(abs0, abs1);
        BI_PACK_SWAP(neg0, neg1);
        BI_PACK_SWAP(swz0, swz1);
    }

    unsigned field_6 = 0;

    if (src0 > src1) {
        /* abs0 must be set here; the bit distinguishes abs1 */
        if (abs0 == 1 && abs1 == 0)
            field_6 = 0;
        else if (abs0 == 1 && abs1 == 1)
            field_6 = 1;
        else
            unreachable("No pattern match at pos 6");
    } else {
        /* abs1 must be clear here; the bit distinguishes abs0 */
        if (abs0 == 0 && abs1 == 0)
            field_6 = 0;
        else if (abs0 == 1 && abs1 == 0)
            field_6 = 1;
        else
            unreachable("No pattern match at pos 6");
    }

    return 0x6c0000 | src0 | (src1 << 3) | (field_6 << 6) | (neg0 << 7) |
           (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) |
           (clamp << 15);
}

/* fma_fadd_lscale_f32: abs/neg of both sources are stored directly at bits 6..9. */
static inline unsigned
bi_pack_fma_fadd_lscale_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    return 0x70f400 | src0 | (src1 << 3) | (abs0 << 6) | (neg0 << 7) |
           (abs1 << 8) | (neg1 << 9);
}

/*
 * fma_fcmp_f32: as with fma_fadd_f32 the widen pair (2, 1) is handled by
 * exchanging the operands; the comparison code is mirrored at the same time
 * (1 <-> 4, 2 <-> 5).
 */
static inline unsigned
bi_pack_fma_fcmp_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen0_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t widen1_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t cmpf_lut[] = {
        0, 1, 2, 3, 4, 5, ~0, 6, 7
    };

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_lut[I->src[0].swizzle];
    assert(widen0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_lut[I->src[1].swizzle];
    assert(widen1 < 4);

    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 8);

    unsigned result_type = I->result_type;
    assert(result_type < 4);

    if (widen0 == 2 && widen1 == 1) {
        BI_PACK_SWAP(src0, src1);
        BI_PACK_SWAP(widen0, widen1);
        BI_PACK_SWAP(neg0, neg1);
        BI_PACK_SWAP(abs0, abs1);

        switch (cmpf) {
        case 4: cmpf = 1; break;
        case 5: cmpf = 2; break;
        case 1: cmpf = 4; break;
        case 2: cmpf = 5; break;
        default: break;
        }
    }

    unsigned field_9 = 0;
    unsigned pair_bits = bi_pack_fma_widen_pair_bits(widen0, widen1);

    if (pair_bits <= 7)
        field_9 = pair_bits;
    else
        unreachable("No pattern match at pos 9");

    return 0x240000 | src0 | (src1 << 3) | (abs1 << 6) | (neg0 << 7) |
           (neg1 << 8) | (field_9 << 9) | (abs0 << 12) | (cmpf << 13) |
           (result_type << 16);
}

/*
 * fma_fcmp_v2f16: combines the shared abs bit scheme of fma_fadd_v2f16 with
 * comparison-code mirroring on exchange. Translated code 7 is only
 * encodable when neither source has abs set.
 */
static inline unsigned
bi_pack_fma_fcmp_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = {
        0, 1, 2, 3, 4, 5, ~0, 6, 7
    };
    static const uint8_t swz0_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t swz1_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);

    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 8);

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_lut[I->src[0].swizzle];
    assert(swz0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz1_lut[I->src[1].swizzle];
    assert(swz1 < 4);

    unsigned result_type = I->result_type;
    assert(result_type < 4);

    if ((abs0 == 0 && src0 > src1) || (abs1 == 1 && src0 <= src1)) {
        BI_PACK_SWAP(src0, src1);
        BI_PACK_SWAP(abs0, abs1);
        BI_PACK_SWAP(neg0, neg1);
        BI_PACK_SWAP(swz0, swz1);

        switch (cmpf) {
        case 4: cmpf = 1; break;
        case 5: cmpf = 2; break;
        case 1: cmpf = 4; break;
        case 2: cmpf = 5; break;
        default: break;
        }
    }

    unsigned field_6 = 0;

    if (src0 > src1) {
        /* abs0 must be set here; the bit distinguishes abs1 */
        if (abs0 == 1 && abs1 == 0)
            field_6 = 0;
        else if (abs0 == 1 && abs1 == 1)
            field_6 = 1;
        else
            unreachable("No pattern match at pos 6");
    } else {
        /* abs1 must be clear here; the bit distinguishes abs0 */
        if (abs0 == 0 && abs1 == 0)
            field_6 = 0;
        else if (abs0 == 1 && abs1 == 0)
            field_6 = 1;
        else
            unreachable("No pattern match at pos 6");
    }

    unsigned field_13 = 0;

    if (cmpf <= 6)
        field_13 = cmpf;
    else if (cmpf == 7 && abs0 == 0 && abs1 == 0)
        field_13 = 7;
    else
        unreachable("No pattern match at pos 13");

    return 0x640000 | src0 | (src1 << 3) | (field_6 << 6) | (neg0 << 7) |
           (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (field_13 << 13) |
           (result_type << 16);
}

/* fma_flshift_double_i32: bytes2 at bit 9 and the translated lane2 at bit 10. */
static inline unsigned
bi_pack_fma_flshift_double_i32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane2_lut[] = {
        ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned bytes2 = I->bytes2;
    assert(bytes2 < 2);

    assert(I->src[2].swizzle < 13);
    unsigned lane2 = lane2_lut[I->src[2].swizzle];
    assert(lane2 < 2);

    return 0x33f800 | src0 | (src1 << 3) | (src2 << 6) | (bytes2 << 9) |
           (lane2 << 10);
}

/*
 * fma_fma_f32: the widen pair is encoded at bit 9 (exchanging operands for
 * the pair (2, 1)); neg0 and neg1 are merged into one bit at position 17
 * that is set when exactly one of them is set.
 */
static inline unsigned
bi_pack_fma_fma_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen0_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t widen1_lut[] = {
        1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_lut[I->src[0].swizzle];
    assert(widen0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_lut[I->src[1].swizzle];
    assert(widen1 < 4);

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned round = I->round;
    assert(round < 4);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);
    unsigned abs2 = I->src[2].abs;
    assert(abs2 < 2);

    if (widen0 == 2 && widen1 == 1) {
        BI_PACK_SWAP(src0, src1);
        BI_PACK_SWAP(widen0, widen1);
        BI_PACK_SWAP(neg0, neg1);
        BI_PACK_SWAP(abs0, abs1);
    }

    unsigned field_9 = 0;
    unsigned pair_bits = bi_pack_fma_widen_pair_bits(widen0, widen1);

    if (pair_bits <= 7)
        field_9 = pair_bits;
    else
        unreachable("No pattern match at pos 9");

    unsigned field_17 = 0;

    if (neg0 <= 1 && neg1 <= 1)
        field_17 = neg0 ^ neg1;
    else
        unreachable("No pattern match at pos 17");

    return src0 | (src1 << 3) | (src2 << 6) | (field_9 << 9) | (abs0 << 12) |
           (round << 13) | (clamp << 15) | (field_17 << 17) | (neg2 << 18) |
           (abs1 << 19) | (abs2 << 20);
}

/*
 * fma_fma_v2f16: three swizzled sources; neg0 and neg1 are merged into one
 * bit at position 17 that is set when exactly one of them is set.
 */
static inline unsigned
bi_pack_fma_fma_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz0_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t swz1_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    static const uint8_t swz2_lut[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_lut[I->src[0].swizzle];
    assert(swz0 < 4);

    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz1_lut[I->src[1].swizzle];
    assert(swz1 < 4);

    unsigned round = I->round;
    assert(round < 4);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);

    assert(I->src[2].swizzle < 13);
    unsigned swz2 = swz2_lut[I->src[2].swizzle];
    assert(swz2 < 4);

    unsigned field_17 = 0;

    if (neg0 <= 1 && neg1 <= 1)
        field_17 = neg0 ^ neg1;
    else
        unreachable("No pattern match at pos 17");

    return 0x400000 | src0 | (src1 << 3) | (src2 << 6) | (swz0 << 9) |
           (swz1 << 11) | (round << 13) | (clamp << 15) | (field_17 << 17) |
           (neg2 << 18) | (swz2 << 19);
}

/*
 * fma_fma_rscale_f32: clamp, special and the translated round value are
 * folded into one 3-bit field at position 12; only the combinations listed
 * below are encodable. neg0/neg1 are merged into one bit at position 16.
 */
static inline unsigned
bi_pack_fma_fma_rscale_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t round_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->round < 9);
    unsigned round = round_lut[I->round];
    assert(round < 2);

    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);
    unsigned special = I->special;
    assert(special < 4);

    unsigned field_16 = 0;

    if (neg0 <= 1 && neg1 <= 1)
        field_16 = neg0 ^ neg1;
    else
        unreachable("No pattern match at pos 16");

    unsigned field_12 = 0;

    if (special == 0 && round == 0 && clamp <= 3)
        field_12 = clamp;
    else if (clamp == 0 && special == 1 && round == 0)
        field_12 = 4;
    else if (clamp == 0 && special == 1 && round == 1)
        field_12 = 5;
    else if (clamp == 0 && special == 3 && round == 0)
        field_12 = 6;
    else if (clamp == 0 && special == 2 && round == 0)
        field_12 = 7;
    else
        unreachable("No pattern match at pos 12");

    return 0x280000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12) | (abs0 << 15) | (field_16 << 16) | (neg2 << 17);
}

/*
 * fma_fma_rscale_v2f16: same layout as fma_fma_rscale_f32 with base
 * 0x680000; the combination producing field value 6 is not available here.
 */
static inline unsigned
bi_pack_fma_fma_rscale_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t round_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->round < 9);
    unsigned round = round_lut[I->round];
    assert(round < 2);

    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);
    unsigned special = I->special;
    assert(special < 4);

    unsigned field_16 = 0;

    if (neg0 <= 1 && neg1 <= 1)
        field_16 = neg0 ^ neg1;
    else
        unreachable("No pattern match at pos 16");

    unsigned field_12 = 0;

    if (special == 0 && round == 0 && clamp <= 3)
        field_12 = clamp;
    else if (clamp == 0 && special == 1 && round == 0)
        field_12 = 4;
    else if (clamp == 0 && special == 1 && round == 1)
        field_12 = 5;
    else if (clamp == 0 && special == 2 && round == 0)
        field_12 = 7;
    else
        unreachable("No pattern match at pos 12");

    return 0x680000 | src0 | (src1 << 3) | (src2 << 6) | (src3 << 9) |
           (field_12 << 12) | (abs0 << 15) | (field_16 << 16) | (neg2 << 17);
}

/* fma_fmul_cslice: lane, abs and neg of src[0] are stored at bits 6, 7 and 8. */
static inline unsigned
bi_pack_fma_fmul_cslice(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane0_lut[] = {
        0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane0_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);

    return 0x70d000 | src0 | (src1 << 3) | (lane0 << 6) | (abs0 << 7) |
           (neg0 << 8);
}

/* fma_fmul_slice_f32: only the two sources are encoded. */
static inline unsigned
bi_pack_fma_fmul_slice_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    return 0x70cb40 | src0 | (src1 << 3);
}

/*
 * fma_frexpe_f32: with log clear, neg and sqrt are encoded alongside the
 * widen field; with log set, only the form with sqrt and neg both clear
 * exists.
 */
static inline unsigned
bi_pack_fma_frexpe_f32(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen0_lut[] = {
        2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned sqrt_flag = I->sqrt;
    assert(sqrt_flag < 2);
    unsigned log_flag = I->log;
    assert(log_flag < 2);

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_lut[I->src[0].swizzle];
    assert(widen0 < 4);

    if (log_flag == 0)
        return 0x701c20 | src0 | (widen0 << 3) | (neg0 << 6) | (sqrt_flag << 8);

    if (log_flag == 1 && sqrt_flag == 0 && neg0 == 0)
        return 0x701e20 | src0 | (widen0 << 3);

    unreachable("No matching state found in fma_frexpe_f32");
}

static inline unsigned bi_pack_fma_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned sqrt = I->sqrt; assert(sqrt < 2); unsigned log = I->log; assert(log < 2); static uint8_t 
swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); if (log == 0) { return 0x701c00 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (swz0 << 3); } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { return 0x701e00 | (src0 << 0) | (swz0 << 3); } else { unreachable("No matching state found in fma_frexpe_v2f16"); } } static inline unsigned bi_pack_fma_frexpm_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned sqrt = I->sqrt; assert(sqrt < 2); unsigned log = I->log; assert(log < 2); static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); if ((log == 0) && (neg0 == 0)) { return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in fma_frexpm_f32"); } } static inline unsigned bi_pack_fma_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned sqrt = I->sqrt; assert(sqrt < 2); unsigned log = I->log; assert(log < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); if ((log == 0) && (neg0 == 0)) { return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x701a00 | (src0 << 0) | (abs0 << 
6) | (swz0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in fma_frexpm_v2f16"); } } static inline unsigned bi_pack_fma_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned round = I->round; assert(round < 8); if (round != 4) { unsigned derived_9 = 0; if (round == 0) derived_9 = 0; else if (round == 1) derived_9 = 1; else if (round == 2) derived_9 = 2; else if (round == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9); } else if (round == 4) { return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3); } else { unreachable("No matching state found in fma_fround_f32"); } } static inline unsigned bi_pack_fma_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); unsigned round = I->round; assert(round < 8); if (round != 4) { unsigned derived_9 = 0; if (round == 0) derived_9 = 0; else if (round == 1) derived_9 = 1; else if (round == 2) derived_9 = 2; else if (round == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9); } else if (round == 4) { return 0x707600 | (src0 << 0) | (abs0 
<< 7) | (neg0 << 8) | (swz0 << 3); } else { unreachable("No matching state found in fma_fround_v2f16"); } } static inline unsigned bi_pack_fma_frshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned bytes2 = I->bytes2; assert(bytes2 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); } static inline unsigned bi_pack_fma_iaddc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned bi_pack_fma_idp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned sign0 = I->src[0].abs; assert(sign0 < 2); unsigned sign1 = I->src[1].abs; assert(sign1 < 2); return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10); } static inline unsigned bi_pack_fma_imul_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t widen1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned widen1 = widen1_table[I->src[1].swizzle]; assert(widen1 < 8); unsigned extend = I->extend; assert(extend < 4); if ((extend == 0) && (widen1 == 0)) { return 0x73c0c0 | (src0 << 0) | (src1 << 3); } else if ((extend != 0) && ((widen1 == 1) || (widen1 == 2))) { unsigned derived_9 = 0; if (widen1 == 1) derived_9 = 0; else if (widen1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (extend == 2) derived_10 = 0; else if (extend == 
1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10); } else if ((extend != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) { unsigned derived_9 = 0; if (widen1 == 3) derived_9 = 0; else if (widen1 == 4) derived_9 = 1; else if (widen1 == 5) derived_9 = 2; else if (widen1 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); unsigned derived_11 = 0; if (extend == 2) derived_11 = 0; else if (extend == 1) derived_11 = 1; else unreachable("No pattern match at pos 11"); return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11); } else { unreachable("No matching state found in fma_imul_i32"); } } static inline unsigned bi_pack_fma_imul_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11); } static inline unsigned bi_pack_fma_imul_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t replicate0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned replicate0 = replicate0_table[I->src[0].swizzle]; assert(replicate0 < 8); static uint8_t replicate1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned replicate1 = replicate1_table[I->src[1].swizzle]; assert(replicate1 < 8); if ((replicate0 == 0) && 
(replicate1 == 0)) { return 0x73e0c0 | (src0 << 0) | (src1 << 3); } else if ((replicate0 == 0) && (replicate1 != 0)) { unsigned derived_9 = 0; if (replicate1 == 1) derived_9 = 0; else if (replicate1 == 2) derived_9 = 1; else if (replicate1 == 3) derived_9 = 2; else if (replicate1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9); } else { unreachable("No matching state found in fma_imul_v4i8"); } } static inline unsigned bi_pack_fma_imuld(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0x33); assert((1 << src1) & 0x33); unsigned threads = I->threads; assert(threads < 2); return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6); } static inline unsigned bi_pack_fma_isubb_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned bi_pack_fma_jump_ex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned test_mode = I->test_mode; assert(test_mode < 2); unsigned stack_mode = I->stack_mode; assert(stack_mode < 4); return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10); } static inline unsigned bi_pack_fma_lrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned bytes2 = I->bytes2; assert(bytes2 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); unsigned result_word = I->result_word; assert(result_word < 2); return 0x33b000 | (src0 << 0) | (src1 << 3) | 
(src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned bi_pack_fma_lshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 4); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned bi_pack_fma_lshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 
15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_lshift_and_v2i16"); } } static inline unsigned bi_pack_fma_lshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_lshift_and_v4i8"); } } static inline unsigned bi_pack_fma_lshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned bytes2 = I->bytes2; assert(bytes2 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); unsigned result_word = I->result_word; assert(result_word < 2); return 0x33c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned bi_pack_fma_lshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t 
lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 4); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = I->not_result; assert(not_result < 2); return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned bi_pack_fma_lshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = I->not_result; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x312800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_lshift_or_v2i16"); } } static inline unsigned bi_pack_fma_lshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static 
uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = I->not_result; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_lshift_or_v4i8"); } } static inline unsigned bi_pack_fma_lshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 4); unsigned not_result = I->not_result; assert(not_result < 2); return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); } static inline unsigned bi_pack_fma_lshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not_result = I->not_result; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) 
derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x324800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else { unreachable("No matching state found in fma_lshift_xor_v2i16"); } } static inline unsigned bi_pack_fma_lshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not_result = I->not_result; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); } else { unreachable("No matching state found in fma_lshift_xor_v4i8"); } } static inline unsigned bi_pack_fma_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); static uint8_t lane1_table[] = { 0, 
~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lane1 = lane1_table[I->src[1].swizzle]; assert(lane1 < 2); return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); } static inline unsigned bi_pack_fma_mkvec_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); static uint8_t lane1_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lane1 = lane1_table[I->src[1].swizzle]; assert(lane1 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); static uint8_t lane3_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[3].swizzle < 13); unsigned lane3 = lane3_table[I->src[3].swizzle]; assert(lane3 < 2); return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15); } static inline unsigned bi_pack_fma_mov_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x701968 | (src0 << 0); } static inline unsigned bi_pack_fma_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x701963; } static inline unsigned bi_pack_fma_popcount_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x73c6d8 | (src0 << 0); } static inline unsigned bi_pack_fma_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x701970 | (src0 << 0); } static inline unsigned bi_pack_fma_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); return 0x701900 | (src0 << 0) | (swz0 << 4); } static inline unsigned bi_pack_fma_rrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned bytes2 = I->bytes2; assert(bytes2 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); unsigned result_word = I->result_word; assert(result_word < 2); return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned bi_pack_fma_rshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 4); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned bi_pack_fma_rshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = 
{ ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_rshift_and_v2i16"); } } static inline unsigned bi_pack_fma_rshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); unsigned not1 = I->src[1].neg; assert(not1 < 2); static uint8_t not_result_table[] = { 1, 0 }; unsigned not_result = not_result_table[I->not_result]; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | 
(not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_rshift_and_v4i8"); } } static inline unsigned bi_pack_fma_rshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned bytes2 = I->bytes2; assert(bytes2 < 2); static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 2); unsigned result_word = I->result_word; assert(result_word < 2); return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned bi_pack_fma_rshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lane2 = lane2_table[I->src[2].swizzle]; assert(lane2 < 4); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = I->not_result; assert(not_result < 2); return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned bi_pack_fma_rshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = 
I->not_result; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_rshift_or_v2i16"); } } static inline unsigned bi_pack_fma_rshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned lanes2 = lanes2_table[I->src[2].swizzle]; assert(lanes2 < 8); static uint8_t not1_table[] = { 1, 0 }; unsigned not1 = not1_table[I->src[1].neg]; assert(not1 < 2); unsigned not_result = I->not_result; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_rshift_or_v4i8"); } } static inline unsigned 
bi_pack_fma_rshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   /* fma_rshift_xor_i32: source 2 lane select at bit 9, not_result at
    * bit 13. */
   static const uint8_t lane_lut[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[2].swizzle < 13);
   const unsigned lane = lane_lut[I->src[2].swizzle];
   assert(lane < 4);
   const unsigned inv_out = I->not_result;
   assert(inv_out < 2);

   const unsigned operands = src0 | (src1 << 3) | (src2 << 6);
   return 0x321000 | operands | (lane << 9) | (inv_out << 13);
}

/* Encode fma_rshift_xor_v2i16. Lane codes 0-3 use the first encoding
 * directly; codes 4-6 use the second encoding as values 1-3. */
static inline unsigned
bi_pack_fma_rshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lanes_lut[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 };
   assert(I->src[2].swizzle < 13);
   const unsigned lanes = lanes_lut[I->src[2].swizzle];
   assert(lanes < 8);
   const unsigned inv_out = I->not_result;
   assert(inv_out < 2);

   const unsigned shared = src0 | (src1 << 3) | (src2 << 6) | (inv_out << 13);

   if (lanes <= 3) {
      unsigned sel = 0;
      switch (lanes) {
      case 0: case 1: case 2: case 3: sel = lanes; break;
      default: unreachable("No pattern match at pos 9");
      }
      return 0x320800 | shared | (sel << 9);
   }

   if ((lanes >= 4) && (lanes <= 6)) {
      unsigned sel = 0;
      switch (lanes) {
      case 4: case 5: case 6: sel = lanes - 3; break;
      default: unreachable("No pattern match at pos 9");
      }
      return 0x321800 | shared | (sel << 9);
   }

   unreachable("No matching state found in fma_rshift_xor_v2i16");
}

/* Encode fma_rshift_xor_v4i8. Non-zero lane codes 1-4 are stored at bit 9 as
 * 0-3; lane code 0 selects a separate encoding with no lane field. */
static inline unsigned
bi_pack_fma_rshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lanes_lut[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[2].swizzle < 13);
   const unsigned lanes = lanes_lut[I->src[2].swizzle];
   assert(lanes < 8);
   const unsigned inv_out = I->not_result;
   assert(inv_out < 2);

   const unsigned shared = src0 | (src1 << 3) | (src2 << 6) | (inv_out << 13);

   if (lanes != 0) {
      unsigned sel = 0;
      switch (lanes) {
      case 1: case 2: case 3: case 4: sel = lanes - 1; break;
      default: unreachable("No pattern match at pos 9");
      }
      return 0x320000 | shared | (sel << 9);
   }

   if (lanes == 0)
      return 0x321800 | shared;

   unreachable("No matching state found in fma_rshift_xor_v4i8");
}

/* Encode fma_s16_to_s32: source 0 lane select (two choices) at bit 4. */
static inline unsigned
bi_pack_fma_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   const unsigned lane = lane_lut[I->src[0].swizzle];
   assert(lane < 2);

   return 0x700cc0 | src0 | (lane << 4);
}

/* Encode fma_s8_to_s32: source 0 lane select (four choices) at bit 4. */
static inline unsigned
bi_pack_fma_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lane_lut[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   const unsigned lane = lane_lut[I->src[0].swizzle];
   assert(lane < 4);

   return 0x700b40 | src0 | (lane << 4);
}

/* Encode fma_seg_add: translated segment code at bit 3, preserve_null at
 * bit 7. Segment values with no encoding map to ~0 and fail the assert. */
static inline unsigned
bi_pack_fma_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t seg_lut[] = { ~0, 2, 0, ~0, ~0, 7 };
   assert(I->seg < 6);
   const unsigned seg_code = seg_lut[I->seg];
   assert(seg_code < 8);
   const unsigned keep_null = I->preserve_null;
   assert(keep_null < 2);

   return 0x701500 | src0 | (seg_code << 3) | (keep_null << 7);
}

/* Encode fma_shaddxl_i64: 3-bit shift amount at bit 6. */
static inline unsigned
bi_pack_fma_shaddxl_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   const unsigned amount = I->shift;
   assert(amount < 0x8);

   return 0x70e600 | src0 | (src1 << 3) | (amount << 6);
}

/* Encode fma_shaddxl_s32: 3-bit shift amount at bit 6, source 1 lane code
 * at bit 9. */
static inline unsigned
bi_pack_fma_shaddxl_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lane_lut[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   const unsigned lane = lane_lut[I->src[1].swizzle];
   assert(lane < 4);
   const unsigned amount = I->shift;
   assert(amount < 0x8);

   return 0x70e800 | src0 | (src1 << 3) | (amount << 6) | (lane << 9);
}

/* Encode fma_shaddxl_u32: same layout as the s32 form with a different
 * opcode. */
static inline unsigned
bi_pack_fma_shaddxl_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lane_lut[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   const unsigned lane = lane_lut[I->src[1].swizzle];
   assert(lane < 4);
   const unsigned amount = I->shift;
   assert(amount < 0x8);

   return 0x70e000 | src0 | (src1 << 3) | (amount << 6) | (lane << 9);
}

/* Encode fma_u16_to_u32: source 0 lane select (two choices) at bit 4. */
static inline unsigned
bi_pack_fma_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   const unsigned lane = lane_lut[I->src[0].swizzle];
   assert(lane < 2);

   return 0x700cc8 | src0 | (lane << 4);
}

static inline unsigned
bi_pack_fma_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned lane0 =
lane0_table[I->src[0].swizzle]; assert(lane0 < 4); return 0x700b48 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_fma_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); unsigned clamp = I->clamp; assert(clamp < 4); unsigned round = I->round; assert(round < 8); unsigned derived_6 = 0; if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; else unreachable("No pattern match at pos 6"); unsigned derived_7 = 0; if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); } static inline unsigned bi_pack_fma_vn_asst1_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned h = I->h; assert(h < 2); unsigned l = I->l; assert(l < 2); unsigned neg2 = I->src[2].neg; assert(neg2 < 2); return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11); } static inline unsigned bi_pack_fma_vn_asst1_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned neg2 = I->src[2].neg; assert(neg2 < 2); return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12); } static inline unsigned bi_pack_add_acmpstore_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned seg = I->seg; assert(seg < 2); return 0x648c0 | (src0 << 
0) | (src1 << 3) | (seg << 9); } static inline unsigned bi_pack_add_acmpstore_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned seg = I->seg; assert(seg < 2); return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned bi_pack_add_acmpxchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned seg = I->seg; assert(seg < 2); return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned bi_pack_add_acmpxchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned seg = I->seg; assert(seg < 2); return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned bi_pack_add_atest(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); assert((1 << src1) & 0xf7); static uint8_t widen1_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned widen1 = widen1_table[I->src[1].swizzle]; assert(widen1 < 4); return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6); } static inline unsigned bi_pack_add_atom_cx(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned bi_pack_add_axchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned seg = I->seg; assert(seg < 2); return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned bi_pack_add_axchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, 
enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   /* seg is copied from the instruction and must be 0 or 1; it is placed at
    * bit 9 of base word 0x64100, next to src0 (bit 0) and src1 (bit 3). */
   unsigned seg = I->seg;
   assert(seg < 2);
   return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/* Encoding word for add_barrier: a constant; no field or source is read. */
static inline unsigned
bi_pack_add_barrier(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   return 0xd7874;
}

/*
 * Encoding word for add_blend: base 0xca800 with src0/src1/src2 at bits
 * 0/3/6. src1 and src2 must have their bit set in the mask 0xf7 (value 3 is
 * rejected).
 */
static inline unsigned
bi_pack_add_blend(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);
   assert((1 << src2) & 0xf7);
   return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6);
}

/*
 * Encoding word for add_branch_f16.
 *
 * All add_branch_* packers in this group share base word 0x68000 with
 * src0/src1/src2 at bits 0/3/6 and two derived fields at bits 9 and 12.
 * Here cmpf is taken directly from the instruction (0-7) and the widen
 * codes come from the swizzles of src[0] and src[1].
 *
 * NOTE: the local copies of src0/src1, widen0/widen1 and cmpf may be
 * rewritten by the swap step below; both derived fields are computed from
 * the values after that step, so statement order matters.
 */
static inline unsigned
bi_pack_add_branch_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   unsigned cmpf = I->cmpf;
   assert(cmpf < 8);

   /* Swap step: exchange the two sources and their widen codes, and
    * exchange cmpf 1<->4 and 2<->5 (other cmpf values are left alone). */
   if (((widen0 == 1) && (widen1 == 2)) ||
       ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) ||
       ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 4) cmpf = 1;
      else if (cmpf == 5) cmpf = 2;
      else if (cmpf == 1) cmpf = 4;
      else if (cmpf == 2) cmpf = 5;
   }

   /* Field at bit 12: chosen from the widen pair, and for the (2, 1) pair
    * also from which cmpf group applies. */
   unsigned derived_12 = 0;
   if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
   else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
   else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3;
   else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4;
   else unreachable("No pattern match at pos 12");

   /* Field at bit 9: chosen from cmpf together with the relative order of
    * the packed source values. */
   unsigned derived_9 = 0;
   if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
   else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
   else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
   else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) ||
            ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_f32.
 *
 * Same word layout as the other add_branch_* packers (base 0x68000, sources
 * at bits 0/3/6, derived fields at bits 9 and 12). After the swap step the
 * first widen code must be 0; the second may be 0, 1 or 2.
 */
static inline unsigned
bi_pack_add_branch_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   unsigned cmpf = I->cmpf;
   assert(cmpf < 8);

   /* Swap step: exchange sources and widen codes, and exchange cmpf 1<->4
    * and 2<->5. */
   if (((widen0 != 0) && (widen1 == 0)) ||
       ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) ||
       ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 4) cmpf = 1;
      else if (cmpf == 5) cmpf = 2;
      else if (cmpf == 1) cmpf = 4;
      else if (cmpf == 2) cmpf = 5;
   }

   /* Field at bit 12: 0, 5 or 6 depending on the second widen code. */
   unsigned derived_12 = 0;
   if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
   else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5;
   else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6;
   else unreachable("No pattern match at pos 12");

   /* Field at bit 9: values 1-3 are only produced when the second widen
    * code is non-zero; 4-7 also depend on the order of the sources. */
   unsigned derived_9 = 0;
   if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1;
   else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2;
   else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3;
   else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
   else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
   else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
   else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) ||
            ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_i16.
 *
 * cmpf is translated through cmpf_table, which only accepts I->cmpf values
 * 0 and 3 (mapped to 0 and 1). The swap step exchanges sources and widen
 * codes but leaves cmpf untouched.
 */
static inline unsigned
bi_pack_add_branch_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 2);

   if (((widen0 == 1) && (widen1 == 2)) ||
       ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) ||
       ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
   }

   unsigned derived_12 = 0;
   if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
   else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
   else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3;
   else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4;
   else unreachable("No pattern match at pos 12");

   unsigned derived_9 = 0;
   if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1;
   else if (((widen0 == 2) && (widen1 == 1)) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_i32.
 *
 * Like the i16 variant, cmpf_table only accepts I->cmpf values 0 and 3.
 * After the swap step both widen codes must be 0, so the bit-12 field is
 * always 0 on the valid path.
 */
static inline unsigned
bi_pack_add_branch_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 2);

   if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
   }

   unsigned derived_12 = 0;
   if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
   else unreachable("No pattern match at pos 12");

   unsigned derived_9 = 0;
   if ((src0 == src1) && (cmpf == 0)) derived_9 = 1;
   else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_s16.
 *
 * cmpf_table accepts I->cmpf values 1, 2, 4 and 5 (mapped to 0-3). The swap
 * step exchanges sources and widen codes and exchanges cmpf 0<->2 and
 * 1<->3, leaving the sources ordered src0 <= src1 when the widen codes are
 * equal.
 */
static inline unsigned
bi_pack_add_branch_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 4);

   if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 2) cmpf = 0;
      else if (cmpf == 3) cmpf = 1;
      else if (cmpf == 0) cmpf = 2;
      else if (cmpf == 1) cmpf = 3;
   }

   unsigned derived_12 = 0;
   if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
   else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
   else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4;
   else unreachable("No pattern match at pos 12");

   /* Equal packed sources with equal widen codes get dedicated values
    * (1 or 4) in the last two arms of this chain. */
   unsigned derived_9 = 0;
   if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) ||
       ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) ||
            ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) ||
            ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3;
   else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_s32.
 *
 * Same cmpf translation and cmpf exchange as the s16 variant; the swap is
 * taken whenever src0 > src1. Both widen codes must be 0 afterwards.
 */
static inline unsigned
bi_pack_add_branch_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 4);

   if (src0 > src1) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 2) cmpf = 0;
      else if (cmpf == 3) cmpf = 1;
      else if (cmpf == 0) cmpf = 2;
      else if (cmpf == 1) cmpf = 3;
   }

   unsigned derived_12 = 0;
   if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
   else unreachable("No pattern match at pos 12");

   unsigned derived_9 = 0;
   if ((src0 < src1) && (cmpf == 2)) derived_9 = 0;
   else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
   else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2;
   else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3;
   else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_u16.
 *
 * Same cmpf translation and cmpf exchange as the s16 variant, but the swap
 * for equal widen codes is taken when src0 < src1, leaving src0 >= src1.
 */
static inline unsigned
bi_pack_add_branch_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 4);

   if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 2) cmpf = 0;
      else if (cmpf == 3) cmpf = 1;
      else if (cmpf == 0) cmpf = 2;
      else if (cmpf == 1) cmpf = 3;
   }

   unsigned derived_12 = 0;
   if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
   else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
   else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3;
   else unreachable("No pattern match at pos 12");

   unsigned derived_9 = 0;
   if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) ||
       ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2;
   else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) ||
            ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encoding word for add_branch_u32.
 *
 * Same cmpf translation and cmpf exchange as the u16 variant; the swap is
 * taken whenever src0 < src1. Both widen codes must be 0 afterwards.
 */
static inline unsigned
bi_pack_add_branch_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src2) & 0xf7);
   static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[0].swizzle < 13);
   unsigned widen0 = widen0_table[I->src[0].swizzle];
   assert(widen0 < 4);
   static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
   assert(I->src[1].swizzle < 13);
   unsigned widen1 = widen1_table[I->src[1].swizzle];
   assert(widen1 < 4);
   static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_table[I->cmpf];
   assert(cmpf < 4);

   if (src0 < src1) {
      { unsigned temp = src0; src0 = src1; src1 = temp; }
      { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
      if (cmpf == 2) cmpf = 0;
      else if (cmpf == 3) cmpf = 1;
      else if (cmpf == 0) cmpf = 2;
      else if (cmpf == 1) cmpf = 3;
   }

   unsigned derived_12 = 0;
   if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
   else unreachable("No pattern match at pos 12");

   unsigned derived_9 = 0;
   if ((src0 >= src1) && (cmpf == 2)) derived_9 = 0;
   else if ((src0 >= src1) && (cmpf == 3)) derived_9 = 1;
   else if ((src0 >= src1) && (cmpf == 1)) derived_9 = 2;
   else if ((src0 >= src1) && (cmpf == 0)) derived_9 = 3;
   else unreachable("No pattern match at pos 9");

   return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

static inline unsigned bi_pack_add_branchc_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2,
enum bifrost_packed_src src3)
{
   /* Base word 0x6f030 with src0 at bit 0, src1 at bit 6 and combine at
    * bit 10. The lane code derived from src[0].swizzle is stored at bit 9
    * and its complement at bit 3. */
   assert((1 << src1) & 0xf7);

   unsigned combine = I->combine;
   assert(combine < 2);

   static uint8_t swizzle_to_lane[] = {
      0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned lane0 = swizzle_to_lane[I->src[0].swizzle];
   assert(lane0 < 2);

   unsigned field9 = 0;
   switch (lane0) {
   case 0: field9 = 0; break;
   case 1: field9 = 1; break;
   default: unreachable("No pattern match at pos 9");
   }

   unsigned field3 = 0;
   switch (lane0) {
   case 1: field3 = 0; break;
   case 0: field3 = 1; break;
   default: unreachable("No pattern match at pos 3");
   }

   return 0x6f030 | (src0 << 0) | (field3 << 3) | (src1 << 6) |
          (field9 << 9) | (combine << 10);
}

/*
 * Encoding word for add_branchc_i32: base 0x6f238, src0 at bit 0, src1 at
 * bit 6 and combine at bit 10.
 */
static inline unsigned
bi_pack_add_branchc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   unsigned combine = I->combine;
   assert(combine < 2);

   return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10);
}

/*
 * Encoding word for add_branchz_f16: base 0x6f000, src0 at bit 0, src1 at
 * bit 6, plus three derived fields: bit 4 from the widen code of src[0],
 * bits 3 and 9 from cmpf (taken directly from the instruction, only values
 * 0-5 match).
 */
static inline unsigned
bi_pack_add_branchz_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t swizzle_to_widen[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned widen0 = swizzle_to_widen[I->src[0].swizzle];
   assert(widen0 < 4);

   unsigned cmpf = I->cmpf;
   assert(cmpf < 8);

   unsigned field4 = 0;
   switch (widen0) {
   case 2: field4 = 1; break;
   case 1: field4 = 2; break;
   default: unreachable("No pattern match at pos 4");
   }

   unsigned field3 = 0;
   switch (cmpf) {
   case 2: case 3: case 5: field3 = 0; break;
   case 0: case 1: case 4: field3 = 1; break;
   default: unreachable("No pattern match at pos 3");
   }

   unsigned field9 = 0;
   switch (cmpf) {
   case 0: case 3: field9 = 5; break;
   case 1: case 2: field9 = 6; break;
   case 4: case 5: field9 = 7; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f000 | (src0 << 0) | (field3 << 3) | (field4 << 4) |
          (src1 << 6) | (field9 << 9);
}

/*
 * Encoding word for add_branchz_f32: as the f16 variant but without the
 * widen-derived field at bit 4.
 */
static inline unsigned
bi_pack_add_branchz_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   unsigned cmpf = I->cmpf;
   assert(cmpf < 8);

   unsigned field3 = 0;
   switch (cmpf) {
   case 2: case 3: case 5: field3 = 0; break;
   case 0: case 1: case 4: field3 = 1; break;
   default: unreachable("No pattern match at pos 3");
   }

   unsigned field9 = 0;
   switch (cmpf) {
   case 0: case 3: field9 = 5; break;
   case 1: case 2: field9 = 6; break;
   case 4: case 5: field9 = 7; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f000 | (src0 << 0) | (field3 << 3) | (src1 << 6) | (field9 << 9);
}

/*
 * Encoding word for add_branchz_i16: base 0x6f800, src0 at bit 0, src1 at
 * bit 6, a widen-derived field at bit 4 and a cmpf-derived field at bit 3.
 * The cmpf table only accepts I->cmpf values 0 and 3.
 */
static inline unsigned
bi_pack_add_branchz_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t swizzle_to_widen[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned widen0 = swizzle_to_widen[I->src[0].swizzle];
   assert(widen0 < 4);

   static uint8_t cmpf_encoding[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 2);

   unsigned field4 = 0;
   switch (widen0) {
   case 2: field4 = 1; break;
   case 1: field4 = 2; break;
   default: unreachable("No pattern match at pos 4");
   }

   unsigned field3 = 0;
   switch (cmpf) {
   case 1: field3 = 0; break;
   case 0: field3 = 1; break;
   default: unreachable("No pattern match at pos 3");
   }

   return 0x6f800 | (src0 << 0) | (field3 << 3) | (field4 << 4) | (src1 << 6);
}

/*
 * Encoding word for add_branchz_i32: as the i16 variant but without the
 * widen-derived field at bit 4.
 */
static inline unsigned
bi_pack_add_branchz_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t cmpf_encoding[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 2);

   unsigned field3 = 0;
   switch (cmpf) {
   case 1: field3 = 0; break;
   case 0: field3 = 1; break;
   default: unreachable("No pattern match at pos 3");
   }

   return 0x6f800 | (src0 << 0) | (field3 << 3) | (src1 << 6);
}

/*
 * Encoding word for add_branchz_s16: base 0x6f008, src0 at bit 0, src1 at
 * bit 6, a widen-derived field at bit 4 and a cmpf-derived field at bit 9.
 * The cmpf table accepts I->cmpf values 1, 2, 4 and 5.
 */
static inline unsigned
bi_pack_add_branchz_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t swizzle_to_widen[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned widen0 = swizzle_to_widen[I->src[0].swizzle];
   assert(widen0 < 4);

   static uint8_t cmpf_encoding[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 4);

   unsigned field4 = 0;
   switch (widen0) {
   case 2: field4 = 1; break;
   case 1: field4 = 2; break;
   default: unreachable("No pattern match at pos 4");
   }

   unsigned field9 = 0;
   switch (cmpf) {
   case 2: field9 = 0; break;
   case 3: field9 = 1; break;
   case 1: field9 = 2; break;
   case 0: field9 = 3; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f008 | (src0 << 0) | (field4 << 4) | (src1 << 6) | (field9 << 9);
}

/*
 * Encoding word for add_branchz_s32: as the s16 variant but without the
 * widen-derived field at bit 4.
 */
static inline unsigned
bi_pack_add_branchz_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t cmpf_encoding[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 4);

   unsigned field9 = 0;
   switch (cmpf) {
   case 2: field9 = 0; break;
   case 3: field9 = 1; break;
   case 1: field9 = 2; break;
   case 0: field9 = 3; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f008 | (src0 << 0) | (src1 << 6) | (field9 << 9);
}

/*
 * Encoding word for add_branchz_u16: same fields as the s16 variant on base
 * word 0x6f000.
 */
static inline unsigned
bi_pack_add_branchz_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t swizzle_to_widen[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned widen0 = swizzle_to_widen[I->src[0].swizzle];
   assert(widen0 < 4);

   static uint8_t cmpf_encoding[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 4);

   unsigned field4 = 0;
   switch (widen0) {
   case 2: field4 = 1; break;
   case 1: field4 = 2; break;
   default: unreachable("No pattern match at pos 4");
   }

   unsigned field9 = 0;
   switch (cmpf) {
   case 2: field9 = 0; break;
   case 3: field9 = 1; break;
   case 1: field9 = 2; break;
   case 0: field9 = 3; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f000 | (src0 << 0) | (field4 << 4) | (src1 << 6) | (field9 << 9);
}

/*
 * Encoding word for add_branchz_u32: as the u16 variant but without the
 * widen-derived field at bit 4.
 */
static inline unsigned
bi_pack_add_branchz_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   static uint8_t cmpf_encoding[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

   assert(I->cmpf < 9);
   unsigned cmpf = cmpf_encoding[I->cmpf];
   assert(cmpf < 4);

   unsigned field9 = 0;
   switch (cmpf) {
   case 2: field9 = 0; break;
   case 3: field9 = 1; break;
   case 1: field9 = 2; break;
   case 0: field9 = 3; break;
   default: unreachable("No pattern match at pos 9");
   }

   return 0x6f000 | (src0 << 0) | (src1 << 6) | (field9 << 9);
}

/* Encoding word for add_branch_diverg: base 0x6f83c with src0 at bit 6. */
static inline unsigned
bi_pack_add_branch_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src0) & 0xf7);

   unsigned word = 0x6f83c;
   word |= (src0 << 6);
   return word;
}

/*
 * Encoding word for add_branch_lowbits_f32: base 0x6fa38 with src0 at bit 0
 * and src1 at bit 6.
 */
static inline unsigned
bi_pack_add_branch_lowbits_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src1) & 0xf7);

   unsigned word = 0x6fa38;
   word |= (src0 << 0);
   word |= (src1 << 6);
   return word;
}

/* Encoding word for add_branch_no_diverg: base 0x6fa34 with src0 at bit 6. */
static inline unsigned
bi_pack_add_branch_no_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src0) & 0xf7);

   unsigned word = 0x6fa34;
   word |= (src0 << 6);
   return word;
}

/*
 * Encoding word for add_clper_v6_i32: base 0x3f0c0 with src0/src1 at bits
 * 0/3. src0 must be 0, 1 or 2 (mask 0x7).
 */
static inline unsigned
bi_pack_add_clper_v6_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src0) & 0x7);

   unsigned word = 0x3f0c0;
   word |= (src0 << 0);
   word |= (src1 << 3);
   return word;
}

/*
 * Encoding word for add_clper_i32: base 0x7c000, src0/src1 at bits 0/3,
 * lane_op at bit 6, subgroup at bit 8 and inactive_result at bit 10. src0
 * must be 0, 1 or 2 (mask 0x7).
 */
static inline unsigned
bi_pack_add_clper_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   assert((1 << src0) & 0x7);

   unsigned lane_op = I->lane_op;
   assert(lane_op < 4);
   unsigned subgroup = I->subgroup;
   assert(subgroup < 4);
   unsigned inactive_result = I->inactive_result;
   assert(inactive_result < 16);

   unsigned word = 0x7c000;
   word |= (src0 << 0);
   word |= (src1 << 3);
   word |= (lane_op << 6);
   word |= (subgroup << 8);
   word |= (inactive_result << 10);
   return word;
}

/* Encoding word for add_cubeface2: base 0x3de58 with src0 at bit 0. */
static inline unsigned
bi_pack_add_cubeface2(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   unsigned word = 0x3de58;
   word |= (src0 << 0);
   return word;
}

/*
 * Encoding word for add_cube_ssel: base 0x3e000, src0/src1/src2 at bits
 * 0/3/6 and one shared neg bit at 9. The neg flags of src[0] and src[1]
 * must agree; any other combination hits unreachable().
 */
static inline unsigned
bi_pack_add_cube_ssel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   unsigned neg0 = I->src[0].neg;
   assert(neg0 < 2);
   unsigned neg1 = I->src[1].neg;
   assert(neg1 < 2);

   unsigned neg_bit = 0;
   if (!neg0 && !neg1)
      neg_bit = 0;
   else if (neg0 == 1 && neg1 == 1)
      neg_bit = 1;
   else
      unreachable("No pattern match at pos 9");

   return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (neg_bit << 9);
}

/*
 * Encoding word for add_cube_tsel: same layout as add_cube_ssel on base
 * word 0x3e400.
 */
static inline unsigned
bi_pack_add_cube_tsel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   unsigned neg0 = I->src[0].neg;
   assert(neg0 < 2);
   unsigned neg1 = I->src[1].neg;
   assert(neg1 < 2);

   unsigned neg_bit = 0;
   if (!neg0 && !neg1)
      neg_bit = 0;
   else if (neg0 == 1 && neg1 == 1)
      neg_bit = 1;
   else
      unreachable("No pattern match at pos 9");

   return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (neg_bit << 9);
}

/*
 * Encoding word for add_discard_f32: base 0xc8800, src0/src1 at bits 0/3, a
 * cmpf-derived field at bit 6 and a widen-pair field at bit 8.
 *
 * When cmpf is 1 or 2 the two sources (and their widen codes) are exchanged
 * and cmpf becomes 4 or 5 respectively, so only cmpf 0, 3, 4 and 5 reach
 * the field lookup.
 */
static inline unsigned
bi_pack_add_discard_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
   unsigned cmpf = I->cmpf;
   assert(cmpf < 8);

   static uint8_t swizzle_to_widen0[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[0].swizzle < 13);
   unsigned widen0 = swizzle_to_widen0[I->src[0].swizzle];
   assert(widen0 < 4);

   static uint8_t swizzle_to_widen1[] = {
      1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
   };

   assert(I->src[1].swizzle < 13);
   unsigned widen1 = swizzle_to_widen1[I->src[1].swizzle];
   assert(widen1 < 4);

   if (cmpf == 1 || cmpf == 2) {
      unsigned saved_src = src0;
      src0 = src1;
      src1 = saved_src;

      unsigned saved_widen = widen0;
      widen0 = widen1;
      widen1 = saved_widen;

      cmpf = (cmpf == 1) ? 4 : 5;
   }

   unsigned field6 = 0;
   switch (cmpf) {
   case 0: field6 = 0; break;
   case 3: field6 = 1; break;
   case 4: field6 = 2; break;
   case 5: field6 = 3; break;
   default: unreachable("No pattern match at pos 6");
   }

   unsigned field8 = 0;
   if (widen0 == 1 && widen1 == 1)
      field8 = 0;
   else if (widen0 == 2 && widen1 == 1)
      field8 = 1;
   else if (widen0 == 1 && widen1 == 2)
      field8 = 2;
   else if (widen0 == 2 && widen1 == 2)
      field8 = 3;
   else if (widen0 == 0 && widen1 == 0)
      field8 = 4;
   else
      unreachable("No pattern match at pos 8");

   return 0xc8800 | (src0 << 0) | (src1 << 3) | (field6 << 6) | (field8 << 8);
}

static inline unsigned bi_pack_add_f16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0,
~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x3cd10 | (src0 << 0) | (lane0 << 3); } static inline unsigned bi_pack_add_f16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); } else if (round == 4) { return 0x3cc40 | (src0 << 0) | (lane0 << 5); } else { unreachable("No matching state found in add_f16_to_s32"); } } static inline unsigned bi_pack_add_f16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c508 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); } else if (round == 4) { return 0x3cc48 | (src0 << 0) | (lane0 << 5); } else { unreachable("No matching state found in add_f16_to_u32"); } } static inline unsigned bi_pack_add_f32_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum 
bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /*
     * Body of bi_pack_add_f32_to_s32 (signature opened on the previous line).
     * round 0..3 -> base 0x3c980 with round at bit 4; round 4 -> base 0x3cca0.
     * src0 sits at bit 0 in both layouts.
     */
    unsigned round = I->round;
    assert(round < 8);

    if (round != 4) {
        unsigned pos4 = 0;
        switch (round) {
        case 0: pos4 = 0; break;
        case 1: pos4 = 1; break;
        case 2: pos4 = 2; break;
        case 3: pos4 = 3; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c980
             | (src0 << 0)
             | (pos4 << 4);
    } else if (round == 4) {
        return 0x3cca0 | (src0 << 0);
    } else {
        unreachable("No matching state found in add_f32_to_s32");
    }
}

/*
 * Builds the word for add_f32_to_u32: round 0..3 -> base 0x3c988 with round
 * at bit 4; round 4 -> base 0x3cca8. src0 sits at bit 0 in both layouts.
 */
static inline unsigned
bi_pack_add_f32_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned round = I->round;
    assert(round < 8);

    if (round != 4) {
        unsigned pos4 = 0;
        switch (round) {
        case 0: pos4 = 0; break;
        case 1: pos4 = 1; break;
        case 2: pos4 = 2; break;
        case 3: pos4 = 3; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c988
             | (src0 << 0)
             | (pos4 << 4);
    } else if (round == 4) {
        return 0x3cca8 | (src0 << 0);
    } else {
        unreachable("No matching state found in add_f32_to_u32");
    }
}

/* add_fadd_f32: this definition is cut by the line wrap and continues on the next line. */
static inline unsigned
bi_pack_add_fadd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static uint8_t round_table[] = { 0, 1, 2, 3, ~0, 5, 4, ~0, ~0 };
    assert(I->round < 9);
    unsigned round = round_table[I->round];
    assert(round < 8);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
assert(I->src[1].swizzle < 13);
    /* Remainder of bi_pack_add_fadd_f32 (opened on the previous line). */
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);

    /*
     * If source 0 has widen 1 or 2 while source 1 has widen 0, exchange the
     * two operands together with their neg/abs/widen modifiers. After the
     * exchange the pair is (0, 1) or (0, 2), both of which appear in the
     * pattern list for bit 9 below.
     */
    if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
        { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
    }

    if (round != 4) {
        /* Layout on base 0x20000: round (0..3 only) at bit 13, widen pair code at bit 9. */
        unsigned derived_13 = 0;
        if (round == 0) derived_13 = 0;
        else if (round == 1) derived_13 = 1;
        else if (round == 2) derived_13 = 2;
        else if (round == 3) derived_13 = 3;
        else unreachable("No pattern match at pos 13");
        unsigned derived_9 = 0;
        if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
        else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
        else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
        else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
        else unreachable("No pattern match at pos 9");
        return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9);
    } else if ((round == 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) {
        /* Layout on base 0x75200: only reachable with round == 4 and every modifier zero. */
        return 0x75200 | (src0 << 0) | (src1 << 3);
    } else {
        unreachable("No matching state found in add_fadd_f32");
    }
}

/*
 * add_fadd_v2f16: this definition is cut by the line wrap and continues on
 * the next line. The part visible here loads the abs1/neg0/neg1 modifiers
 * and translates both source swizzles through a 13-entry table.
 */
static inline unsigned
bi_pack_add_fadd_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_table[I->src[0].swizzle];
    assert(swz0 < 4);
    static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz1_table[I->src[1].swizzle];
assert(swz1 < 4); unsigned round = I->round; assert(round < 4); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15); } static inline unsigned bi_pack_add_fadd_rscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t clamp_table[] = { 0, ~0, ~0, 1 }; assert(I->clamp < 4); unsigned clamp = clamp_table[I->clamp]; assert(clamp < 2); unsigned special = I->special; assert(special < 2); unsigned round = I->round; assert(round < 8); unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned derived_9 = 0; if ((clamp == 0) && (special == 0) && (round == 0)) derived_9 = 0; else if ((clamp == 1) && (special == 0) && (round == 0)) derived_9 = 2; else if ((clamp == 0) && (special == 1) && (round == 4)) derived_9 = 3; else if ((clamp == 0) && (special == 1) && (round == 0)) derived_9 = 4; else if ((clamp == 0) && (special == 1) && (round == 1)) derived_9 = 5; else if ((clamp == 0) && (special == 1) && (round == 2)) derived_9 = 6; else if ((clamp == 0) && (special == 1) && (round == 3)) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9); } static inline unsigned bi_pack_add_fcmp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, 
~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    /* Remainder of bi_pack_add_fcmp_f32 (opened on the previous line). */
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    /* I->cmpf value 6 has no encoding (table entry ~0 fails the assert below). */
    static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 8);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned result_type = I->result_type;
    assert(result_type < 4);

    /*
     * Exchange the operands (slot, widen, neg, abs) when only source 1 is
     * negated, or when source 0 has widen 1/2 while source 1 has widen 0.
     * The comparison code is remapped at the same time: 4 <-> 1 and 5 <-> 2;
     * other codes are left alone.
     */
    if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
        { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
        if (cmpf == 4) cmpf = 1;
        else if (cmpf == 5) cmpf = 2;
        else if (cmpf == 1) cmpf = 4;
        else if (cmpf == 2) cmpf = 5;
    }

    /* Code for the (widen0, widen1) pair, stored at bit 9. */
    unsigned derived_9 = 0;
    if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
    else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
    else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
    else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    /* Code for the (neg0, neg1) pair, stored at bit 13; neg1 must be 0 at this point. */
    unsigned derived_13 = 0;
    if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0;
    else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1;
    else unreachable("No pattern match at pos 13");

    return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13);
}

/*
 * add_fcmp_v2f16: this definition is cut by the line wrap and continues on
 * the next line. The part visible here loads neg0/neg1 and translates
 * I->cmpf through the same 9-entry table used by the f32 variant.
 */
static inline unsigned
bi_pack_add_fcmp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
assert(cmpf < 8); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); unsigned result_type = I->result_type; assert(result_type < 4); if ((neg0 == 0) && (neg1 == 1)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 4) cmpf = 1; else if (cmpf == 5) cmpf = 2; else if (cmpf == 1) cmpf = 4; else if (cmpf == 2) cmpf = 5; } unsigned derived_13 = 0; if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; else unreachable("No pattern match at pos 13"); return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13); } static inline unsigned bi_pack_add_fcos_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned offset = I->offset; assert(offset < 2); return 0x67a88 | (src0 << 0) | (offset << 4); } static inline unsigned bi_pack_add_fexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); assert((1 << src1) & 0xf7); return 0x66ac0 | (src0 << 0) | (src1 << 3); } static inline unsigned bi_pack_add_fexp_table_u4(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned adj = I->adj; assert(adj < 4); return 0x67ac0 | (src0 << 0) | (adj << 3); } static inline unsigned bi_pack_add_flogd_f32(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Body of bi_pack_add_flogd_f32 (signature opened on the previous line): 0x66340 with src0 (mask 0xf7) at bit 0. */
    assert((1 << src0) & 0xf7);
    return 0x66340 | (src0 << 0);
}

/*
 * Builds the word for add_flog_table_f32. The layout is selected by the
 * combination of mode, widen0 (table-translated swizzle of source 0),
 * precision and divzero; each branch below uses its own base constant.
 * src0 must pass the 0xf7 slot mask. This definition is cut by the line
 * wrap and continues on the next line.
 */
static inline unsigned
bi_pack_add_flog_table_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    unsigned mode = I->mode;
    assert(mode < 4);
    unsigned precision = I->precision;
    assert(precision < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned divzero = I->divzero;
    assert(divzero < 2);

    if ((mode == 0) && (widen0 == 0) && (precision == 0)) {
        /* mode 0, widen 0: base 0x67300, divzero kept at bit 5. */
        return 0x67300 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5);
    } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) {
        /* mode 0, widen 1 or 2: base 0x67340, widen code (0/1) at bit 7. */
        unsigned derived_7 = 0;
        if (widen0 == 1) derived_7 = 0;
        else if (widen0 == 2) derived_7 = 1;
        else unreachable("No pattern match at pos 7");
        return 0x67340 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
    } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) {
        /* mode 1 or 2, widen 0: base 0x67b00, mode code (0/1) at bit 5; divzero must be 0. */
        unsigned derived_5 = 0;
        if (mode == 1) derived_5 = 0;
        else if (mode == 2) derived_5 = 1;
        else unreachable("No pattern match at pos 5");
        return 0x67b00 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_5 << 5);
    } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) {
        /* mode 1 or 2, widen 1 or 2: base 0x67b40, mode code at bit 5 and widen code at bit 7. */
        unsigned derived_5 = 0;
        if (mode == 1) derived_5 = 0;
        else if (mode == 2) derived_5 = 1;
        else unreachable("No pattern match at pos 5");
        unsigned derived_7 = 0;
        if (widen0 == 1) derived_7 = 0;
        else if (widen0 == 2) derived_7 = 1;
        else unreachable("No pattern match at pos 7");
        return 0x67b40 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_5 << 5) |
(derived_7 << 7); } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg0 == 0)) { unsigned derived_3 = 0; if (mode == 2) derived_3 = 0; else if (mode == 1) derived_3 = 1; else unreachable("No pattern match at pos 3"); unsigned derived_4 = 0; if (precision == 1) derived_4 = 0; else if (precision == 2) derived_4 = 1; else unreachable("No pattern match at pos 4"); return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4); } else { unreachable("No matching state found in add_flog_table_f32"); } } static inline unsigned bi_pack_add_fmax_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); unsigned clamp = I->clamp; assert(clamp < 4); unsigned sem = I->sem; assert(sem < 4); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); } static inline unsigned bi_pack_add_fmax_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); unsigned sem = I->sem; assert(sem < 4); if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { { 
unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (sem == 2) sem = 3; else if (sem == 3) sem = 2; } unsigned derived_6 = 0; if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); } static inline unsigned bi_pack_add_fmin_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); unsigned clamp = I->clamp; assert(clamp < 4); unsigned sem = I->sem; assert(sem < 4); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); } static inline unsigned bi_pack_add_fmin_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = 
swz1_table[I->src[1].swizzle]; assert(swz1 < 4); unsigned sem = I->sem; assert(sem < 4); if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (sem == 2) sem = 3; else if (sem == 3) sem = 2; } unsigned derived_6 = 0; if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); } static inline unsigned bi_pack_add_fpclass_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x67c40 | (src0 << 0) | (lane0 << 3); } static inline unsigned bi_pack_add_fpclass_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); return 0x67c50 | (src0 << 0); } static inline unsigned bi_pack_add_fpow_sc_apply(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x75080 | (src0 << 0) | (src1 << 3); } static inline unsigned bi_pack_add_fpow_sc_det_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); 
assert((1 << src1) & 0xf7); unsigned func = I->func; assert(func < 4); static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lane1 = lane1_table[I->src[1].swizzle]; assert(lane1 < 4); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); if ((func == 0) || (func == 1)) { unsigned derived_6 = 0; if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0; else if (lane1 == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); unsigned derived_8 = 0; if (func == 0) derived_8 = 0; else if (func == 1) derived_8 = 1; else unreachable("No pattern match at pos 8"); return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8); } else if (((func == 2) || (func == 3)) && (lane1 == 2)) { unsigned derived_8 = 0; if (func == 2) derived_8 = 0; else if (func == 3) derived_8 = 1; else unreachable("No pattern match at pos 8"); return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8); } else { unreachable("No matching state found in add_fpow_sc_det_f16"); } } static inline unsigned bi_pack_add_fpow_sc_det_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); assert((1 << src1) & 0xf7); unsigned func = I->func; assert(func < 4); return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7); } static inline unsigned bi_pack_add_frcp_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned divzero = I->divzero; assert(divzero < 2); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 
assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x67080 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); } static inline unsigned bi_pack_add_frcp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned derived_6 = 0; if (widen0 == 0) derived_6 = 0; else unreachable("No pattern match at pos 6"); return 0x66000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); } static inline unsigned bi_pack_add_frcp_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned divzero = I->divzero; assert(divzero < 2); if (widen0 == 0) { return 0x67000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); } else if (widen0 != 0) { unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67040 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); } else { unreachable("No matching state found in add_frcp_approx_f32"); } } static inline unsigned bi_pack_add_frexpe_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, 
enum bifrost_packed_src src3)
{
    /*
     * Body of bi_pack_add_frexpe_f32 (signature opened on the previous line).
     * log == 0: base 0x3dc20 with neg0 at bit 6 and sqrt at bit 8.
     * log == 1: base 0x3de20, only valid when sqrt and neg0 are both 0.
     * src0 sits at bit 0 and the widen selector at bit 3 in both layouts.
     */
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);

    unsigned sqrt = I->sqrt;
    assert(sqrt < 2);

    unsigned log = I->log;
    assert(log < 2);

    static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);

    if (log == 0) {
        return 0x3dc20
             | (src0 << 0)
             | (neg0 << 6)
             | (sqrt << 8)
             | (widen0 << 3);
    } else if (log == 1 && sqrt == 0 && neg0 == 0) {
        return 0x3de20
             | (src0 << 0)
             | (widen0 << 3);
    } else {
        unreachable("No matching state found in add_frexpe_f32");
    }
}

/*
 * Builds the word for add_frexpe_v2f16.
 * log == 0: base 0x3dc00 with neg0 at bit 6 and sqrt at bit 8.
 * log == 1: base 0x3de00, only valid when sqrt and neg0 are both 0.
 * src0 sits at bit 0 and the translated swizzle at bit 3 in both layouts.
 */
static inline unsigned
bi_pack_add_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);

    unsigned sqrt = I->sqrt;
    assert(sqrt < 2);

    unsigned log = I->log;
    assert(log < 2);

    static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_table[I->src[0].swizzle];
    assert(swz0 < 4);

    if (log == 0) {
        return 0x3dc00
             | (src0 << 0)
             | (neg0 << 6)
             | (sqrt << 8)
             | (swz0 << 3);
    } else if (log == 1 && sqrt == 0 && neg0 == 0) {
        return 0x3de00
             | (src0 << 0)
             | (swz0 << 3);
    } else {
        unreachable("No matching state found in add_frexpe_v2f16");
    }
}

/* add_frexpm_f32: this definition is cut by the line wrap (inside a return expression) and continues on the next line. */
static inline unsigned
bi_pack_add_frexpm_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned sqrt = I->sqrt;
    assert(sqrt < 2);
    unsigned log = I->log;
    assert(log < 2);
    static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    if ((log == 0) && (neg0 == 0)) {
        return 0x3db20 | (src0 << 0) | (abs0 << 6) |
(sqrt << 7) | (widen0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in add_frexpm_f32"); } } static inline unsigned bi_pack_add_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned sqrt = I->sqrt; assert(sqrt < 2); unsigned log = I->log; assert(log < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); if ((log == 0) && (neg0 == 0)) { return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in add_frexpm_v2f16"); } } static inline unsigned bi_pack_add_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned round = I->round; assert(round < 4); return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9); } static inline unsigned bi_pack_add_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, 
~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); unsigned round = I->round; assert(round < 4); return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9); } static inline unsigned bi_pack_add_frsq_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned divzero = I->divzero; assert(divzero < 2); static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x67280 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); } static inline unsigned bi_pack_add_frsq_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned derived_6 = 0; if (widen0 == 0) derived_6 = 0; else unreachable("No pattern match at pos 6"); return 0x66100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); } static inline unsigned bi_pack_add_frsq_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); unsigned neg0 = I->src[0].neg; 
assert(neg0 < 2); unsigned abs0 = I->src[0].abs; assert(abs0 < 2); unsigned divzero = I->divzero; assert(divzero < 2); if (widen0 == 0) { return 0x67100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); } else if (widen0 != 0) { unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67140 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); } else { unreachable("No matching state found in add_frsq_approx_f32"); } } static inline unsigned bi_pack_add_fsincos_offset_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned scale = I->scale; assert(scale < 2); return 0x67aa0 | (src0 << 0) | (scale << 3); } static inline unsigned bi_pack_add_fsin_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { assert((1 << src0) & 0xf7); unsigned offset = I->offset; assert(offset < 2); return 0x67a80 | (src0 << 0) | (offset << 4); } static inline unsigned bi_pack_add_hadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12); } static inline unsigned bi_pack_add_hadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12); } static inline unsigned 
bi_pack_add_hadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) {
/* Each table maps unsupported inputs to ~0 so the range assert right after
 * the lookup rejects them. Accepted: I->round 1 -> 1, 2 -> 0; swizzle index
 * 1 -> 0, 2 -> 1 for both sources. */
static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swap1 = swap1_table[I->src[1].swizzle]; assert(swap1 < 2); static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swap0 = swap0_table[I->src[0].swizzle]; assert(swap0 < 2); return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); }
/* bi_pack_add_hadd_v2u16: returns 0xbc8c0 with src0 at bit 0, src1 at bit 3,
 * swap1 at bit 9, swap0 at bit 10 and round at bit 12. swap0/swap1 come from
 * I->src[0]/I->src[1] swizzle (index 1 -> 0, 2 -> 1); round from I->round
 * (1 -> 1, 2 -> 0). Other inputs trip the asserts. src2/src3 are unused. */
static inline unsigned bi_pack_add_hadd_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swap1 = swap1_table[I->src[1].swizzle]; assert(swap1 < 2); static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swap0 = swap0_table[I->src[0].swizzle]; assert(swap0 < 2); return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); }
/* bi_pack_add_hadd_v4s8: returns 0xbc440 with src0 at bit 0, src1 at bit 3
 * and round at bit 12 (I->round 1 -> 1, 2 -> 0; anything else asserts).
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_hadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12); }
/* bi_pack_add_hadd_v4u8 (name and body continue on the next line): returns
 * 0xbc4c0 with src0 at bit 0, src1 at bit 3 and round at bit 12. */
static inline unsigned 
bi_pack_add_hadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) {
/* round is looked up from I->round (1 -> 1, 2 -> 0); other values map to ~0
 * and fail the assert. Result: 0xbc4c0 | src0 | src1 << 3 | round << 12. */
static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 2); return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 12); }
/* bi_pack_add_iabs_s32: returns 0x3dea0 with src0 at bit 0. I and src1..src3
 * are not read. */
static inline unsigned bi_pack_add_iabs_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x3dea0 | (src0 << 0); }
/* bi_pack_add_iabs_v2s16: returns 0x3de88 with src0 at bit 0 and swz0 at
 * bit 4. swz0 is looked up from I->src[0].swizzle: index 0 -> 0, 1 -> 2,
 * 2 -> 1, 3 -> 3; higher indices map to ~0 and trip the assert.
 * src1..src3 are unused. */
static inline unsigned bi_pack_add_iabs_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); return 0x3de88 | (src0 << 0) | (swz0 << 4); }
/* bi_pack_add_iabs_v4s8: returns 0x3deb0 with src0 at bit 0. I and src1..src3
 * are not read. */
static inline unsigned bi_pack_add_iabs_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x3deb0 | (src0 << 0); }
/* bi_pack_add_iadd_s32 (body continues on the next line): three encodings
 * selected by lanes1, looked up from I->src[1].swizzle (index 0 -> 1, 1 -> 0,
 * 3 -> 2, 4 -> 3, 5 -> 4, 6 -> 5, 7 -> 6). All carry src0 at bit 0, src1 at
 * bit 3 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbc600
 *   lanes1 1..2  : base 0xbec00, (lanes1 - 1) at bit 9
 *   lanes1 3..6  : base 0xbe000, (lanes1 - 3) at bit 9
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_iadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (lanes1 == 0) { return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 
== 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_s32"); } }
/* bi_pack_add_iadd_u32 (body continues on the next line): three encodings
 * selected by lanes1, looked up from I->src[1].swizzle (index 0 -> 1, 1 -> 0,
 * 3 -> 2, 4 -> 3, 5 -> 4, 6 -> 5, 7 -> 6). All carry src0 at bit 0, src1 at
 * bit 3, derived_7 at bit 7 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbc600
 *   lanes1 1..2  : base 0xbec00, (lanes1 - 1) at bit 9
 *   lanes1 3..6  : base 0xbe000, (lanes1 - 3) at bit 9
 * derived_7 is 0 only when saturate == 0 and lanes1 == 0, so it equals
 * saturate in the first encoding and is always 1 in the other two.
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_iadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (lanes1 == 0) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 
3; else unreachable("No pattern match at pos 9"); return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_u32"); } }
/* bi_pack_add_iadd_v2s16: three encodings selected by the lane lookups.
 * lanes0 comes from I->src[0].swizzle (index 1 -> 0, 2 -> 1); lanes1 from
 * I->src[1].swizzle (index 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3, 8 -> 4, 9 -> 5).
 * All carry src0 at bit 0, src1 at bit 3 and I->saturate at bit 8:
 *   lanes1 0..1               : base 0xbc800, lanes1 at bit 9, lanes0 at bit 10
 *   lanes0 == 0, lanes1 2..3  : base 0xbec40, (lanes1 - 2) at bit 9
 *   lanes0 == 0, lanes1 4..5  : base 0xbe800, (lanes1 - 4) at bit 9
 * Any other combination (lanes0 == 1 with lanes1 >= 2) reaches the final
 * unreachable(). src2/src3 are unused. */
static inline unsigned bi_pack_add_iadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 2); static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v2s16"); } }
/* bi_pack_add_iadd_v2u16 (signature and body continue on the following
 * lines): same lane lookups, base words and bit-9/bit-10 fields as the
 * v2s16 packer, with an additional derived_7 at bit 7 that is 1 when
 * saturate == 1 or lanes1 is 4 or 5, and 0 otherwise. */
static inline unsigned bi_pack_add_iadd_v2u16(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) {
/* Body of bi_pack_add_iadd_v2u16 (the signature starts on the previous line).
 * Operand lookups: lanes0 from I->src[0].swizzle (index 1 -> 0, 2 -> 1),
 * lanes1 from I->src[1].swizzle (index 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3,
 * 8 -> 4, 9 -> 5). Unsupported indices map to ~0 and fail the asserts. */
unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 2); static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8);
/* Encoding 1 (lanes1 0..1): base 0xbc800, lanes1 at bit 9, lanes0 at bit 10.
 * lanes1 is never 4 or 5 here, so derived_7 (bit 7) equals saturate. */
if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); }
/* Encoding 2 (lanes0 == 0, lanes1 2..3): base 0xbec40, (lanes1 - 2) at bit 9;
 * derived_7 again equals saturate. */
else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); }
/* Encoding 3 (lanes0 == 0, lanes1 4..5): derived_7 is always 1 in this
 * branch; the rest of the branch continues on the next line. */
else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 
else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v2u16"); } }
/* bi_pack_add_iadd_v4s8: encodings selected by lanes1, looked up from
 * I->src[1].swizzle (index 1 -> 0, 4 -> 1, 5 -> 2, 6 -> 3, 7 -> 4). lanes0
 * accepts only I->src[0].swizzle index 1 (-> 0). All encodings carry src0 at
 * bit 0, src1 at bit 3 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbc400
 *   lanes1 1..4  : base 0xbe040, (lanes1 - 1) at bit 9
 *   lanes1 5..6  : base 0xbe840, (lanes1 - 5) at bit 9
 * NOTE(review): lanes1_table as written never yields 5 or 6, so the 0xbe840
 * branch looks unreachable from this table - confirm against the generator
 * input. src2/src3 are unused. */
static inline unsigned bi_pack_add_iadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 8); static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if ((lanes0 == 0) && (lanes1 == 0)) { return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v4s8"); } }
/* bi_pack_add_iadd_v4u8 (body continues on the next line): same lane lookups
 * and base words as the v4s8 packer, with derived_7 (equal to saturate) also
 * placed at bit 7. */
static inline unsigned bi_pack_add_iadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { 
unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 8); static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8);
/* Encoding selection for bi_pack_add_iadd_v4u8 (signature is on the previous
 * line). lanes0 is 0 for the only accepted swizzle (index 1); lanes1 maps
 * swizzle index 1 -> 0, 4 -> 1, 5 -> 2, 6 -> 3, 7 -> 4. Every encoding carries
 * src0 at bit 0, src1 at bit 3, derived_7 (== saturate) at bit 7 and saturate
 * at bit 8:
 *   lanes1 == 0  : base 0xbc400
 *   lanes1 1..4  : base 0xbe040, (lanes1 - 1) at bit 9
 *   lanes1 5..6  : base 0xbe840, (lanes1 - 5) at bit 9
 * NOTE(review): lanes1_table never yields 5 or 6, so the last encoding looks
 * unreachable from this table - confirm against the generator input. */
if ((lanes0 == 0) && (lanes1 == 0)) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v4u8"); } }
/* bi_pack_add_icmp_i32 (body continues on the next line): returns 0x7b300
 * with src0 at bit 0, src1 at bit 3, cmpf at bit 6 and result_type at bit 10.
 * result_type is looked up from I->result_type (0 -> 0, 2 -> 1); cmpf from
 * I->cmpf (0 -> 0, 3 -> 1). Other values map to ~0 and trip the asserts.
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 
}; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); }
/* bi_pack_add_icmp_s32: returns 0x7b200 with src0 at bit 0, src1 at bit 3,
 * derived_6 (the final cmpf, 0 or 1) at bit 6 and result_type at bit 10.
 * result_type: I->result_type 0 -> 0, 2 -> 1. cmpf: I->cmpf 1 -> 0, 2 -> 1,
 * 4 -> 2, 5 -> 3. For cmpf 2 or 3 the two sources are swapped and cmpf is
 * rewritten to 0 or 1 respectively, so only two comparison codes are ever
 * emitted. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); }
/* bi_pack_add_icmp_u32 (return expression continues on the next line): same
 * lookups and source swap as the s32 packer, with base word 0x7b280: src0 at
 * bit 0, src1 at bit 3, derived_6 at bit 6, result_type at bit 10. */
static inline unsigned bi_pack_add_icmp_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b280 | (src0 << 0) | (src1 << 
3) | (result_type << 10) | (derived_6 << 6); }
/* bi_pack_add_icmp_v2i16: returns 0x7a000 with src0 at bit 0, src1 at bit 3,
 * swz0 at bit 6, swz1 at bit 8, result_type at bit 10 and cmpf at bit 11.
 * swz0/swz1 come from I->src[0]/I->src[1] swizzle (index 0 -> 0, 1 -> 2,
 * 2 -> 1, 3 -> 3); result_type from I->result_type (0 -> 0, 2 -> 1); cmpf
 * from I->cmpf (0 -> 0, 3 -> 1). Other inputs trip the asserts.
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11); }
/* bi_pack_add_icmp_v2s16 (body continues on the next line): returns 0x78000
 * with src0 at bit 0, src1 at bit 3, swz0 at bit 6, swz1 at bit 8,
 * result_type at bit 10 and derived_12 (the final cmpf) at bit 12.
 * cmpf: I->cmpf 1 -> 0, 2 -> 1, 4 -> 2, 5 -> 3. For cmpf 2 or 3 both the
 * sources and their swizzles are swapped and cmpf is rewritten to 0 or 1. */
static inline unsigned bi_pack_add_icmp_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if 
(cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); }
/* bi_pack_add_icmp_v2u16: returns 0x78800 with src0 at bit 0, src1 at bit 3,
 * swz0 at bit 6, swz1 at bit 8, result_type at bit 10 and derived_12 (the
 * final cmpf) at bit 12. swz0/swz1: swizzle index 0 -> 0, 1 -> 2, 2 -> 1,
 * 3 -> 3. result_type: I->result_type 0 -> 0, 2 -> 1. cmpf: I->cmpf 1 -> 0,
 * 2 -> 1, 4 -> 2, 5 -> 3; for cmpf 2 or 3 the sources and their swizzles are
 * swapped and cmpf becomes 0 or 1. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swz1 = swz1_table[I->src[1].swizzle]; assert(swz1 < 4); static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); }
/* bi_pack_add_icmp_v4i8 (body continues on the next line): returns 0x7b100
 * with src0 at bit 0, src1 at bit 3, cmpf at bit 6 and result_type at bit 10.
 * result_type: I->result_type 0 -> 0, 2 -> 1. cmpf: I->cmpf 0 -> 0, 3 -> 1. */
static inline unsigned bi_pack_add_icmp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { 0, ~0, ~0, 
1, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); }
/* bi_pack_add_icmp_v4s8: returns 0x7b000 with src0 at bit 0, src1 at bit 3,
 * derived_6 (the final cmpf) at bit 6 and result_type at bit 10.
 * result_type: I->result_type 0 -> 0, 2 -> 1. cmpf: I->cmpf 1 -> 0, 2 -> 1,
 * 4 -> 2, 5 -> 3; for cmpf 2 or 3 the sources are swapped and cmpf becomes
 * 0 or 1. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); }
/* bi_pack_add_icmp_v4u8: same lookups and source swap as the v4s8 packer,
 * with base word 0x7b080: src0 at bit 0, src1 at bit 3, derived_6 at bit 6,
 * result_type at bit 10. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmp_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 2) cmpf = 0; else if (cmpf == 3) cmpf = 1; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); }
/* bi_pack_add_icmpf_i32 (signature and body continue on the next line):
 * returns 0x7be00 with src0 at bit 0, src1 at bit 3 and src2 at bit 6.
 * I and src3 are not read. */
static inline unsigned bi_pack_add_icmpf_i32(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6); }
/* bi_pack_add_icmpi_i32: returns 0x7b900 with src0 at bit 0, src1 at bit 3,
 * cmpf at bit 6 and result_type at bit 10. result_type: I->result_type
 * 0 -> 0, 2 -> 1. cmpf: I->cmpf 0 -> 0, 3 -> 1. Other values map to ~0 and
 * trip the asserts. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmpi_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); }
/* bi_pack_add_icmpi_s32: returns 0x7b800 with src0 at bit 0, src1 at bit 3,
 * cmpf at bit 6 and result_type at bit 10. result_type: I->result_type
 * 0 -> 0, 2 -> 1. cmpf: I->cmpf 1 -> 0, 2 -> 1 (no source swap here).
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_icmpi_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); }
/* bi_pack_add_icmpi_u32: same lookups as the icmpi_s32 packer, with base word
 * 0x7b880: src0 at bit 0, src1 at bit 3, cmpf at bit 6, result_type at
 * bit 10. src2/src3 are unused. */
static inline unsigned bi_pack_add_icmpi_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; assert(I->result_type < 4); unsigned result_type = result_type_table[I->result_type]; assert(result_type < 2); static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->cmpf < 9); unsigned cmpf = cmpf_table[I->cmpf]; assert(cmpf < 2); return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); }
/* bi_pack_add_icmpm_i32 (signature and body continue on the next line):
 * returns 0x7ba00 with src0 at bit 0, src1 at bit 3 and src2 at bit 6.
 * I and src3 are not read. */
static inline unsigned bi_pack_add_icmpm_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum 
bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6); }
/* bi_pack_add_ilogb_f32: returns 0x3d9e0 with src0 at bit 0 and widen0 at
 * bit 3. widen0 is looked up from I->src[0].swizzle: index 0 -> 2, 1 -> 1,
 * 3 -> 3; other indices map to ~0 and trip the assert. src1..src3 unused. */
static inline unsigned bi_pack_add_ilogb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned widen0 = widen0_table[I->src[0].swizzle]; assert(widen0 < 4); return 0x3d9e0 | (src0 << 0) | (widen0 << 3); }
/* bi_pack_add_ilogb_v2f16: returns 0x3d9c0 with src0 at bit 0 and swz0 at
 * bit 3. swz0 is looked up from I->src[0].swizzle: index 0 -> 0, 1 -> 2,
 * 2 -> 1, 3 -> 3; higher indices trip the assert. src1..src3 unused. */
static inline unsigned bi_pack_add_ilogb_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); return 0x3d9c0 | (src0 << 0) | (swz0 << 3); }
/* bi_pack_add_imov_fma: returns 0xd7820 with I->threads (asserted < 2) at
 * bit 3. None of src0..src3 is encoded. */
static inline unsigned bi_pack_add_imov_fma(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned threads = I->threads; assert(threads < 2); return 0xd7820 | (threads << 3); }
/* bi_pack_add_isub_s32 (body continues on the next line): three encodings
 * selected by lanes1, looked up from I->src[1].swizzle (index 0 -> 1, 1 -> 0,
 * 3 -> 2, 4 -> 3, 5 -> 4, 6 -> 5, 7 -> 6). All carry src0 at bit 0, src1 at
 * bit 3 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbd600
 *   lanes1 1..2  : base 0xbfc00, (lanes1 - 1) at bit 9
 *   lanes1 3..6  : base 0xbf000, (lanes1 - 3) at bit 9
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_isub_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (lanes1 == 0) { return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes1 == 3) || 
(lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_s32"); } }
/* bi_pack_add_isub_u32 (body continues on the next line): three encodings
 * selected by lanes1, looked up from I->src[1].swizzle (index 0 -> 1, 1 -> 0,
 * 3 -> 2, 4 -> 3, 5 -> 4, 6 -> 5, 7 -> 6). All carry src0 at bit 0, src1 at
 * bit 3, derived_7 at bit 7 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbd600
 *   lanes1 1..2  : base 0xbfc00, (lanes1 - 1) at bit 9
 *   lanes1 3..6  : base 0xbf000, (lanes1 - 3) at bit 9
 * derived_7 is 0 only when saturate == 0 and lanes1 == 0, so it equals
 * saturate in the first encoding and is always 1 in the other two.
 * src2/src3 are unused. */
static inline unsigned bi_pack_add_isub_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (lanes1 == 0) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) 
derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_u32"); } }
/* bi_pack_add_isub_v2s16: three encodings selected by the lane lookups.
 * lanes0 comes from I->src[0].swizzle (index 1 -> 0, 2 -> 1); lanes1 from
 * I->src[1].swizzle (index 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3, 8 -> 4, 9 -> 5).
 * All carry src0 at bit 0, src1 at bit 3 and I->saturate at bit 8:
 *   lanes1 0..1               : base 0xbd800, lanes1 at bit 9, lanes0 at bit 10
 *   lanes0 == 0, lanes1 2..3  : base 0xbfc40, (lanes1 - 2) at bit 9
 *   lanes0 == 0, lanes1 4..5  : base 0xbf800, (lanes1 - 4) at bit 9
 * Any other combination (lanes0 == 1 with lanes1 >= 2) reaches the final
 * unreachable(). src2/src3 are unused. */
static inline unsigned bi_pack_add_isub_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 2); static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v2s16"); } }
/* bi_pack_add_isub_v2u16 (signature and body continue on the following
 * lines): same lane lookups, base words and bit-9/bit-10 fields as the
 * v2s16 packer, with an additional derived_7 at bit 7 that is 1 when
 * saturate == 1 or lanes1 is 4 or 5, and 0 otherwise. */
static inline unsigned bi_pack_add_isub_v2u16(bi_instr 
*I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) {
/* Body of bi_pack_add_isub_v2u16 (the signature starts on the previous line).
 * Operand lookups: lanes0 from I->src[0].swizzle (index 1 -> 0, 2 -> 1),
 * lanes1 from I->src[1].swizzle (index 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3,
 * 8 -> 4, 9 -> 5). Unsupported indices map to ~0 and fail the asserts. */
unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 2); static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8);
/* Encoding 1 (lanes1 0..1): base 0xbd800, lanes1 at bit 9, lanes0 at bit 10.
 * lanes1 is never 4 or 5 here, so derived_7 (bit 7) equals saturate. */
if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); }
/* Encoding 2 (lanes0 == 0, lanes1 2..3): base 0xbfc40, (lanes1 - 2) at bit 9;
 * derived_7 again equals saturate. */
else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); }
/* Encoding 3 (lanes0 == 0, lanes1 4..5): derived_7 is always 1 in this
 * branch; the rest of the branch continues on the next line. */
else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 
= 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v2u16"); } }
/* bi_pack_add_isub_v4s8: encodings selected by lanes1, looked up from
 * I->src[1].swizzle (index 1 -> 0, 4 -> 1, 5 -> 2, 6 -> 3, 7 -> 4). lanes0
 * accepts only I->src[0].swizzle index 1 (-> 0). All encodings carry src0 at
 * bit 0, src1 at bit 3 and I->saturate at bit 8:
 *   lanes1 == 0  : base 0xbd400
 *   lanes1 1..4  : base 0xbf040, (lanes1 - 1) at bit 9
 *   lanes1 5..6  : base 0xbf840, (lanes1 - 5) at bit 9
 * NOTE(review): lanes1_table as written never yields 5 or 6, so the 0xbf840
 * branch looks unreachable from this table - confirm against the generator
 * input. src2/src3 are unused. */
static inline unsigned bi_pack_add_isub_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned saturate = I->saturate; assert(saturate < 2); static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lanes0 = lanes0_table[I->src[0].swizzle]; assert(lanes0 < 8); static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned lanes1 = lanes1_table[I->src[1].swizzle]; assert(lanes1 < 8); if ((lanes0 == 0) && (lanes1 == 0)) { return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v4s8"); } }
/* bi_pack_add_isub_v4u8 (body is on the next line): same lane lookups and
 * base words as the v4s8 packer, with derived_7 (equal to saturate) also
 * placed at bit 7. */
static inline unsigned bi_pack_add_isub_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 
{
    /*
     * Body of bi_pack_add_isub_v4u8.  Lane selectors are looked up from the
     * src[0]/src[1] swizzles (~0 = no encoding); the selector pair picks the
     * opcode constant.  src0 (bit 0), src1 (bit 3) and saturate (bits 8 and 7)
     * are ORed into every variant.
     */
    static const uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 };

    unsigned saturate = I->saturate;
    assert(saturate < 2);

    assert(I->src[0].swizzle < 13);
    unsigned lanes0 = lanes0_table[I->src[0].swizzle];
    assert(lanes0 < 8);

    assert(I->src[1].swizzle < 13);
    unsigned lanes1 = lanes1_table[I->src[1].swizzle];
    assert(lanes1 < 8);

    const unsigned common = (src0 << 0) | (src1 << 3) | (saturate << 8);

    if (lanes0 == 0 && lanes1 <= 6) {
        /* Bit 7 carries a second copy of the saturate flag. */
        unsigned field7 = 0;
        if (saturate <= 1)
            field7 = saturate;
        else
            unreachable("No pattern match at pos 7");

        if (lanes1 == 0)
            return 0xbd400 | common | (field7 << 7);

        if (lanes1 <= 4) {
            /* Selectors 1..4 are stored as 0..3 at bit 9. */
            unsigned field9 = 0;
            if (lanes1 >= 1)
                field9 = lanes1 - 1;
            else
                unreachable("No pattern match at pos 9");
            return 0xbf040 | common | (field7 << 7) | (field9 << 9);
        }

        /* Selectors 5..6 are stored as 0..1 at bit 9. */
        unsigned field9 = 0;
        if (lanes1 == 5 || lanes1 == 6)
            field9 = lanes1 - 5;
        else
            unreachable("No pattern match at pos 9");
        return 0xbf840 | common | (field7 << 7) | (field9 << 9);
    }

    unreachable("No matching state found in add_isub_v4u8");
}

/*
 * Packs the add_jump encoding: src0 must be one of the values whose bit is
 * set in the mask 0xf7 (0-7 except 3); it is ORed into the opcode constant
 * at bit 6.
 */
static inline unsigned
bi_pack_add_jump(bi_instr *I, enum bifrost_packed_src src0,
                 enum bifrost_packed_src src1,
                 enum bifrost_packed_src src2,
                 enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    return 0x6fe34 | (src0 <<
6); } static inline unsigned bi_pack_add_ldexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 8); return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6); } static inline unsigned bi_pack_add_ldexp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; assert(I->round < 9); unsigned round = round_table[I->round]; assert(round < 8); return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6); } static inline unsigned bi_pack_add_ld_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned register_format = I->register_format; assert(register_format < 16); unsigned vecsize = I->vecsize; assert(vecsize < 4); if (register_format != 8) { unsigned derived_13 = 0; if (register_format == 0) derived_13 = 0; else if (register_format == 1) derived_13 = 1; else if (register_format == 2) derived_13 = 2; else if (register_format == 3) derived_13 = 3; else if (register_format == 4) derived_13 = 4; else if (register_format == 5) derived_13 = 5; else if (register_format == 6) derived_13 = 6; else if (register_format == 7) derived_13 = 7; else unreachable("No pattern match at pos 13"); return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); } else if (register_format == 8) { return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); } else { unreachable("No matching state found in add_ld_attr"); } } static inline unsigned bi_pack_add_ld_attr_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum 
bifrost_packed_src src3) {
    /*
     * Body of bi_pack_add_ld_attr_imm.  Register formats 0..7 are stored
     * verbatim at bit 13; format 8 uses a separate opcode constant.  src0 and
     * src1 sit at bits 0 and 3, attribute_index at bit 6, vecsize at bit 11.
     */
    unsigned register_format = I->register_format;
    assert(register_format < 16);

    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    unsigned attribute_index = I->attribute_index;
    assert(attribute_index < 0x10);

    const unsigned common = (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6);

    if (register_format != 8) {
        unsigned field13 = 0;
        if (register_format <= 7)
            field13 = register_format;
        else
            unreachable("No pattern match at pos 13");
        return 0x40000 | common | (field13 << 13);
    }

    if (register_format == 8)
        return 0xc4000 | common;

    unreachable("No matching state found in add_ld_attr_imm");
}

/*
 * Packs the add_ld_attr_tex encoding.  Same layout as add_ld_attr (sources
 * at bits 0/3/6, vecsize at bit 11, register formats 0..7 at bit 13, format 8
 * selected by opcode), with different opcode constants.
 */
static inline unsigned
bi_pack_add_ld_attr_tex(bi_instr *I, enum bifrost_packed_src src0,
                        enum bifrost_packed_src src1,
                        enum bifrost_packed_src src2,
                        enum bifrost_packed_src src3)
{
    unsigned register_format = I->register_format;
    assert(register_format < 16);

    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11);

    if (register_format != 8) {
        unsigned field13 = 0;
        if (register_format <= 7)
            field13 = register_format;
        else
            unreachable("No pattern match at pos 13");
        return 0x40600 | common | (field13 << 13);
    } else if (register_format == 8) {
        return 0xc4600 | common;
} else { unreachable("No matching state found in add_ld_attr_tex"); } }

/*
 * Packs the add_ld_cvt encoding: src0/src1/src2 at bits 0/3/6 and vecsize at
 * bit 9.  src2 must be one of the values whose bit is set in mask 0xf7.
 */
static inline unsigned
bi_pack_add_ld_cvt(bi_instr *I, enum bifrost_packed_src src0,
                   enum bifrost_packed_src src1,
                   enum bifrost_packed_src src2,
                   enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);

    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    unsigned word = 0xc9000;
    word |= (src0 << 0);
    word |= (src1 << 3);
    word |= (src2 << 6);
    word |= (vecsize << 9);
    return word;
}

/*
 * Packs the add_ld_gclk_u64 encoding: I->source is remapped through
 * source_table and stored at bit 0.  No packed sources are used.
 */
static inline unsigned
bi_pack_add_ld_gclk_u64(bi_instr *I, enum bifrost_packed_src src0,
                        enum bifrost_packed_src src1,
                        enum bifrost_packed_src src2,
                        enum bifrost_packed_src src3)
{
    static const uint8_t source_table[] = { 0, 6, 7 };

    assert(I->source < 3);
    unsigned source = source_table[I->source];
    assert(source < 8);

    return 0xd7800 | (source << 0);
}

/*
 * Packs the add_ld_tile encoding: same field layout as add_ld_cvt with a
 * different opcode constant.
 */
static inline unsigned
bi_pack_add_ld_tile(bi_instr *I, enum bifrost_packed_src src0,
                    enum bifrost_packed_src src1,
                    enum bifrost_packed_src src2,
                    enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);

    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    unsigned word = 0xcb000;
    word |= (src0 << 0);
    word |= (src1 << 3);
    word |= (src2 << 6);
    word |= (vecsize << 9);
    return word;
}

/*
 * Packs the add_ld_var encoding.
 *
 * I->register_format is remapped through register_format_table (~0 = no
 * encoding).  Remapped format 2 selects its own opcode constant; otherwise
 * formats 0..1 are stored at bit 19.  The (sample, update) pair is folded
 * into a single value stored at bit 10.  src0/src1 sit at bits 0/3 and
 * vecsize at bit 8.
 */
static inline unsigned
bi_pack_add_ld_var(bi_instr *I, enum bifrost_packed_src src0,
                   enum bifrost_packed_src src1,
                   enum bifrost_packed_src src2,
                   enum bifrost_packed_src src3)
{
    static const uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 };

    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    unsigned update = I->update;
    assert(update < 4);

    assert(I->register_format < 10);
    unsigned register_format = register_format_table[I->register_format];
    assert(register_format < 4);

    unsigned sample = I->sample;
    assert(sample < 8);

    const unsigned common = (src0 << 0) | (src1 << 3) | (vecsize << 8);

    if (register_format != 2) {
        unsigned field19 = 0;
        if (register_format <= 1)
            field19 = register_format;
        else
            unreachable("No pattern match at pos 19");

        unsigned field10 = 0;
        if (update == 0 && sample <= 3)
            field10 = sample;
        else if (update == 1 && sample == 4)
            field10 = 4;
        else if (update == 2 && sample <= 1)
            field10 = 8 + sample;
        else if (update == 3 && sample <= 3)
            field10 = 10 + sample;
        else
            unreachable("No pattern match at pos 10");

        return 0x500c0 | common | (field19 << 19) | (field10 << 10);
    }

    if (register_format == 2) {
        unsigned field10 = 0;
        if (update == 0 && sample <= 3)
            field10 = sample;
        else if (update == 1 && sample == 4)
            field10 = 4;
        else if (update == 2 && sample <= 1)
            field10 = 8 + sample;
        else if (update == 3 && sample <= 3)
            field10 = 10 + sample;
        else
            unreachable("No pattern match at pos 10");

        return 0xcc0c0 | common | (field10 << 10);
    }

    unreachable("No matching state found in add_ld_var");
}

/*
 * Packs the add_ld_var_flat encoding.  The register format and function
 * modifiers are remapped through lookup tables (~0 = no encoding).
 */
static inline unsigned
bi_pack_add_ld_var_flat(bi_instr *I, enum bifrost_packed_src src0,
                        enum bifrost_packed_src src1,
                        enum bifrost_packed_src src2,
                        enum bifrost_packed_src src3)
{
    unsigned vecsize = I->vecsize;
    assert(vecsize < 4);

    static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 };
    assert(I->register_format < 10);
    unsigned register_format = register_format_table[I->register_format];
    assert(register_format < 8);

    static uint8_t function_table[] = { 0, 3, 6,
7 }; assert(I->function < 4); unsigned function = function_table[I->function]; assert(function < 8); if (register_format != 4) { unsigned derived_10 = 0; if ((register_format == 0) || (register_format == 1)) derived_10 = 0; else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; else unreachable("No pattern match at pos 10"); unsigned derived_19 = 0; if ((register_format == 0) || (register_format == 2)) derived_19 = 0; else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; else unreachable("No pattern match at pos 19"); return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19); } else if (register_format == 4) { return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0); } else { unreachable("No matching state found in add_ld_var_flat"); } } static inline unsigned bi_pack_add_ld_var_flat_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned vecsize = I->vecsize; assert(vecsize < 4); static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 }; assert(I->register_format < 10); unsigned register_format = register_format_table[I->register_format]; assert(register_format < 8); static uint8_t function_table[] = { 0, 3, 6, 7 }; assert(I->function < 4); unsigned function = function_table[I->function]; assert(function < 8); unsigned index = I->index; assert(index < 0x20); if (register_format != 4) { unsigned derived_10 = 0; if ((register_format == 0) || (register_format == 1)) derived_10 = 0; else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; else unreachable("No pattern match at pos 10"); unsigned derived_19 = 0; if ((register_format == 0) || (register_format == 2)) derived_19 = 0; else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; else unreachable("No pattern match at pos 19"); return 0x53800 | (vecsize << 8) | (function << 
0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19); } else if (register_format == 4) { return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3); } else { unreachable("No matching state found in add_ld_var_flat_imm"); } } static inline unsigned bi_pack_add_ld_var_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned vecsize = I->vecsize; assert(vecsize < 4); unsigned update = I->update; assert(update < 4); static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; assert(I->register_format < 10); unsigned register_format = register_format_table[I->register_format]; assert(register_format < 4); unsigned sample = I->sample; assert(sample < 8); unsigned index = I->index; assert(index < 0x20); if (register_format != 2) { unsigned derived_19 = 0; if (register_format == 0) derived_19 = 0; else if (register_format == 1) derived_19 = 1; else unreachable("No pattern match at pos 19"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10); } else if (register_format == 2) { unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else 
if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10); } else { unreachable("No matching state found in add_ld_var_imm"); } } static inline unsigned bi_pack_add_ld_var_special(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned varying_name = I->varying_name; assert(varying_name < 32); unsigned vecsize = I->vecsize; assert(vecsize < 4); unsigned update = I->update; assert(update < 4); static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; assert(I->register_format < 10); unsigned register_format = register_format_table[I->register_format]; assert(register_format < 4); unsigned sample = I->sample; assert(sample < 8); if (register_format != 2) { unsigned derived_3 = 0; if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; else unreachable("No pattern match at pos 3"); unsigned derived_19 = 0; if (register_format == 0) derived_19 = 0; else if (register_format == 1) derived_19 = 1; else unreachable("No pattern match at pos 19"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 
1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10); } else if (register_format == 2) { unsigned derived_3 = 0; if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; else unreachable("No pattern match at pos 3"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10); } else { unreachable("No matching state found in add_ld_var_special"); } } static inline unsigned bi_pack_add_lea_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned register_format = I->register_format; assert(register_format < 16); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); } else if (register_format == 8) { return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6); } else { unreachable("No matching state found in add_lea_attr"); } } static inline unsigned bi_pack_add_lea_attr_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned register_format = I->register_format; assert(register_format < 16); unsigned attribute_index = I->attribute_index; assert(attribute_index < 0x10); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11); } else if (register_format == 8) { return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6); } else { unreachable("No matching state found in add_lea_attr_imm"); } } static inline unsigned bi_pack_add_lea_attr_tex(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned register_format = I->register_format; assert(register_format < 16); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); } else if (register_format == 8) { return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6); } else { unreachable("No matching state found in add_lea_attr_tex"); } } static inline unsigned bi_pack_add_lea_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned format = I->format; assert(format < 2); return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11); } static inline unsigned bi_pack_add_lea_tex_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned format = I->format; assert(format < 2); unsigned texture_index = I->texture_index; assert(texture_index < 0x20); return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6); } static inline unsigned bi_pack_add_load_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned 
bi_pack_add_load_i16(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1,
                     enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3)
{
    /*
     * Packs the add_load_i16 encoding.  The destination swizzle is remapped
     * to a lane selector (~0 = no encoding).  Without extension (extend == 0)
     * selectors 0..1 are stored at bit 9; with extension, selectors 2 and 3
     * each pick their own opcode constant and bit 9 carries extend - 1.
     * src0/src1 sit at bits 0/3 and the remapped segment at bit 6.
     */
    static const uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 };
    static const uint8_t lane_dest_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->seg < 6);
    unsigned seg = seg_table[I->seg];
    assert(seg < 8);

    assert(I->dest->swizzle < 13);
    unsigned lane_dest = lane_dest_table[I->dest->swizzle];
    assert(lane_dest < 4);

    unsigned extend = I->extend;
    assert(extend < 4);

    const unsigned common = (src0 << 0) | (src1 << 3) | (seg << 6);

    if ((extend == 0) && (lane_dest == 0 || lane_dest == 1)) {
        unsigned field9 = 0;
        if (lane_dest <= 1)
            field9 = lane_dest;
        else
            unreachable("No pattern match at pos 9");
        return 0x60800 | common | (field9 << 9);
    }

    if ((extend != 0) && (lane_dest == 2 || lane_dest == 3)) {
        unsigned field9 = 0;
        if (extend == 1 || extend == 2)
            field9 = extend - 1;
        else
            unreachable("No pattern match at pos 9");

        const unsigned opcode = (lane_dest == 2) ? 0x63000 : 0x61800;
        return opcode | common | (field9 << 9);
    }

    unreachable("No matching state found in add_load_i16");
}

/*
 * Packs the add_load_i24 encoding: src0/src1 at bits 0/3 and the remapped
 * segment at bit 6.
 */
static inline unsigned
bi_pack_add_load_i24(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1,
                     enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3)
{
    static const uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 };

    assert(I->seg < 6);
    unsigned seg = seg_table[I->seg];
    assert(seg < 8);

    unsigned word = 0x65000;
    word |= (src0 << 0);
    word |= (src1 << 3);
    word |= (seg << 6);
    return word;
}

/* Packs the add_load_i32 encoding. */
static inline unsigned
bi_pack_add_load_i32(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1,
                     enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3)
{
    static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 };
assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); static uint8_t lane_dest_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->dest->swizzle < 13); unsigned lane_dest = lane_dest_table[I->dest->swizzle]; assert(lane_dest < 2); unsigned extend = I->extend; assert(extend < 4); if ((extend == 0) && (lane_dest == 0)) { return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6); } else if ((extend != 0) && (lane_dest == 1)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else { unreachable("No matching state found in add_load_i32"); } } static inline unsigned bi_pack_add_load_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_load_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_load_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); static uint8_t lane_dest_table[] = { 4, 6, ~0, 5, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->dest->swizzle < 13); unsigned lane_dest = lane_dest_table[I->dest->swizzle]; assert(lane_dest < 8); unsigned extend = I->extend; 
assert(extend < 4); if ((extend == 0) && ((lane_dest == 0) || (lane_dest == 1) || (lane_dest == 2) || (lane_dest == 3))) { unsigned derived_9 = 0; if (lane_dest == 0) derived_9 = 0; else if (lane_dest == 1) derived_9 = 1; else if (lane_dest == 2) derived_9 = 2; else if (lane_dest == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else if ((extend != 0) && ((lane_dest == 4) || (lane_dest == 5))) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lane_dest == 4) derived_10 = 0; else if (lane_dest == 5) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10); } else if ((extend != 0) && (lane_dest == 6)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else if ((extend != 0) && (lane_dest == 7)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else { unreachable("No matching state found in add_load_i8"); } } static inline unsigned bi_pack_add_load_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_logb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum 
bifrost_packed_src src3) {
    /*
     * Body of bi_pack_add_logb_f32: the src[0] swizzle is remapped through
     * widen0_table (~0 = no encoding) and stored at bit 3, next to src0 at
     * bit 0.
     */
    static const uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);

    unsigned word = 0x3d9a0;
    word |= (src0 << 0);
    word |= (widen0 << 3);
    return word;
}

/*
 * Packs the add_logb_v2f16 encoding: src0 at bit 0 and the remapped src[0]
 * swizzle at bit 3.
 */
static inline unsigned
bi_pack_add_logb_v2f16(bi_instr *I, enum bifrost_packed_src src0,
                       enum bifrost_packed_src src1,
                       enum bifrost_packed_src src2,
                       enum bifrost_packed_src src3)
{
    static const uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz0_table[I->src[0].swizzle];
    assert(swz0 < 4);

    unsigned word = 0x3d980;
    word |= (src0 << 0);
    word |= (swz0 << 3);
    return word;
}

/*
 * Packs the add_mkvec_v2i16 encoding: src0/src1 at bits 0/3, and one lane
 * bit per source (remapped from the swizzles) at bits 6 and 7.
 */
static inline unsigned
bi_pack_add_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0,
                        enum bifrost_packed_src src1,
                        enum bifrost_packed_src src2,
                        enum bifrost_packed_src src3)
{
    static const uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t lane1_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane0_table[I->src[0].swizzle];
    assert(lane0 < 2);

    assert(I->src[1].swizzle < 13);
    unsigned lane1 = lane1_table[I->src[1].swizzle];
    assert(lane1 < 2);

    unsigned word = 0x75300;
    word |= (src0 << 0);
    word |= (src1 << 3);
    word |= (lane0 << 6);
    word |= (lane1 << 7);
    return word;
}

/* Packs the add_mov_i32 encoding: only src0 (bit 0) is variable. */
static inline unsigned
bi_pack_add_mov_i32(bi_instr *I, enum bifrost_packed_src src0,
                    enum bifrost_packed_src src1,
                    enum bifrost_packed_src src2,
                    enum bifrost_packed_src src3)
{
    unsigned word = 0x3d968;
    word |= (src0 << 0);
    return word;
}

/*
 * Packs the add_mux_i32 encoding: sources at bits 0/3/6 and the two-bit mux
 * modifier at bit 9.
 */
static inline unsigned
bi_pack_add_mux_i32(bi_instr *I, enum bifrost_packed_src src0,
                    enum bifrost_packed_src src1,
                    enum bifrost_packed_src src2,
                    enum bifrost_packed_src src3)
{
    unsigned mux = I->mux;
    assert(mux < 4);

    unsigned word = 0x74000;
    word |= (src0 << 0);
    word |= (src1 << 3);
    word |= (src2 << 6);
    word |= (mux << 9);
    return word;
}

/* Packs the add_mux_v2i16 encoding. */
static inline unsigned
bi_pack_add_mux_v2i16(bi_instr *I, enum bifrost_packed_src src0,
                      enum bifrost_packed_src src1,
                      enum bifrost_packed_src src2,
                      enum bifrost_packed_src src3)
{
    unsigned mux =
I->mux; assert(mux < 4); static uint8_t swap2_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[2].swizzle < 13); unsigned swap2 = swap2_table[I->src[2].swizzle]; assert(swap2 < 2); static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[1].swizzle < 13); unsigned swap1 = swap1_table[I->src[1].swizzle]; assert(swap1 < 2); static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swap0 = swap0_table[I->src[0].swizzle]; assert(swap0 < 2); return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13); } static inline unsigned bi_pack_add_mux_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned mux = I->mux; assert(mux < 2); return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); } static inline unsigned bi_pack_add_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x3d964; } static inline unsigned bi_pack_add_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x3d970 | (src0 << 0); } static inline unsigned bi_pack_add_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); return 0x3d900 | (src0 << 0) | (swz0 << 4); } static inline unsigned bi_pack_add_s16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t 
lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x3cce0 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x3ccc0 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_s32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3cbc0 | (src0 << 0) | (derived_4 << 4); } else if (round == 4) { return 0x3cd00 | (src0 << 0); } else { unreachable("No matching state found in add_s32_to_f32"); } } static inline unsigned bi_pack_add_s8_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 4); return 0x3cb80 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = 
lane0_table[I->src[0].swizzle]; assert(lane0 < 4); return 0x3cb40 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { ~0, 2, 0, ~0, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); unsigned preserve_null = I->preserve_null; assert(preserve_null < 2); return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); } static inline unsigned bi_pack_add_shaddxh_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0x3f8c0 | (src0 << 0) | (src1 << 3); } static inline unsigned bi_pack_add_shift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned bi_pack_add_store_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i24(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned 
seg = seg_table[I->seg]; assert(seg < 8); return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_store_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; assert(I->seg < 6); unsigned seg = seg_table[I->seg]; assert(seg < 8); return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned bi_pack_add_st_cvt(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* add_st_cvt: constant 0xc9800 with src0/src1/src2 at bits 0/3/6 and
     * I->vecsize at bit 9. The mask 0xf7 has bit 3 clear, so src2 == 3 is
     * rejected by the first assert. */
    assert((1 << src2) & 0xf7);

    unsigned vec = I->vecsize;
    assert(vec < 4);

    unsigned word = 0xc9800;
    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= vec << 9;
    return word;
}

/* add_st_tile: constant 0xcb800 with src0/src1/src2 at bits 0/3/6 and
 * I->vecsize at bit 9. src2 == 3 is rejected (bit 3 of 0xf7 is clear). */
static inline unsigned
bi_pack_add_st_tile(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);

    unsigned vec = I->vecsize;
    assert(vec < 4);

    unsigned word = 0xcb800;
    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= vec << 9;
    return word;
}

/* add_swz_v2i16: constant 0x3d948 with src0 at bit 0 and the translated
 * src[0] swizzle at bit 4. */
static inline unsigned
bi_pack_add_swz_v2i16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Swizzle values 0, 2 and 3 map to 0, 1 and 3; the rest are rejected. */
    static uint8_t swz_enc[] = {
        0, ~0, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    assert(I->src[0].swizzle < 13);
    unsigned swz = swz_enc[I->src[0].swizzle];
    assert(swz < 4);

    unsigned word = 0x3d948;
    word |= src0 << 0;
    word |= swz << 4;
    return word;
}

/* add_swz_v4i8: constant 0x3df40 with src0 at bit 0 and the translated
 * src[0] swizzle at bit 3. */
static inline unsigned
bi_pack_add_swz_v4i8(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Swizzle values 4..11 map to 0..7; the rest are rejected. */
    static uint8_t swz_enc[] = {
        ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, 6, 7, ~0
    };
    assert(I->src[0].swizzle < 13);
    unsigned swz = swz_enc[I->src[0].swizzle];
    assert(swz < 8);

    unsigned word = 0x3df40;
    word |= src0 << 0;
    word |= swz << 3;
    return word;
}

/* add_texc: constant 0xd7000 with src0/src1/src2 at bits 0/3/6 and I->skip
 * at bit 9. src2 == 3 is rejected (bit 3 of 0xf7 is clear). */
static inline unsigned
bi_pack_add_texc(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);

    unsigned skip_bit = I->skip;
    assert(skip_bit < 2);

    unsigned word = 0xd7000;
    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= skip_bit << 9;
    return word;
}

static inline unsigned bi_pack_add_texs_2d_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned skip = I->skip; assert(skip < 2); unsigned lod_mode = I->lod_mode; assert(lod_mode < 2); unsigned
texture_index = I->texture_index; assert(texture_index < 0x8); unsigned sampler_index = I->sampler_index; assert(sampler_index < 0x8); return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); } static inline unsigned bi_pack_add_texs_2d_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned skip = I->skip; assert(skip < 2); unsigned lod_mode = I->lod_mode; assert(lod_mode < 2); unsigned texture_index = I->texture_index; assert(texture_index < 0x8); unsigned sampler_index = I->sampler_index; assert(sampler_index < 0x8); return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); } static inline unsigned bi_pack_add_texs_cube_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned skip = I->skip; assert(skip < 2); unsigned sampler_index = I->sampler_index; assert(sampler_index < 0x4); unsigned texture_index = I->texture_index; assert(texture_index < 0x4); return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); } static inline unsigned bi_pack_add_texs_cube_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned skip = I->skip; assert(skip < 2); unsigned sampler_index = I->sampler_index; assert(sampler_index < 0x4); unsigned texture_index = I->texture_index; assert(texture_index < 0x4); return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); } static inline unsigned bi_pack_add_u16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t 
lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x3cce8 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 2); return 0x3ccc8 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_u32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3cbc8 | (src0 << 0) | (derived_4 << 4); } else if (round == 4) { return 0x3cd08 | (src0 << 0); } else { unreachable("No matching state found in add_u32_to_f32"); } } static inline unsigned bi_pack_add_u8_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = lane0_table[I->src[0].swizzle]; assert(lane0 < 4); return 0x3cb88 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned lane0 = 
lane0_table[I->src[0].swizzle]; assert(lane0 < 4); return 0x3cb48 | (src0 << 0) | (lane0 << 4); } static inline unsigned bi_pack_add_v2f16_to_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c200 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { return 0x3ca80 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2f16_to_v2s16"); } } static inline unsigned bi_pack_add_v2f16_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { return 0x3ca88 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2f16_to_v2u16"); } } static inline unsigned bi_pack_add_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned abs0 = 
I->src[0].abs; assert(abs0 < 2); unsigned abs1 = I->src[1].abs; assert(abs1 < 2); unsigned neg0 = I->src[0].neg; assert(neg0 < 2); unsigned neg1 = I->src[1].neg; assert(neg1 < 2); unsigned clamp = I->clamp; assert(clamp < 4); unsigned round = I->round; assert(round < 8); unsigned derived_6 = 0; if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; else unreachable("No pattern match at pos 6"); unsigned derived_7 = 0; if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); } static inline unsigned bi_pack_add_v2s16_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned round = I->round; assert(round < 8); static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { return 0x3cb00 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2s16_to_v2f16"); } } static inline unsigned bi_pack_add_v2s8_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle]; assert(swz0 < 16); return 0x3c800 | (src0 << 0) | (swz0 << 4); } static inline 
unsigned
bi_pack_add_v2s8_to_v2s16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* add_v2s8_to_v2s16: constant 0x3c700 with src0 at bit 0 and a four-bit
     * swizzle code at bit 4. The table is indexed by I->src[0].swizzle;
     * indices 4..9 and 12 carry a code, the rest fail the range assert. */
    static uint8_t swz_enc[] = {
        ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8
    };
    assert(I->src[0].swizzle < 13);
    unsigned swz = swz_enc[I->src[0].swizzle];
    assert(swz < 16);

    unsigned word = 0x3c700;
    word |= src0 << 0;
    word |= swz << 4;
    return word;
}

/* add_v2u16_to_v2f16: with I->round == 4 the word is 0x3cb08 and the swizzle
 * code sits at bit 4; with I->round in 0..3 the word is 0x3c608, the round
 * value sits at bit 4 and the swizzle code moves to bit 6. */
static inline unsigned
bi_pack_add_v2u16_to_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned rmode = I->round;
    assert(rmode < 8);

    /* Indexed by I->src[0].swizzle; only indices 0..3 pass the assert. */
    static uint8_t swz_enc[] = {
        0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0
    };
    assert(I->src[0].swizzle < 13);
    unsigned swz = swz_enc[I->src[0].swizzle];
    assert(swz < 4);

    if (rmode == 4) {
        return 0x3cb08 | (src0 << 0) | (swz << 4);
    } else if (rmode <= 3) {
        return 0x3c608 | (src0 << 0) | (swz << 6) | (rmode << 4);
    } else if (rmode > 4) {
        unreachable("No pattern match at pos 4");
    } else {
        unreachable("No matching state found in add_v2u16_to_v2f16");
    }
}

/* add_v2u8_to_v2f16: constant 0x3c808 with src0 at bit 0 and a four-bit
 * swizzle code at bit 4. */
static inline unsigned
bi_pack_add_v2u8_to_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[0].swizzle; indices 4..9 and 12 carry a code, every
     * other index is rejected by the range assert. */
    static uint8_t swz_enc[] = {
        ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8
    };
    assert(I->src[0].swizzle < 13);
    unsigned swz = swz_enc[I->src[0].swizzle];
    assert(swz < 16);

    unsigned word = 0x3c808;
    word |= src0 << 0;
    word |= swz << 4;
    return word;
}

static inline unsigned bi_pack_add_v2u8_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; assert(I->src[0].swizzle < 13); unsigned swz0 = swz0_table[I->src[0].swizzle];
assert(swz0 < 16); return 0x3c708 | (src0 << 0) | (swz0 << 4); } static inline unsigned bi_pack_add_var_tex_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned update = I->update; assert(update < 2); unsigned skip = I->skip; assert(skip < 2); unsigned lod_mode = I->lod_mode; assert(lod_mode < 2); static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; assert(I->sample < 6); unsigned sample = sample_table[I->sample]; assert(sample < 2); unsigned varying_index = I->varying_index; assert(varying_index < 0x8); unsigned texture_index = I->texture_index; assert(texture_index < 0x4); unsigned derived_5 = 0; if ((sample == 0) && (update == 0)) derived_5 = 0; else if ((sample == 1) && (update == 1)) derived_5 = 1; else unreachable("No pattern match at pos 5"); return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); } static inline unsigned bi_pack_add_var_tex_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { unsigned update = I->update; assert(update < 2); unsigned skip = I->skip; assert(skip < 2); unsigned lod_mode = I->lod_mode; assert(lod_mode < 2); static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; assert(I->sample < 6); unsigned sample = sample_table[I->sample]; assert(sample < 2); unsigned varying_index = I->varying_index; assert(varying_index < 0x8); unsigned texture_index = I->texture_index; assert(texture_index < 0x4); unsigned derived_5 = 0; if ((sample == 0) && (update == 0)) derived_5 = 0; else if ((sample == 1) && (update == 1)) derived_5 = 1; else unreachable("No pattern match at pos 5"); return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); } static inline unsigned bi_pack_add_vn_asst2_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* add_vn_asst2_f32: I->scale picks both the constant and the bit that
     * carries the src[0] negate flag (bit 3 for scale 0, bit 4 for scale 1). */
    unsigned scale = I->scale;
    assert(scale < 2);

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);

    switch (scale) {
    case 0:
        return 0x3df80 | (src0 << 0) | (neg0 << 3);
    case 1:
        return 0x3de80 | (src0 << 0) | (neg0 << 4);
    default:
        unreachable("No matching state found in add_vn_asst2_f32");
    }
}

/* add_vn_asst2_v2f16: constant 0x3dfa0 with src0 at bit 0 and the src[0]
 * negate flag at bit 3. */
static inline unsigned
bi_pack_add_vn_asst2_v2f16(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned negate = I->src[0].neg;
    assert(negate < 2);

    unsigned word = 0x3dfa0;
    word |= src0 << 0;
    word |= negate << 3;
    return word;
}

/* add_wmask: constant 0x3d700 with src0 at bit 0, I->fill at bit 3 and
 * I->subgroup at bit 4. */
static inline unsigned
bi_pack_add_wmask(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned subgroup = I->subgroup;
    assert(subgroup < 4);

    unsigned fill = I->fill;
    assert(fill < 0x2);

    unsigned word = 0x3d700;
    word |= src0 << 0;
    word |= fill << 3;
    word |= subgroup << 4;
    return word;
}

/* add_zs_emit: constant 0xd7800 with src0/src1/src2 at bits 0/3/6 and a
 * two-bit mode at bit 9 derived from I->stencil and I->z. The combination
 * stencil == 0, z == 0 has no encoding and hits unreachable(). */
static inline unsigned
bi_pack_add_zs_emit(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned stencil = I->stencil;
    assert(stencil < 2);

    unsigned z = I->z;
    assert(z < 2);

    unsigned mode = 0;
    if (stencil == 1) {
        if (z == 0) {
            mode = 1;
        } else if (z == 1) {
            mode = 3;
        } else {
            unreachable("No pattern match at pos 9");
        }
    } else if ((stencil == 0) && (z == 1)) {
        mode = 2;
    } else {
        unreachable("No pattern match at pos 9");
    }

    unsigned word = 0xd7800;
    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= mode << 9;
    return word;
}

/*
 * Dispatch on I->op to the matching bi_pack_fma_* helper and return its
 * result.
 *
 * A NULL instruction is packed with bi_pack_fma_nop. For every other
 * instruction src0 and src1 must not equal 2 (bit 2 of the mask 0xfb is
 * clear). An opcode without a case prints the instruction to stderr in
 * builds without NDEBUG and then hits unreachable().
 */
unsigned
bi_pack_fma(bi_instr *I,
        enum bifrost_packed_src src0, enum bifrost_packed_src src1,
        enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    if (!I)
        return bi_pack_fma_nop(I, src0, src1, src2, src3);

    assert((1 << src0) & 0xfb);
    assert((1 << src1) & 0xfb);

    switch (I->op) {
    case BI_OPCODE_ARSHIFT_I32:
        return bi_pack_fma_arshift_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ARSHIFT_V2I16:
        return bi_pack_fma_arshift_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_ARSHIFT_V4I8:
        return bi_pack_fma_arshift_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_ARSHIFT_DOUBLE_I32:
        return bi_pack_fma_arshift_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C_I32:
        return bi_pack_fma_atom_c_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C_I64:
        return bi_pack_fma_atom_c_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C1_I32:
        return bi_pack_fma_atom_c1_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C1_I64:
        return bi_pack_fma_atom_c1_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C1_RETURN_I32:
        return bi_pack_fma_atom_c1_return_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C1_RETURN_I64:
        return bi_pack_fma_atom_c1_return_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C_RETURN_I32:
        return bi_pack_fma_atom_c_return_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_C_RETURN_I64:
        return bi_pack_fma_atom_c_return_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_POST_I32:
        return bi_pack_fma_atom_post_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_POST_I64:
        return bi_pack_fma_atom_post_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_ATOM_PRE_I64:
        return bi_pack_fma_atom_pre_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_BITREV_I32:
        return bi_pack_fma_bitrev_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_CLZ_U32:
        return bi_pack_fma_clz_u32(I, src0, src1, src2, src3);
    case BI_OPCODE_CLZ_V2U16:
        return bi_pack_fma_clz_v2u16(I, src0, src1, src2, src3);
    case BI_OPCODE_CLZ_V4U8:
        return bi_pack_fma_clz_v4u8(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_F32:
        return bi_pack_fma_csel_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_I32:
        return bi_pack_fma_csel_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_S32:
        return bi_pack_fma_csel_s32(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_U32:
        return bi_pack_fma_csel_u32(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_V2F16:
        return bi_pack_fma_csel_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_V2I16:
        return bi_pack_fma_csel_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_V2S16:
        return bi_pack_fma_csel_v2s16(I, src0, src1, src2, src3);
    case BI_OPCODE_CSEL_V2U16:
        return bi_pack_fma_csel_v2u16(I, src0, src1, src2, src3);
    case BI_OPCODE_CUBEFACE1:
        return bi_pack_fma_cubeface1(I, src0, src1, src2, src3);
    case BI_OPCODE_DTSEL_IMM:
        return bi_pack_fma_dtsel_imm(I, src0, src1, src2, src3);
    case BI_OPCODE_F16_TO_F32:
        return bi_pack_fma_f16_to_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FADD_F32:
        return bi_pack_fma_fadd_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FADD_V2F16:
        return bi_pack_fma_fadd_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FADD_LSCALE_F32:
        return bi_pack_fma_fadd_lscale_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FCMP_F32:
        return bi_pack_fma_fcmp_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FCMP_V2F16:
        return bi_pack_fma_fcmp_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FLSHIFT_DOUBLE_I32:
        return bi_pack_fma_flshift_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_FMA_F32:
        return bi_pack_fma_fma_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FMA_V2F16:
        return bi_pack_fma_fma_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FMA_RSCALE_F32:
        return bi_pack_fma_fma_rscale_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FMA_RSCALE_V2F16:
        return bi_pack_fma_fma_rscale_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FMUL_CSLICE:
        return bi_pack_fma_fmul_cslice(I, src0, src1, src2, src3);
    case BI_OPCODE_FMUL_SLICE_F32:
        return bi_pack_fma_fmul_slice_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FREXPE_F32:
        return bi_pack_fma_frexpe_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FREXPE_V2F16:
        return bi_pack_fma_frexpe_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FREXPM_F32:
        return bi_pack_fma_frexpm_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FREXPM_V2F16:
        return bi_pack_fma_frexpm_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FROUND_F32:
        return bi_pack_fma_fround_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_FROUND_V2F16:
        return bi_pack_fma_fround_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_FRSHIFT_DOUBLE_I32:
        return bi_pack_fma_frshift_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_IADDC_I32:
        return bi_pack_fma_iaddc_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_IDP_V4I8:
        return bi_pack_fma_idp_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_IMUL_I32:
        return bi_pack_fma_imul_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_IMUL_V2I16:
        return bi_pack_fma_imul_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_IMUL_V4I8:
        return bi_pack_fma_imul_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_IMULD:
        return bi_pack_fma_imuld(I, src0, src1, src2, src3);
    case BI_OPCODE_ISUBB_I32:
        return bi_pack_fma_isubb_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_JUMP_EX:
        return bi_pack_fma_jump_ex(I, src0, src1, src2, src3);
    case BI_OPCODE_LROT_DOUBLE_I32:
        return bi_pack_fma_lrot_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_AND_I32:
        return bi_pack_fma_lshift_and_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_AND_V2I16:
        return bi_pack_fma_lshift_and_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_AND_V4I8:
        return bi_pack_fma_lshift_and_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_DOUBLE_I32:
        return bi_pack_fma_lshift_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_OR_I32:
        return bi_pack_fma_lshift_or_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_OR_V2I16:
        return bi_pack_fma_lshift_or_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_OR_V4I8:
        return bi_pack_fma_lshift_or_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_XOR_I32:
        return bi_pack_fma_lshift_xor_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_XOR_V2I16:
        return bi_pack_fma_lshift_xor_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_LSHIFT_XOR_V4I8:
        return bi_pack_fma_lshift_xor_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_MKVEC_V2I16:
        return bi_pack_fma_mkvec_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_MKVEC_V4I8:
        return bi_pack_fma_mkvec_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_MOV_I32:
        return bi_pack_fma_mov_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_NOP:
        return bi_pack_fma_nop(I, src0, src1, src2, src3);
    case BI_OPCODE_POPCOUNT_I32:
        return bi_pack_fma_popcount_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_QUIET_F32:
        return bi_pack_fma_quiet_f32(I, src0, src1, src2, src3);
    case BI_OPCODE_QUIET_V2F16:
        return bi_pack_fma_quiet_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_RROT_DOUBLE_I32:
        return bi_pack_fma_rrot_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_AND_I32:
        return bi_pack_fma_rshift_and_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_AND_V2I16:
        return bi_pack_fma_rshift_and_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_AND_V4I8:
        return bi_pack_fma_rshift_and_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_DOUBLE_I32:
        return bi_pack_fma_rshift_double_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_OR_I32:
        return bi_pack_fma_rshift_or_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_OR_V2I16:
        return bi_pack_fma_rshift_or_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_OR_V4I8:
        return bi_pack_fma_rshift_or_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_XOR_I32:
        return bi_pack_fma_rshift_xor_i32(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_XOR_V2I16:
        return bi_pack_fma_rshift_xor_v2i16(I, src0, src1, src2, src3);
    case BI_OPCODE_RSHIFT_XOR_V4I8:
        return bi_pack_fma_rshift_xor_v4i8(I, src0, src1, src2, src3);
    case BI_OPCODE_S16_TO_S32:
        return bi_pack_fma_s16_to_s32(I, src0, src1, src2, src3);
    case BI_OPCODE_S8_TO_S32:
        return bi_pack_fma_s8_to_s32(I, src0, src1, src2, src3);
    case BI_OPCODE_SEG_ADD:
        return bi_pack_fma_seg_add(I, src0, src1, src2, src3);
    case BI_OPCODE_SHADDXL_I64:
        return bi_pack_fma_shaddxl_i64(I, src0, src1, src2, src3);
    case BI_OPCODE_SHADDXL_S32:
        return bi_pack_fma_shaddxl_s32(I, src0, src1, src2, src3);
    case BI_OPCODE_SHADDXL_U32:
        return bi_pack_fma_shaddxl_u32(I, src0, src1, src2, src3);
    case BI_OPCODE_U16_TO_U32:
        return bi_pack_fma_u16_to_u32(I, src0, src1, src2, src3);
    case BI_OPCODE_U8_TO_U32:
        return bi_pack_fma_u8_to_u32(I, src0, src1, src2, src3);
    case BI_OPCODE_V2F32_TO_V2F16:
        return bi_pack_fma_v2f32_to_v2f16(I, src0, src1, src2, src3);
    case BI_OPCODE_VN_ASST1_F16:
        return bi_pack_fma_vn_asst1_f16(I, src0, src1, src2, src3);
    case BI_OPCODE_VN_ASST1_F32:
        return bi_pack_fma_vn_asst1_f32(I, src0, src1, src2, src3);
    default:
        /* Each directive must start its own line to be seen by the
         * preprocessor. */
#ifndef NDEBUG
        bi_print_instr(I, stderr);
#endif
        unreachable("Cannot pack instruction as *");
    }
}

unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) { if (!I) return bi_pack_add_nop(I, src0, src1, src2, src3); switch (I->op) { case BI_OPCODE_ACMPSTORE_I32: return bi_pack_add_acmpstore_i32(I, src0, src1, src2, src3); case BI_OPCODE_ACMPSTORE_I64: return bi_pack_add_acmpstore_i64(I, src0, src1, src2, src3); case BI_OPCODE_ACMPXCHG_I32: return bi_pack_add_acmpxchg_i32(I, src0, src1, src2, src3); case BI_OPCODE_ACMPXCHG_I64: return bi_pack_add_acmpxchg_i64(I, src0, src1, src2, src3); case BI_OPCODE_ATEST: return bi_pack_add_atest(I, src0, src1, src2, src3); case BI_OPCODE_ATOM_CX: return bi_pack_add_atom_cx(I, src0, src1, src2, src3); case BI_OPCODE_AXCHG_I32: return bi_pack_add_axchg_i32(I, src0, src1, src2, src3); case BI_OPCODE_AXCHG_I64: return bi_pack_add_axchg_i64(I, src0, src1, src2, src3); case BI_OPCODE_BARRIER: return bi_pack_add_barrier(I, src0, src1, src2, src3); case BI_OPCODE_BLEND: return bi_pack_add_blend(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_F16: return bi_pack_add_branch_f16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_F32: return bi_pack_add_branch_f32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_I16: return bi_pack_add_branch_i16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_I32: return bi_pack_add_branch_i32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_S16: return
bi_pack_add_branch_s16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_S32: return bi_pack_add_branch_s32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_U16: return bi_pack_add_branch_u16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_U32: return bi_pack_add_branch_u32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHC_I16: return bi_pack_add_branchc_i16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHC_I32: return bi_pack_add_branchc_i32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_F16: return bi_pack_add_branchz_f16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_F32: return bi_pack_add_branchz_f32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_I16: return bi_pack_add_branchz_i16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_I32: return bi_pack_add_branchz_i32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_S16: return bi_pack_add_branchz_s16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_S32: return bi_pack_add_branchz_s32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_U16: return bi_pack_add_branchz_u16(I, src0, src1, src2, src3); case BI_OPCODE_BRANCHZ_U32: return bi_pack_add_branchz_u32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_DIVERG: return bi_pack_add_branch_diverg(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_LOWBITS_F32: return bi_pack_add_branch_lowbits_f32(I, src0, src1, src2, src3); case BI_OPCODE_BRANCH_NO_DIVERG: return bi_pack_add_branch_no_diverg(I, src0, src1, src2, src3); case BI_OPCODE_CLPER_I32: return bi_pack_add_clper_i32(I, src0, src1, src2, src3); case BI_OPCODE_CLPER_V6_I32: return bi_pack_add_clper_v6_i32(I, src0, src1, src2, src3); case BI_OPCODE_CUBEFACE2: return bi_pack_add_cubeface2(I, src0, src1, src2, src3); case BI_OPCODE_CUBE_SSEL: return bi_pack_add_cube_ssel(I, src0, src1, src2, src3); case BI_OPCODE_CUBE_TSEL: return bi_pack_add_cube_tsel(I, src0, src1, src2, src3); case BI_OPCODE_DISCARD_F32: return bi_pack_add_discard_f32(I, src0, src1, src2, src3); case BI_OPCODE_F16_TO_F32: 
return bi_pack_add_f16_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_F16_TO_S32: return bi_pack_add_f16_to_s32(I, src0, src1, src2, src3); case BI_OPCODE_F16_TO_U32: return bi_pack_add_f16_to_u32(I, src0, src1, src2, src3); case BI_OPCODE_F32_TO_S32: return bi_pack_add_f32_to_s32(I, src0, src1, src2, src3); case BI_OPCODE_F32_TO_U32: return bi_pack_add_f32_to_u32(I, src0, src1, src2, src3); case BI_OPCODE_FADD_F32: return bi_pack_add_fadd_f32(I, src0, src1, src2, src3); case BI_OPCODE_FADD_V2F16: return bi_pack_add_fadd_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FADD_RSCALE_F32: return bi_pack_add_fadd_rscale_f32(I, src0, src1, src2, src3); case BI_OPCODE_FCMP_F32: return bi_pack_add_fcmp_f32(I, src0, src1, src2, src3); case BI_OPCODE_FCMP_V2F16: return bi_pack_add_fcmp_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FCOS_TABLE_U6: return bi_pack_add_fcos_table_u6(I, src0, src1, src2, src3); case BI_OPCODE_FEXP_F32: return bi_pack_add_fexp_f32(I, src0, src1, src2, src3); case BI_OPCODE_FEXP_TABLE_U4: return bi_pack_add_fexp_table_u4(I, src0, src1, src2, src3); case BI_OPCODE_FLOGD_F32: return bi_pack_add_flogd_f32(I, src0, src1, src2, src3); case BI_OPCODE_FLOG_TABLE_F32: return bi_pack_add_flog_table_f32(I, src0, src1, src2, src3); case BI_OPCODE_FMAX_F32: return bi_pack_add_fmax_f32(I, src0, src1, src2, src3); case BI_OPCODE_FMAX_V2F16: return bi_pack_add_fmax_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FMIN_F32: return bi_pack_add_fmin_f32(I, src0, src1, src2, src3); case BI_OPCODE_FMIN_V2F16: return bi_pack_add_fmin_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FPCLASS_F16: return bi_pack_add_fpclass_f16(I, src0, src1, src2, src3); case BI_OPCODE_FPCLASS_F32: return bi_pack_add_fpclass_f32(I, src0, src1, src2, src3); case BI_OPCODE_FPOW_SC_APPLY: return bi_pack_add_fpow_sc_apply(I, src0, src1, src2, src3); case BI_OPCODE_FPOW_SC_DET_F16: return bi_pack_add_fpow_sc_det_f16(I, src0, src1, src2, src3); case BI_OPCODE_FPOW_SC_DET_F32: return 
bi_pack_add_fpow_sc_det_f32(I, src0, src1, src2, src3); case BI_OPCODE_FRCP_F16: return bi_pack_add_frcp_f16(I, src0, src1, src2, src3); case BI_OPCODE_FRCP_F32: return bi_pack_add_frcp_f32(I, src0, src1, src2, src3); case BI_OPCODE_FRCP_APPROX_F32: return bi_pack_add_frcp_approx_f32(I, src0, src1, src2, src3); case BI_OPCODE_FREXPE_F32: return bi_pack_add_frexpe_f32(I, src0, src1, src2, src3); case BI_OPCODE_FREXPE_V2F16: return bi_pack_add_frexpe_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FREXPM_F32: return bi_pack_add_frexpm_f32(I, src0, src1, src2, src3); case BI_OPCODE_FREXPM_V2F16: return bi_pack_add_frexpm_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FROUND_F32: return bi_pack_add_fround_f32(I, src0, src1, src2, src3); case BI_OPCODE_FROUND_V2F16: return bi_pack_add_fround_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_FRSQ_F16: return bi_pack_add_frsq_f16(I, src0, src1, src2, src3); case BI_OPCODE_FRSQ_F32: return bi_pack_add_frsq_f32(I, src0, src1, src2, src3); case BI_OPCODE_FRSQ_APPROX_F32: return bi_pack_add_frsq_approx_f32(I, src0, src1, src2, src3); case BI_OPCODE_FSINCOS_OFFSET_U6: return bi_pack_add_fsincos_offset_u6(I, src0, src1, src2, src3); case BI_OPCODE_FSIN_TABLE_U6: return bi_pack_add_fsin_table_u6(I, src0, src1, src2, src3); case BI_OPCODE_HADD_S32: return bi_pack_add_hadd_s32(I, src0, src1, src2, src3); case BI_OPCODE_HADD_U32: return bi_pack_add_hadd_u32(I, src0, src1, src2, src3); case BI_OPCODE_HADD_V2S16: return bi_pack_add_hadd_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_HADD_V2U16: return bi_pack_add_hadd_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_HADD_V4S8: return bi_pack_add_hadd_v4s8(I, src0, src1, src2, src3); case BI_OPCODE_HADD_V4U8: return bi_pack_add_hadd_v4u8(I, src0, src1, src2, src3); case BI_OPCODE_IABS_S32: return bi_pack_add_iabs_s32(I, src0, src1, src2, src3); case BI_OPCODE_IABS_V2S16: return bi_pack_add_iabs_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_IABS_V4S8: return 
bi_pack_add_iabs_v4s8(I, src0, src1, src2, src3); case BI_OPCODE_IADD_S32: return bi_pack_add_iadd_s32(I, src0, src1, src2, src3); case BI_OPCODE_IADD_U32: return bi_pack_add_iadd_u32(I, src0, src1, src2, src3); case BI_OPCODE_IADD_V2S16: return bi_pack_add_iadd_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_IADD_V2U16: return bi_pack_add_iadd_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_IADD_V4S8: return bi_pack_add_iadd_v4s8(I, src0, src1, src2, src3); case BI_OPCODE_IADD_V4U8: return bi_pack_add_iadd_v4u8(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_I32: return bi_pack_add_icmp_i32(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_S32: return bi_pack_add_icmp_s32(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_U32: return bi_pack_add_icmp_u32(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V2I16: return bi_pack_add_icmp_v2i16(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V2S16: return bi_pack_add_icmp_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V2U16: return bi_pack_add_icmp_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V4I8: return bi_pack_add_icmp_v4i8(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V4S8: return bi_pack_add_icmp_v4s8(I, src0, src1, src2, src3); case BI_OPCODE_ICMP_V4U8: return bi_pack_add_icmp_v4u8(I, src0, src1, src2, src3); case BI_OPCODE_ICMPF_I32: return bi_pack_add_icmpf_i32(I, src0, src1, src2, src3); case BI_OPCODE_ICMPI_I32: return bi_pack_add_icmpi_i32(I, src0, src1, src2, src3); case BI_OPCODE_ICMPI_S32: return bi_pack_add_icmpi_s32(I, src0, src1, src2, src3); case BI_OPCODE_ICMPI_U32: return bi_pack_add_icmpi_u32(I, src0, src1, src2, src3); case BI_OPCODE_ICMPM_I32: return bi_pack_add_icmpm_i32(I, src0, src1, src2, src3); case BI_OPCODE_ILOGB_F32: return bi_pack_add_ilogb_f32(I, src0, src1, src2, src3); case BI_OPCODE_ILOGB_V2F16: return bi_pack_add_ilogb_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_IMOV_FMA: return bi_pack_add_imov_fma(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_S32: return 
bi_pack_add_isub_s32(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_U32: return bi_pack_add_isub_u32(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_V2S16: return bi_pack_add_isub_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_V2U16: return bi_pack_add_isub_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_V4S8: return bi_pack_add_isub_v4s8(I, src0, src1, src2, src3); case BI_OPCODE_ISUB_V4U8: return bi_pack_add_isub_v4u8(I, src0, src1, src2, src3); case BI_OPCODE_JUMP: return bi_pack_add_jump(I, src0, src1, src2, src3); case BI_OPCODE_LDEXP_F32: return bi_pack_add_ldexp_f32(I, src0, src1, src2, src3); case BI_OPCODE_LDEXP_V2F16: return bi_pack_add_ldexp_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_LD_ATTR: return bi_pack_add_ld_attr(I, src0, src1, src2, src3); case BI_OPCODE_LD_ATTR_IMM: return bi_pack_add_ld_attr_imm(I, src0, src1, src2, src3); case BI_OPCODE_LD_ATTR_TEX: return bi_pack_add_ld_attr_tex(I, src0, src1, src2, src3); case BI_OPCODE_LD_CVT: return bi_pack_add_ld_cvt(I, src0, src1, src2, src3); case BI_OPCODE_LD_GCLK_U64: return bi_pack_add_ld_gclk_u64(I, src0, src1, src2, src3); case BI_OPCODE_LD_TILE: return bi_pack_add_ld_tile(I, src0, src1, src2, src3); case BI_OPCODE_LD_VAR: return bi_pack_add_ld_var(I, src0, src1, src2, src3); case BI_OPCODE_LD_VAR_FLAT: return bi_pack_add_ld_var_flat(I, src0, src1, src2, src3); case BI_OPCODE_LD_VAR_FLAT_IMM: return bi_pack_add_ld_var_flat_imm(I, src0, src1, src2, src3); case BI_OPCODE_LD_VAR_IMM: return bi_pack_add_ld_var_imm(I, src0, src1, src2, src3); case BI_OPCODE_LD_VAR_SPECIAL: return bi_pack_add_ld_var_special(I, src0, src1, src2, src3); case BI_OPCODE_LEA_ATTR: return bi_pack_add_lea_attr(I, src0, src1, src2, src3); case BI_OPCODE_LEA_ATTR_IMM: return bi_pack_add_lea_attr_imm(I, src0, src1, src2, src3); case BI_OPCODE_LEA_ATTR_TEX: return bi_pack_add_lea_attr_tex(I, src0, src1, src2, src3); case BI_OPCODE_LEA_TEX: return bi_pack_add_lea_tex(I, src0, src1, src2, src3); case 
BI_OPCODE_LEA_TEX_IMM: return bi_pack_add_lea_tex_imm(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I128: return bi_pack_add_load_i128(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I16: return bi_pack_add_load_i16(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I24: return bi_pack_add_load_i24(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I32: return bi_pack_add_load_i32(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I48: return bi_pack_add_load_i48(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I64: return bi_pack_add_load_i64(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I8: return bi_pack_add_load_i8(I, src0, src1, src2, src3); case BI_OPCODE_LOAD_I96: return bi_pack_add_load_i96(I, src0, src1, src2, src3); case BI_OPCODE_LOGB_F32: return bi_pack_add_logb_f32(I, src0, src1, src2, src3); case BI_OPCODE_LOGB_V2F16: return bi_pack_add_logb_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_MKVEC_V2I16: return bi_pack_add_mkvec_v2i16(I, src0, src1, src2, src3); case BI_OPCODE_MOV_I32: return bi_pack_add_mov_i32(I, src0, src1, src2, src3); case BI_OPCODE_MUX_I32: return bi_pack_add_mux_i32(I, src0, src1, src2, src3); case BI_OPCODE_MUX_V2I16: return bi_pack_add_mux_v2i16(I, src0, src1, src2, src3); case BI_OPCODE_MUX_V4I8: return bi_pack_add_mux_v4i8(I, src0, src1, src2, src3); case BI_OPCODE_NOP: return bi_pack_add_nop(I, src0, src1, src2, src3); case BI_OPCODE_QUIET_F32: return bi_pack_add_quiet_f32(I, src0, src1, src2, src3); case BI_OPCODE_QUIET_V2F16: return bi_pack_add_quiet_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_S16_TO_F32: return bi_pack_add_s16_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_S16_TO_S32: return bi_pack_add_s16_to_s32(I, src0, src1, src2, src3); case BI_OPCODE_S32_TO_F32: return bi_pack_add_s32_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_S8_TO_F32: return bi_pack_add_s8_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_S8_TO_S32: return bi_pack_add_s8_to_s32(I, src0, src1, src2, src3); case BI_OPCODE_SEG_ADD: return 
bi_pack_add_seg_add(I, src0, src1, src2, src3); case BI_OPCODE_SHADDXH_I32: return bi_pack_add_shaddxh_i32(I, src0, src1, src2, src3); case BI_OPCODE_SHIFT_DOUBLE_I32: return bi_pack_add_shift_double_i32(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I128: return bi_pack_add_store_i128(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I16: return bi_pack_add_store_i16(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I24: return bi_pack_add_store_i24(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I32: return bi_pack_add_store_i32(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I48: return bi_pack_add_store_i48(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I64: return bi_pack_add_store_i64(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I8: return bi_pack_add_store_i8(I, src0, src1, src2, src3); case BI_OPCODE_STORE_I96: return bi_pack_add_store_i96(I, src0, src1, src2, src3); case BI_OPCODE_ST_CVT: return bi_pack_add_st_cvt(I, src0, src1, src2, src3); case BI_OPCODE_ST_TILE: return bi_pack_add_st_tile(I, src0, src1, src2, src3); case BI_OPCODE_SWZ_V2I16: return bi_pack_add_swz_v2i16(I, src0, src1, src2, src3); case BI_OPCODE_SWZ_V4I8: return bi_pack_add_swz_v4i8(I, src0, src1, src2, src3); case BI_OPCODE_TEXC: return bi_pack_add_texc(I, src0, src1, src2, src3); case BI_OPCODE_TEXS_2D_F16: return bi_pack_add_texs_2d_f16(I, src0, src1, src2, src3); case BI_OPCODE_TEXS_2D_F32: return bi_pack_add_texs_2d_f32(I, src0, src1, src2, src3); case BI_OPCODE_TEXS_CUBE_F16: return bi_pack_add_texs_cube_f16(I, src0, src1, src2, src3); case BI_OPCODE_TEXS_CUBE_F32: return bi_pack_add_texs_cube_f32(I, src0, src1, src2, src3); case BI_OPCODE_U16_TO_F32: return bi_pack_add_u16_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_U16_TO_U32: return bi_pack_add_u16_to_u32(I, src0, src1, src2, src3); case BI_OPCODE_U32_TO_F32: return bi_pack_add_u32_to_f32(I, src0, src1, src2, src3); case BI_OPCODE_U8_TO_F32: return bi_pack_add_u8_to_f32(I, src0, src1, src2, src3); case 
BI_OPCODE_U8_TO_U32: return bi_pack_add_u8_to_u32(I, src0, src1, src2, src3); case BI_OPCODE_V2F16_TO_V2S16: return bi_pack_add_v2f16_to_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_V2F16_TO_V2U16: return bi_pack_add_v2f16_to_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_V2F32_TO_V2F16: return bi_pack_add_v2f32_to_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_V2S16_TO_V2F16: return bi_pack_add_v2s16_to_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_V2S8_TO_V2F16: return bi_pack_add_v2s8_to_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_V2S8_TO_V2S16: return bi_pack_add_v2s8_to_v2s16(I, src0, src1, src2, src3); case BI_OPCODE_V2U16_TO_V2F16: return bi_pack_add_v2u16_to_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_V2U8_TO_V2F16: return bi_pack_add_v2u8_to_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_V2U8_TO_V2U16: return bi_pack_add_v2u8_to_v2u16(I, src0, src1, src2, src3); case BI_OPCODE_VAR_TEX_F16: return bi_pack_add_var_tex_f16(I, src0, src1, src2, src3); case BI_OPCODE_VAR_TEX_F32: return bi_pack_add_var_tex_f32(I, src0, src1, src2, src3); case BI_OPCODE_VN_ASST2_F32: return bi_pack_add_vn_asst2_f32(I, src0, src1, src2, src3); case BI_OPCODE_VN_ASST2_V2F16: return bi_pack_add_vn_asst2_v2f16(I, src0, src1, src2, src3); case BI_OPCODE_WMASK: return bi_pack_add_wmask(I, src0, src1, src2, src3); case BI_OPCODE_ZS_EMIT: return bi_pack_add_zs_emit(I, src0, src1, src2, src3); default: #ifndef NDEBUG bi_print_instr(I, stderr); #endif unreachable("Cannot pack instruction as +"); } }