/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "util/double.h"
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "nir_constant_expressions.h"

/**
 * \brief Checks if the provided value is a denorm and flushes it to zero.
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
   switch(bit_size) {
   case 64:
      if (0 == (value->u64 & 0x7ff0000000000000))
         value->u64 &= 0x8000000000000000;
      break;
   case 32:
      if (0 == (value->u32 & 0x7f800000))
         value->u32 &= 0x80000000;
      break;
   case 16:
      if (0 == (value->u16 & 0x7c00))
         value->u16 &= 0x8000;
   }
}

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packSnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packSnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackSnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}
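/* Illustrative sketch only, not part of the generated helpers: a constant
 * evaluation of the full packSnorm2x16 would combine two of the
 * per-component results above, with the first vector component in the
 * least-significant half-word (the variable name v here is hypothetical):
 *
 *    uint32_t packed = (uint32_t) pack_snorm_1x16(v.x) |
 *                      ((uint32_t) pack_snorm_1x16(v.y) << 16);
 */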
/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackSnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packUnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component of packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packUnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackUnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackUnorm4x8: f / 255.0
    */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackUnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *      unpackUnorm2x16: f / 65535.0
    */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16, flushing denorms to zero.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   if (0 == (u & 0x7c00))
      u &= 0x8000;
   return _mesa_half_to_float(u);
}
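/* Note on the test above: (u & 0x7c00) == 0 means the half-precision
 * exponent field is zero, i.e. the value is +/-0 or a denorm; masking with
 * 0x8000 then keeps only the sign bit, flushing any denorm to a signed zero.
 */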
/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
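/* Each *_vec struct below names up to 16 components: x, y, z, w are
 * components 0-3 and e through p are components 4-15, so generated
 * expressions such as src0.y or src1.p can address any lane of a vector.
 */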
struct float16_vec {
   float16_t x, y, z, w;
   float16_t e, f, g, h;
   float16_t i, j, k, l;
   float16_t m, n, o, p;
};

struct float32_vec {
   float32_t x, y, z, w;
   float32_t e, f, g, h;
   float32_t i, j, k, l;
   float32_t m, n, o, p;
};

struct float64_vec {
   float64_t x, y, z, w;
   float64_t e, f, g, h;
   float64_t i, j, k, l;
   float64_t m, n, o, p;
};

struct int1_vec {
   int1_t x, y, z, w;
   int1_t e, f, g, h;
   int1_t i, j, k, l;
   int1_t m, n, o, p;
};

struct int8_vec {
   int8_t x, y, z, w;
   int8_t e, f, g, h;
   int8_t i, j, k, l;
   int8_t m, n, o, p;
};

struct int16_vec {
   int16_t x, y, z, w;
   int16_t e, f, g, h;
   int16_t i, j, k, l;
   int16_t m, n, o, p;
};

struct int32_vec {
   int32_t x, y, z, w;
   int32_t e, f, g, h;
   int32_t i, j, k, l;
   int32_t m, n, o, p;
};

struct int64_vec {
   int64_t x, y, z, w;
   int64_t e, f, g, h;
   int64_t i, j, k, l;
   int64_t m, n, o, p;
};

struct uint1_vec {
   uint1_t x, y, z, w;
   uint1_t e, f, g, h;
   uint1_t i, j, k, l;
   uint1_t m, n, o, p;
};

struct uint8_vec {
   uint8_t x, y, z, w;
   uint8_t e, f, g, h;
   uint8_t i, j, k, l;
   uint8_t m, n, o, p;
};

struct uint16_vec {
   uint16_t x, y, z, w;
   uint16_t e, f, g, h;
   uint16_t i, j, k, l;
   uint16_t m, n, o, p;
};

struct uint32_vec {
   uint32_t x, y, z, w;
   uint32_t e, f, g, h;
   uint32_t i, j, k, l;
   uint32_t m, n, o, p;
};

struct uint64_vec {
   uint64_t x, y, z, w;
   uint64_t e, f, g, h;
   uint64_t i, j, k, l;
   uint64_t m, n, o, p;
};

struct bool1_vec {
   bool1_t x, y, z, w;
   bool1_t e, f, g, h;
   bool1_t i, j, k, l;
   bool1_t m, n, o, p;
};

struct bool8_vec {
   bool8_t x, y, z, w;
   bool8_t e, f, g, h;
   bool8_t i, j, k, l;
   bool8_t m, n, o, p;
};

struct bool16_vec {
   bool16_t x, y, z, w;
   bool16_t e, f, g, h;
   bool16_t i, j, k, l;
   bool16_t m, n, o, p;
};

struct bool32_vec {
   bool32_t x, y, z, w;
   bool32_t e, f, g, h;
   bool32_t i, j, k, l;
   bool32_t m, n, o, p;
};

static void
evaluate_amul(nir_const_value *_dst_val,
              UNUSED unsigned num_components,
              unsigned bit_size,
              UNUSED nir_const_value **_src,
              UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src0 = -(int1_t)_src[0][_i].b;
         /* 1-bit integers use a 0/-1 convention */
         const int1_t src1 = -(int1_t)_src[1][_i].b;

         int1_t dst = src0 * src1;

         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;
      }
      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int8_t src0 = _src[0][_i].i8;
         const int8_t src1 = _src[1][_i].i8;

         int8_t dst = src0 * src1;

         _dst_val[_i].i8 = dst;
      }
      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int16_t src0 = _src[0][_i].i16;
         const int16_t src1 = _src[1][_i].i16;

         int16_t dst = src0 * src1;

         _dst_val[_i].i16 = dst;
      }
      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int32_t src0 = _src[0][_i].i32;
         const int32_t src1 = _src[1][_i].i32;

         int32_t dst = src0 * src1;

         _dst_val[_i].i32 = dst;
      }
      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const int64_t src0 = _src[0][_i].i64;
         const int64_t src1 = _src[1][_i].i64;

         int64_t dst = src0 * src1;

         _dst_val[_i].i64 = dst;
      }
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
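/* The b16all and b16any evaluators below reduce a whole-vector comparison
 * to a single 16-bit boolean: the used components are combined with && (all
 * equal) or || (any not equal), and the 0/1 result is converted to the 0/-1
 * boolean convention via -(int)dst.x before being stored in _dst_val[0].i16.
 */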
static void
evaluate_b16all_fequal16(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16),
         _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16),
         _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16),
         _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16),
         _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16),
         _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16),
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16),
         _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16),
         _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16),
         _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16),
         _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16),
         _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16),
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32,
         _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32,
         _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32,
         _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32,
         _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64,
         _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64,
         _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64,
         _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64,
         _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_fequal2(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}

static void
evaluate_b16all_fequal3(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}

static void
evaluate_b16all_fequal4(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
unreachable("unknown bit width"); } } static void evaluate_b16all_fequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16all_fequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 
static void
evaluate_b16all_fequal8(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16),
         _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16),
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16),
         _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16),
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
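/* The b16all_iequalN evaluators are the integer analogues of the float
 * versions above; 1-bit sources are first expanded to the 0/-1 convention
 * so the component-wise == comparisons behave like ordinary integer
 * compares.
 */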
static void
evaluate_b16all_iequal16(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
         -(int1_t)_src[0][4].b, -(int1_t)_src[0][5].b,
         -(int1_t)_src[0][6].b, -(int1_t)_src[0][7].b,
         -(int1_t)_src[0][8].b, -(int1_t)_src[0][9].b,
         -(int1_t)_src[0][10].b, -(int1_t)_src[0][11].b,
         -(int1_t)_src[0][12].b, -(int1_t)_src[0][13].b,
         -(int1_t)_src[0][14].b, -(int1_t)_src[0][15].b,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
         -(int1_t)_src[1][4].b, -(int1_t)_src[1][5].b,
         -(int1_t)_src[1][6].b, -(int1_t)_src[1][7].b,
         -(int1_t)_src[1][8].b, -(int1_t)_src[1][9].b,
         -(int1_t)_src[1][10].b, -(int1_t)_src[1][11].b,
         -(int1_t)_src[1][12].b, -(int1_t)_src[1][13].b,
         -(int1_t)_src[1][14].b, -(int1_t)_src[1][15].b,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
         _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8,
         _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8,
         _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
         _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8,
         _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8,
         _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
         _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16,
         _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16,
         _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
         _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16,
         _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16,
         _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
         _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32,
         _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32,
         _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
         _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32,
         _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32,
         _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
         _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64,
         _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64,
         _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
         _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64,
         _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64,
         _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) &&
          (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) &&
          (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) &&
          (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) &&
          (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) &&
          (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_iequal2(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_iequal3(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b, -(int1_t)_src[0][2].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b, -(int1_t)_src[1][2].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_iequal4(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_iequal5(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
         -(int1_t)_src[0][4].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
         -(int1_t)_src[1][4].b,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
         _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
         _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
         _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
         _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
         _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
         _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
         _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
         _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16all_iequal8(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src0 = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
         -(int1_t)_src[0][4].b, -(int1_t)_src[0][5].b,
         -(int1_t)_src[0][6].b, -(int1_t)_src[0][7].b,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec src1 = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
         -(int1_t)_src[1][4].b, -(int1_t)_src[1][5].b,
         -(int1_t)_src[1][6].b, -(int1_t)_src[1][7].b,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 8: {
      const struct int8_vec src0 = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
         _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int8_vec src1 = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
         _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 16: {
      const struct int16_vec src0 = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
         _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int16_vec src1 = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
         _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct int32_vec src0 = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
         _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int32_vec src1 = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
         _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct int64_vec src0 = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
         _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct int64_vec src1 = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
         _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64,
         0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) &&
          (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) &&
          (src0.y == src1.y) && (src0.x == src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
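/* The b16any_fnequalN evaluators are the De Morgan duals of the
 * b16all_fequalN ones above: an || of component-wise != tests rather than
 * an && of == tests, so the result is true if any used component differs.
 */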
static void
evaluate_b16any_fnequal16(nir_const_value *_dst_val,
                          UNUSED unsigned num_components,
                          unsigned bit_size,
                          UNUSED nir_const_value **_src,
                          UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16),
         _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16),
         _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16),
         _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16),
         _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16),
         _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16),
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16),
         _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16),
         _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16),
         _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16),
         _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16),
         _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16),
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) ||
          (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) ||
          (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) ||
          (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) ||
          (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) ||
          (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32,
         _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32,
         _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32,
         _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32,
         _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) ||
          (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) ||
          (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) ||
          (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) ||
          (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) ||
          (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64,
         _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64,
         _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64,
         _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64,
         _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) ||
          (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) ||
          (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) ||
          (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) ||
          (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) ||
          (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16any_fnequal2(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}

static void
evaluate_b16any_fnequal3(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_b16any_fnequal4(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}

static void
evaluate_b16any_fnequal5(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec src0 = {
         _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16),
         _mesa_half_to_float(_src[0][4].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float16_vec src1 = {
         _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16),
         _mesa_half_to_float(_src[1][4].u16),
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 32: {
      const struct float32_vec src0 = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
         _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float32_vec src1 = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
         _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   case 64: {
      const struct float64_vec src0 = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
         _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      const struct float64_vec src1 = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
         _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      };
      struct bool16_vec dst;
      dst.x = dst.y = dst.z = dst.w =
         ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) ||
          (src0.y != src1.y) || (src0.x != src1.x));
      _dst_val[0].i16 = -(int)dst.x;
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit 
integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, 
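/* The x, y, z, w, e, f, g, h, i, j, k, l, m, n, o, p fields of the *_vec
 * structs name vector components 0 through 15 in order; a vec16 operation
 * like this one populates and compares all sixteen.
 */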
_src[1][14].i32, _src[1][15].i32, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
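/* Components beyond the opcode's width (vec2 here) are zero-filled padding;
 * the reduction expression below never reads them.
 */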
0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ 
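/* The negation maps the 0/1 bit stored in .b to 0/-1, i.e. a set bit loads
 * as (int1_t)-1, matching the sign-extended convention noted above.
 */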
-(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { 
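/* Illustrative trace (hypothetical operand values): with vec5 int8 sources
 * src0 = {1, 2, 3, 4, 5} and src1 = {1, 2, 3, 4, 6}, the OR-reduction below
 * sees src0.e != src1.e, so dst.x is true and _dst_val[0].i16 receives -1
 * (0xFFFF), NIR's 16-bit "true".
 */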
const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16any_inequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || 
(src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i16 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b16csel(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; const uint1_t src1 = _src[1][_i].b; const uint1_t src2 = _src[2][_i].b; uint1_t dst = src0 ? src1 : src2; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; uint8_t dst = src0 ? 
src1 : src2; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; const uint16_t src1 = _src[1][_i].u16; const uint16_t src2 = _src[2][_i].u16; uint16_t dst = src0 ? src1 : src2; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = src0 ? src1 : src2; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; const uint64_t src1 = _src[1][_i].u64; const uint64_t src2 = _src[2][_i].u64; uint64_t dst = src0 ? src1 : src2; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2b1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2b16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2b32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void 
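/* Like the b2b variants above, b2b8 re-normalizes a boolean to the
 * destination width: any nonzero source reads as true, and the store
 * materializes it as 0 or -1 via -(int)dst.
 */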
evaluate_b2b8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2f16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2f32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = 
_src[0][_i].i16; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2f64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2i1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2i16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; int16_t dst = src0; 
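/* Unlike the b2b family, b2i produces 0/1: the load collapses NIR's 0/~0
 * boolean to a C boolean (presumably via the boolN_t typedefs defined
 * earlier in this file), so the integer conversion above yields 0 or 1.
 */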
_dst_val[_i].i16 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2i32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2i64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b2i8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool16_t src0 = _src[0][_i].i16; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), 
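/* As with src0 above, 16-bit float components are widened to float32 with
 * _mesa_half_to_float so the equality reduction runs on IEEE singles.
 */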
_mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct 
float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const 
struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_fequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct 
float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 
convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, 
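/* b32all_iequal* is the AND-reduction counterpart of the b16any_inequal*
 * ops above: every component pair must compare equal for dst.x to be true,
 * and the result is stored as a 32-bit boolean (0 or ~0).
 */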
_src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 
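/* The four-way dst.x = dst.y = dst.z = dst.w broadcast in these reductions
 * is vestigial: only dst.x is read when writing the scalar _dst_val[0].
 */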
0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit 
integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit 
integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32all_iequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers 
use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_fnequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 
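/* 16-bit float sources are widened with _mesa_half_to_float() before the
 * comparison, so the fnequal fold below uses ordinary IEEE float semantics
 * on the widened values rather than comparing raw half bit patterns. */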
_mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) 
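/* Lane naming in the generated vec structs: components 0-3 are x, y, z, w
 * and components 4-15 continue as e through p, which is why this 16-wide
 * OR-reduction ends with the .x lane.  Note that with C's != on floats a
 * NaN in either operand makes the lane test true, so any NaN component
 * causes the whole any_fnequal fold to produce true (-1). */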
|| (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_fnequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_fnequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_fnequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), 
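/* b32any_fnequalN is the logical dual of b32all_fequalN: it OR-reduces the
 * per-lane != tests instead of AND-reducing the == tests.  A minimal sketch
 * of the fold, assuming plain float arrays (the helper name is illustrative
 * only):
 *
 *   static int32_t fold_bany_fnequal(const float *a, const float *b,
 *                                    unsigned n)
 *   {
 *      bool any = false;
 *      for (unsigned i = 0; i < n; i++)
 *         any = any || (a[i] != b[i]);
 *      return any ? -1 : 0;
 *   }
 */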
_mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_fnequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void 
evaluate_b32any_fnequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 
convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || 
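/* In the 1-bit arm of this evaluator (above), every lane was normalized to
 * 0/-1 when the vec structs were filled in, so the same plain integer !=
 * works unchanged for boolean sources and for wider integer cases like
 * this one. */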
(src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = 
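/* The scalar fold result is broadcast to all four dst components, but only
 * dst.x is actually stored afterwards; the reduction always produces a
 * single scalar boolean regardless of the source vector width. */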
dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec 
src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ 
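/* To expand on the 0/-1 convention: a true 1-bit value arrives as 1 in the
 * .b union member, and the arithmetic negation turns the 0/1 pair into
 * 0/-1 (all bits set in two's complement), whatever small signed type
 * int1_t is typedef'd to.  Roughly:
 *
 *   b = 1;           b is true as loaded from nir_const_value.b
 *   v = -(int1_t)b;  v is -1, i.e. ~0 -- NIR's canonical "true"
 *   b = 0;           b is false
 *   v = -(int1_t)b;  v is 0
 */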
-(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32any_inequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ 
-(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != 
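/* The evaluate_b32csel() definition that follows this function is the one
 * non-reduction op in this stretch: it selects per component, lane by lane.
 * A minimal sketch of the selection step, assuming 32-bit data:
 *
 *   for (unsigned i = 0; i < num_components; i++)
 *      dst[i] = cond[i] ? a[i] : b[i];
 *
 * Any nonzero condition selects the first value source; the canonical
 * encoding produced by the comparison folds above is -1. */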
src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i32 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b32csel(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; const uint1_t src1 = _src[1][_i].b; const uint1_t src2 = _src[2][_i].b; uint1_t dst = src0 ? src1 : src2; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; uint8_t dst = src0 ? src1 : src2; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; const uint16_t src1 = _src[1][_i].u16; const uint16_t src2 = _src[2][_i].u16; uint16_t dst = src0 ? src1 : src2; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = src0 ? src1 : src2; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const bool32_t src0 = _src[0][_i].i32; const uint64_t src1 = _src[1][_i].u64; const uint64_t src2 = _src[2][_i].u64; uint64_t dst = src0 ? src1 : src2; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_fequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) 
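/* From here on the b8all_* evaluators repeat the b32all_* pattern with one
 * difference: the reduced result is materialized as an 8-bit boolean, so
 * the 0/-1 value is written to the .i8 destination member instead of
 * .i32. */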
&& (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_fequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = 
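/* As with the 32-bit variants, a NaN in any compared lane makes the ==
 * test false and therefore the whole all_fequal fold false (0). */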
((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_fequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_fequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit 
width"); } } static void evaluate_b8all_fequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_fequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, 
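/* Lane 7 of src1 follows; lanes 8-15 are zero-filled padding because
 * b8all_fequal8 only compares the first eight components.
 */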
_src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == 
src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 
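/* The remaining eleven i64 lanes follow: the 16-wide variant consumes
 * every component of both source vectors.
 */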
_src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit 
integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 
_src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const 
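/* Second operand of the 5-component i16 comparison; lanes 5-15 are
 * zero-padded, matching src0 above.
 */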
struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8all_iequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && 
(src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), 
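/* The fnequal family OR-reduces per-component inequality ("does any
 * component differ?").  16-bit sources are widened to float with
 * _mesa_half_to_float() before comparing.
 */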
_mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, 
_src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { 
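/* The f64 path has exactly the same shape as the 16- and 32-bit cases;
 * only the source union member and the conversion differ.
 */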
const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_fnequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e 
!= src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ 
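/* NIR 1-bit booleans are loaded through the .b member and widened to
 * the 0/-1 convention so plain integer comparison works on them.
 */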
-(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || 
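/* The OR chain continues over the remaining fifteen lane pairs,
 * o down through x.
 */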
(src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct 
bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 
/* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 
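/* Second operand with the same five-live-lane layout as src0. */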
= { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8any_inequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 8: { 
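/* 8-bit sources need no conversion; they are consumed directly from
 * the .i8 union member.
 */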
const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool8_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].i8 = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_b8csel(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; const uint1_t src1 = _src[1][_i].b; const uint1_t src2 = _src[2][_i].b; uint1_t dst = src0 ? src1 : src2; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; uint8_t dst = src0 ? src1 : src2; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; const uint16_t src1 = _src[1][_i].u16; const uint16_t src2 = _src[2][_i].u16; uint16_t dst = src0 ? 
src1 : src2; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = src0 ? src1 : src2; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const bool8_t src0 = _src[0][_i].i8; const uint64_t src1 = _src[1][_i].u64; const uint64_t src2 = _src[2][_i].u64; uint64_t dst = src0 ? src1 : src2; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = 
-(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
}; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { 
_src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_fequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED 
nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, _src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g 
== src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { 
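   /* ball_iequal2: compare the first two integer components of the two
    * sources and AND-reduce to a single 1-bit boolean.  Each case below is
    * structurally identical; only the nir_const_value union member being
    * loaded (.b, .i8, .i16, .i32, .i64) changes with the bit size.
    */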
switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 
= { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } 
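   /* The 64-bit case follows the same pattern as the narrower ones above,
    * loading the .i64 union member instead.
    */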
case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; 
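      /* Splat the AND-reduced comparison into every component of dst, then
       * store component x; the negation converts true into the 0/-1 integer
       * convention NIR uses for boolean results.
       */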
dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_ball_iequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct 
bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = 
((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = 
dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = 
dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_fnequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != 
src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][15].b, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][8].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][9].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][10].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][11].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][12].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][13].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][14].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][15].b, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, _src[0][8].i8, _src[0][9].i8, _src[0][10].i8, _src[0][11].i8, _src[0][12].i8, _src[0][13].i8, _src[0][14].i8, _src[0][15].i8, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, _src[1][8].i8, 
_src[1][9].i8, _src[1][10].i8, _src[1][11].i8, _src[1][12].i8, _src[1][13].i8, _src[1][14].i8, _src[1][15].i8, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, _src[0][8].i16, _src[0][9].i16, _src[0][10].i16, _src[0][11].i16, _src[0][12].i16, _src[0][13].i16, _src[0][14].i16, _src[0][15].i16, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, _src[1][8].i16, _src[1][9].i16, _src[1][10].i16, _src[1][11].i16, _src[1][12].i16, _src[1][13].i16, _src[1][14].i16, _src[1][15].i16, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, _src[0][8].i32, _src[0][9].i32, _src[0][10].i32, _src[0][11].i32, _src[0][12].i32, _src[0][13].i32, _src[0][14].i32, _src[0][15].i32, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, _src[1][8].i32, _src[1][9].i32, _src[1][10].i32, _src[1][11].i32, _src[1][12].i32, _src[1][13].i32, _src[1][14].i32, _src[1][15].i32, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, _src[0][8].i64, _src[0][9].i64, _src[0][10].i64, _src[0][11].i64, _src[0][12].i64, _src[0][13].i64, _src[0][14].i64, _src[0][15].i64, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, _src[1][8].i64, _src[1][9].i64, _src[1][10].i64, _src[1][11].i64, _src[1][12].i64, _src[1][13].i64, _src[1][14].i64, _src[1][15].i64, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) 
|| (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, 
_src[1][2].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, 
_src[0][2].i32, _src[0][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || 
(src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bany_inequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct int1_vec src0 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[0][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int1_vec src1 = { /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][0].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][1].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][2].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][3].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][4].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][5].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][6].b, /* 1-bit integers use a 0/-1 convention */ -(int1_t)_src[1][7].b, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 8: { const struct int8_vec src0 = { _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8, _src[0][4].i8, _src[0][5].i8, _src[0][6].i8, _src[0][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int8_vec src1 = { _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8, _src[1][4].i8, _src[1][5].i8, _src[1][6].i8, _src[1][7].i8, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 16: { const struct int16_vec src0 = { _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16, _src[0][4].i16, _src[0][5].i16, _src[0][6].i16, _src[0][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int16_vec src1 = { _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16, _src[1][4].i16, _src[1][5].i16, _src[1][6].i16, _src[1][7].i16, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 
32: { const struct int32_vec src0 = { _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32, _src[0][4].i32, _src[0][5].i32, _src[0][6].i32, _src[0][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int32_vec src1 = { _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32, _src[1][4].i32, _src[1][5].i32, _src[1][6].i32, _src[1][7].i32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } case 64: { const struct int64_vec src0 = { _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64, _src[0][4].i64, _src[0][5].i64, _src[0][6].i64, _src[0][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct int64_vec src1 = { _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64, _src[1][4].i64, _src[1][5].i64, _src[1][6].i64, _src[1][7].i64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct bool1_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)); _dst_val[0].b = -(int)dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_bcsel(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; const uint1_t src2 = _src[2][_i].b; uint1_t dst = src0 ? src1 : src2; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; uint8_t dst = src0 ? src1 : src2; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; const uint16_t src1 = _src[1][_i].u16; const uint16_t src2 = _src[2][_i].u16; uint16_t dst = src0 ? src1 : src2; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = src0 ? src1 : src2; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const bool1_t src0 = _src[0][_i].b; const uint64_t src1 = _src[1][_i].u64; const uint64_t src2 = _src[2][_i].u64; uint64_t dst = src0 ? 
src1 : src2;
         _dst_val[_i].u64 = dst;
      }
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_bfi(nir_const_value *_dst_val,
             UNUSED unsigned num_components,
             UNUSED unsigned bit_size,
             UNUSED nir_const_value **_src,
             UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      const uint32_t src1 = _src[1][_i].u32;
      const uint32_t src2 = _src[2][_i].u32;
      uint32_t dst;
      unsigned mask = src0, insert = src1, base = src2;
      if (mask == 0) {
         dst = base;
      } else {
         unsigned tmp = mask;
         while (!(tmp & 1)) {
            tmp >>= 1;
            insert <<= 1;
         }
         dst = (base & ~mask) | (insert & mask);
      }
      _dst_val[_i].u32 = dst;
   }
}
static void
evaluate_bfm(nir_const_value *_dst_val,
             UNUSED unsigned num_components,
             UNUSED unsigned bit_size,
             UNUSED nir_const_value **_src,
             UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const int32_t src0 = _src[0][_i].i32;
      const int32_t src1 = _src[1][_i].i32;
      uint32_t dst;
      int bits = src0 & 0x1F;
      int offset = src1 & 0x1F;
      dst = ((1u << bits) - 1) << offset;
      _dst_val[_i].u32 = dst;
   }
}
static void
evaluate_bit_count(nir_const_value *_dst_val,
                   UNUSED unsigned num_components,
                   unsigned bit_size,
                   UNUSED nir_const_value **_src,
                   UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint1_t src0 = _src[0][_i].b;
         uint32_t dst;
         dst = 0;
         for (unsigned bit = 0; bit < bit_size; bit++) {
            if ((src0 >> bit) & 1)
               dst++;
         }
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint8_t src0 = _src[0][_i].u8;
         uint32_t dst;
         dst = 0;
         for (unsigned bit = 0; bit < bit_size; bit++) {
            if ((src0 >> bit) & 1)
               dst++;
         }
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint16_t src0 = _src[0][_i].u16;
         uint32_t dst;
         dst = 0;
         for (unsigned bit = 0; bit < bit_size; bit++) {
            if ((src0 >> bit) & 1)
               dst++;
         }
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint32_t src0 = _src[0][_i].u32;
         uint32_t dst;
         dst = 0;
         for (unsigned bit = 0; bit < bit_size; bit++) {
            if ((src0 >> bit) & 1)
               dst++;
         }
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint64_t src0 = _src[0][_i].u64;
         uint32_t dst;
         dst = 0;
         for (unsigned bit = 0; bit < bit_size; bit++) {
            if ((src0 >> bit) & 1)
               dst++;
         }
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
static void
evaluate_bitfield_insert(nir_const_value *_dst_val,
                         UNUSED unsigned num_components,
                         UNUSED unsigned bit_size,
                         UNUSED nir_const_value **_src,
                         UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      const uint32_t src1 = _src[1][_i].u32;
      const int32_t src2 = _src[2][_i].i32;
      const int32_t src3 = _src[3][_i].i32;
      uint32_t dst;
      unsigned base = src0, insert = src1;
      int offset = src2, bits = src3;
      if (bits == 0) {
         dst = base;
      } else if (offset < 0 || bits < 0 || bits + offset > 32) {
         dst = 0;
      } else {
         unsigned mask = ((1ull << bits) - 1) << offset;
         dst = (base & ~mask) | ((insert << offset) & mask);
      }
      _dst_val[_i].u32 = dst;
   }
}
static void
evaluate_bitfield_reverse(nir_const_value *_dst_val,
                          UNUSED unsigned num_components,
                          UNUSED unsigned bit_size,
                          UNUSED nir_const_value **_src,
                          UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      uint32_t dst;
      /* we're not
winning any awards for speed here, but that's ok */ dst = 0; for (unsigned bit = 0; bit < 32; bit++) dst |= ((src0 >> bit) & 1) << (31 - bit); _dst_val[_i].u32 = dst; } } static void evaluate_bitfield_select(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; const uint1_t src2 = _src[2][_i].b; uint1_t dst = (src0 & src1) | (~src0 & src2); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; uint8_t dst = (src0 & src1) | (~src0 & src2); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; const uint16_t src2 = _src[2][_i].u16; uint16_t dst = (src0 & src1) | (~src0 & src2); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = (src0 & src1) | (~src0 & src2); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; const uint64_t src2 = _src[2][_i].u64; uint64_t dst = (src0 & src1) | (~src0 & src2); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_cube_face_coord_amd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = 0.0; float absX = fabsf(src0.x); float absY = fabsf(src0.y); float absZ = fabsf(src0.z); float ma = 0.0; if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; } if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; } if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; } if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; } if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; } if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; } if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; } if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; } if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; } dst.x = dst.x * (1.0f / ma) + 0.5f; dst.y = dst.y * (1.0f / ma) + 0.5f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } } static void evaluate_cube_face_index_amd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; 
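/* Editorial note, not part of the generated expression: the assignments
 * below implement the standard cube-map face numbering (+X = 0, -X = 1,
 * +Y = 2, -Y = 3, +Z = 4, -Z = 5), selecting the axis with the largest
 * magnitude. For example, src0 = (0.2f, -0.9f, 0.1f) has absY as its
 * maximum with src0.y < 0, so it selects face -Y and stores dst.x = 3.0f.
 */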
dst.x = 0.0; float absX = fabsf(src0.x); float absY = fabsf(src0.y); float absZ = fabsf(src0.z); if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0; if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1; if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2; if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3; if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4; if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_cube_r600(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = 0.0; float absX = fabsf(src0.x); float absY = fabsf(src0.y); float absZ = fabsf(src0.z); if (absX >= absY && absX >= absZ) { dst.z = 2 * src0.x; } if (absY >= absX && absY >= absZ) { dst.z = 2 * src0.y; } if (absZ >= absX && absZ >= absY) { dst.z = 2 * src0.z; } if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.y = -src0.z; dst.x = -src0.y; dst.w = 0; } if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.y = src0.z; dst.x = -src0.y; dst.w = 1; } if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.y = src0.x; dst.x = src0.z; dst.w = 2; } if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.y = src0.x; dst.x = -src0.z; dst.w = 3; } if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.y = src0.x; dst.x = -src0.y; dst.w = 4; } if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.y = -src0.x; dst.x = -src0.y; dst.w = 5; } _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } } static void evaluate_extract_i16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = (int16_t)(src0 >> (src1 * 16)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = (int16_t)(src0 >> (src1 * 16)); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = (int16_t)(src0 >> (src1 * 16)); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (int16_t)(src0 >> (src1 * 16)); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < 
num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = (int16_t)(src0 >> (src1 * 16)); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_extract_i8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = (int8_t)(src0 >> (src1 * 8)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = (int8_t)(src0 >> (src1 * 8)); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = (int8_t)(src0 >> (src1 * 8)); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (int8_t)(src0 >> (src1 * 8)); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = (int8_t)(src0 >> (src1 * 8)); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_extract_u16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (uint16_t)(src0 >> (src1 * 16)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (uint16_t)(src0 >> (src1 * 16)); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (uint16_t)(src0 >> (src1 * 16)); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (uint16_t)(src0 >> (src1 * 16)); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (uint16_t)(src0 >> (src1 * 16)); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_extract_u8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (uint8_t)(src0 >> (src1 * 8)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < 
num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (uint8_t)(src0 >> (src1 * 8)); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (uint8_t)(src0 >> (src1 * 8)); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (uint8_t)(src0 >> (src1 * 8)); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (uint8_t)(src0 >> (src1 * 8)); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2b1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2b16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2b32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2b8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for 
(unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2f16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2f16_rtne(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtne(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { 
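/* Editorial note: for a 16-bit result, the flush zeroes any value whose
 * biased exponent field (bits 0x7c00) is all zero, i.e. any denorm with
 * magnitude below 2^-14, keeping only the sign bit. For example,
 * 0x83ff (a negative denorm) flushes to 0x8000 (-0.0f).
 */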
constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2f16_rtz(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float16_t dst; if (bit_size > 16) { dst = _mesa_half_to_float(_mesa_float_to_float16_rtz(src0)); } else { dst = src0; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2f32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float32_t dst; if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { dst = _mesa_double_to_float_rtz(src0); } else { dst = src0; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst; if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { dst = _mesa_double_to_float_rtz(src0); } else { dst = src0; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float32_t dst; if (bit_size > 32 && nir_is_rounding_mode_rtz(execution_mode, 32)) { dst = _mesa_double_to_float_rtz(src0); } else { dst = src0; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2f64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < 
num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2fmp(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } } static void evaluate_f2i1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2i16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2i32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 
= _src[0][_i].f64; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2i64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2i8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2imp(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int16_t dst = src0; _dst_val[_i].i16 = dst; } } static void evaluate_f2u1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2u16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2u32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED 
nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2u64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2u8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_f2ump(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; uint16_t dst = src0; _dst_val[_i].u16 = dst; } } static void evaluate_fabs(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = fabs(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = fabs(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = fabs(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } 
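/* Editorial note: fabs is only defined for the float bit sizes handled
 * above (16, 32 and 64); any other width indicates a bug in the caller
 * and lands in unreachable() below.
 */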
default: unreachable("unknown bit width"); } } static void evaluate_fadd(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_add_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); } else { dst = src0 + src1; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_add_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); } else { dst = src0 + src1; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_add_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 + (double)src1); } else { dst = src0 + src1; } _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fall_equal16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p == src1.p) && (src0.o == src1.o) && (src0.n == src1.n) && (src0.m == src1.m) && (src0.l == src1.l) && (src0.k == src1.k) && (src0.j == src1.j) && (src0.i == src1.i) && (src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 
1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fall_equal2(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fall_equal3(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fall_equal4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fall_equal5(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 
1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fall_equal8(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h == src1.h) && (src0.g == src1.g) && (src0.f == src1.f) && (src0.e == src1.e) && (src0.w == src1.w) && (src0.z == src1.z) && (src0.y == src1.y) && (src0.x == src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p != src1.p) || (src0.o != src1.o) || (src0.n != src1.n) || (src0.m != src1.m) || (src0.l != src1.l) || (src0.k != src1.k) || (src0.j != src1.j) || (src0.i != src1.i) || (src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal2(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y != src1.y) || (src0.x != src1.x)) ? 
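/* Editorial note: fany_nequalN is the De Morgan dual of fall_equalN — it
 * folds any(notEqual(a, b)), so the per-component comparisons use !=
 * joined with || instead of == joined with &&.
 */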
1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal3(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal5(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fany_nequal8(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h != src1.h) || (src0.g != src1.g) || (src0.f != src1.f) || (src0.e != src1.e) || (src0.w != src1.w) || (src0.z != src1.z) || (src0.y != src1.y) || (src0.x != src1.x)) ? 
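/* Editorial observation: since both reductions produce exactly 0.0f or
 * 1.0f (including for NaN inputs, where == is false and != is true), for
 * any pair of inputs fany_nequalN(a, b) == 1.0f - fall_equalN(a, b); the
 * two opcode families are exact logical complements.
 */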
1.0f : 0.0f; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } } static void evaluate_fceil(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fclamp_pos_mali(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = fmax(src0, 0.0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = fmax(src0, 0.0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = fmax(src0, 0.0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fcos(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? cos(src0) : cosf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? 
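/* Editorial note: the "bit_size == 64 ? cos(...) : cosf(...)" selector is
 * emitted verbatim by the generator inside every case label, so within
 * each case it is a constant branch: the 64-bit case resolves to cos()
 * and the 16/32-bit cases to cosf().  Dead-branch noise, not a bug.
 */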
cos(src0) : cosf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? cos(src0) : cosf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fcos_r600(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = cosf(6.2831853 * src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } } static void evaluate_fcsel(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; const float32_t src2 = _src[2][_i].f32; float32_t dst = (src0 != 0.0f) ? src1 : src2; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } } static void evaluate_fcsel_ge(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; const float32_t src2 = _src[2][_i].f32; float32_t dst = (src0 >= 0.0f) ? src1 : src2; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } } static void evaluate_fcsel_gt(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; const float32_t src2 = _src[2][_i].f32; float32_t dst = (src0 > 0.0f) ? 
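/* Editorial note: the fcsel family folds a floating-point select — fcsel
 * tests src0 != 0.0f, fcsel_ge tests src0 >= 0.0f, and fcsel_gt (here)
 * tests src0 > 0.0f — choosing src1 when the predicate holds and src2
 * otherwise.  All three variants are fixed at 32 bits.
 */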
src1 : src2; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } } static void evaluate_fddx(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddx_coarse(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddx_fine(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddx_must_abs_mali(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned 
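/* Editorial note: every fddx/fddy variant (plain, fine, coarse, and the
 * Mali must_abs forms) constant-folds to 0.0 — a constant is uniform
 * across a fragment quad, so its screen-space derivative is exactly zero
 * at every bit size.
 */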
_i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddy(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddy_coarse(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddy_fine(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; 
_i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fddy_must_abs_mali(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { float16_t dst = 0.0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { float32_t dst = 0.0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { float64_t dst = 0.0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdiv(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = src0 / src1; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = src0 / src1; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = src0 / src1; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), 
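/* Editorial note: the fdotN opcodes evaluate the dot product in float —
 * the 16-bit sources are widened with _mesa_half_to_float in these
 * initializers — and only the final scalar is converted back to half
 * using the rounding mode requested by execution_mode.  The intermediate
 * sums are therefore not rounded to half precision term by term.
 */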
_mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * 
src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot16_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), _mesa_half_to_float(_src[0][8].u16), _mesa_half_to_float(_src[0][9].u16), _mesa_half_to_float(_src[0][10].u16), _mesa_half_to_float(_src[0][11].u16), _mesa_half_to_float(_src[0][12].u16), _mesa_half_to_float(_src[0][13].u16), _mesa_half_to_float(_src[0][14].u16), _mesa_half_to_float(_src[0][15].u16), }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), _mesa_half_to_float(_src[1][8].u16), _mesa_half_to_float(_src[1][9].u16), _mesa_half_to_float(_src[1][10].u16), _mesa_half_to_float(_src[1][11].u16), _mesa_half_to_float(_src[1][12].u16), _mesa_half_to_float(_src[1][13].u16), _mesa_half_to_float(_src[1][14].u16), _mesa_half_to_float(_src[1][15].u16), }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, _src[0][8].f32, _src[0][9].f32, _src[0][10].f32, _src[0][11].f32, _src[0][12].f32, _src[0][13].f32, _src[0][14].f32, _src[0][15].f32, }; const struct float32_vec src1 = { 
_src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, _src[1][8].f32, _src[1][9].f32, _src[1][10].f32, _src[1][11].f32, _src[1][12].f32, _src[1][13].f32, _src[1][14].f32, _src[1][15].f32, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, _src[0][8].f64, _src[0][9].f64, _src[0][10].f64, _src[0][11].f64, _src[0][12].f64, _src[0][13].f64, _src[0][14].f64, _src[0][15].f64, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, _src[1][8].f64, _src[1][9].f64, _src[1][10].f64, _src[1][11].f64, _src[1][12].f64, _src[1][13].f64, _src[1][14].f64, _src[1][15].f64, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.p * src1.p) + (src0.o * src1.o) + (src0.n * src1.n) + (src0.m * src1.m) + (src0.l * src1.l) + (src0.k * src1.k) + (src0.j * src1.j) + (src0.i * src1.i) + (src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if 
(nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot2_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } 
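/* Editorial note: the *_replicated dot products broadcast the scalar
 * result into all four destination components (hence the
 * dst.x = dst.y = dst.z = dst.w chain above), so a consumer may swizzle
 * any channel.  A rough scalar sketch of this 32-bit fdot2 fold:
 *
 *    float d = a.x * b.x + a.y * b.y;
 *    dst[0] = dst[1] = dst[2] = dst[3] = d;
 */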
_dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot3_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { 
_mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), 
_mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot4_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { 
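/* Editorial note: each replicated half-float store picks its rounding
 * mode (RTZ vs. RTNE) and applies the optional denorm flush on its own,
 * so all four channels round identically for a given execution_mode.
 */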
constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 
_src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot5_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + 
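/* Editorial note: fdot5, fdot8, and fdot16 follow the same expansion
 * pattern — components beyond x, y, z, w are named e through p in the
 * *_vec structs, and the generator emits one product term per live
 * component.
 */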
(src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * 
src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdot8_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), _mesa_half_to_float(_src[0][4].u16), _mesa_half_to_float(_src[0][5].u16), _mesa_half_to_float(_src[0][6].u16), _mesa_half_to_float(_src[0][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), _mesa_half_to_float(_src[1][4].u16), _mesa_half_to_float(_src[1][5].u16), _mesa_half_to_float(_src[1][6].u16), _mesa_half_to_float(_src[1][7].u16), 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, _src[0][4].f32, _src[0][5].f32, _src[0][6].f32, _src[0][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, 
_src[1][1].f32, _src[1][2].f32, _src[1][3].f32, _src[1][4].f32, _src[1][5].f32, _src[1][6].f32, _src[1][7].f32, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, _src[0][4].f64, _src[0][5].f64, _src[0][6].f64, _src[0][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, _src[1][4].f64, _src[1][5].f64, _src[1][6].f64, _src[1][7].f64, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.h * src1.h) + (src0.g * src1.g) + (src0.f * src1.f) + (src0.e * src1.e) + (src0.w * src1.w) + (src0.z * src1.z) + (src0.y * src1.y) + (src0.x * src1.x)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdph(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) 
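/* Denorm flushing mirrors the shader's float-controls execution mode:
 * if flush-to-zero is requested at this bit size, folded constants are
 * flushed too, so constant folding matches what the hardware would have
 * produced at run time. The same guarded flush follows every float
 * store in these evaluators.
 */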
{ constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fdph_replicated(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float16_vec src1 = { _mesa_half_to_float(_src[1][0].u16), _mesa_half_to_float(_src[1][1].u16), _mesa_half_to_float(_src[1][2].u16), _mesa_half_to_float(_src[1][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[1].u16 = _mesa_float_to_float16_rtz(dst.y); } else { _dst_val[1].u16 = _mesa_float_to_float16_rtne(dst.y); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[1], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[2].u16 = _mesa_float_to_float16_rtz(dst.z); } else { _dst_val[2].u16 = _mesa_float_to_float16_rtne(dst.z); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[2], 16); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[3].u16 = _mesa_float_to_float16_rtz(dst.w); } else { _dst_val[3].u16 = _mesa_float_to_float16_rtne(dst.w); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[3], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } _dst_val[1].f32 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[1], 32); } _dst_val[2].f32 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[2], 32); } _dst_val[3].f32 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[3], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const 
struct float64_vec src1 = { _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } _dst_val[1].f64 = dst.y; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[1], 64); } _dst_val[2].f64 = dst.z; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[2], 64); } _dst_val[3].f64 = dst.w; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[3], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_feq(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_feq16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_feq32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void 
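/* The comparison evaluators (feq/fge/flt/fneu) come in bool1, bool8,
 * bool16 and bool32 variants; all encode true as -1 (all bits set) and
 * false as 0 via the -(int)dst store, so e.g. feq8(1.0f, 1.0f) writes
 * 0xff. 16-bit sources are first widened with _mesa_half_to_float(),
 * which is exact, so comparing the widened floats is safe.
 */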
evaluate_feq8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fexp2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = exp2f(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = exp2f(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = exp2f(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_ffloor(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? floor(src0) : floorf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? floor(src0) : floorf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? 
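/* The generator pastes the same bit_size-guarded expression into every
 * case arm; in this 64-bit arm the double floor() branch is the one
 * actually taken.
 */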
floor(src0) : floorf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_ffma(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); const float src2 = _mesa_half_to_float(_src[2][_i].u16); float16_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_fma_rtz(src0, src1, src2); else if (bit_size == 32) dst = _mesa_float_fma_rtz(src0, src1, src2); else dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); } else { if (bit_size == 32) dst = fmaf(src0, src1, src2); else dst = fma(src0, src1, src2); } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; const float32_t src2 = _src[2][_i].f32; float32_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_fma_rtz(src0, src1, src2); else if (bit_size == 32) dst = _mesa_float_fma_rtz(src0, src1, src2); else dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); } else { if (bit_size == 32) dst = fmaf(src0, src1, src2); else dst = fma(src0, src1, src2); } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; const float64_t src2 = _src[2][_i].f64; float64_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_fma_rtz(src0, src1, src2); else if (bit_size == 32) dst = _mesa_float_fma_rtz(src0, src1, src2); else dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2)); } else { if (bit_size == 32) dst = fmaf(src0, src1, src2); else dst = fma(src0, src1, src2); } _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_ffract(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = src0 - (bit_size == 64 ? 
floor(src0) : floorf(src0)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fge(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fge16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fge32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const 
float64_t src1 = _src[1][_i].f64; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fge8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_find_lsb(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int32_t dst; dst = -1; for (unsigned bit = 0; bit < bit_size; bit++) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int32_t dst; dst = -1; for (unsigned bit = 0; bit < bit_size; bit++) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int32_t dst; dst = -1; for (unsigned bit = 0; bit < bit_size; bit++) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst; dst = -1; for (unsigned bit = 0; bit < bit_size; bit++) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int32_t dst; dst = -1; for (unsigned bit = 0; bit < bit_size; bit++) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fisfinite(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool1_t dst = isfinite(src0); _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool1_t dst = isfinite(src0); _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool1_t dst = isfinite(src0); _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fisfinite32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch 
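/* Unlike fisfinite above, which produces a 0/-1 boolean mask, the *32
 * variant stores a plain 0/1 result in a full int32.
 */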
(bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int32_t dst = isfinite(src0); _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int32_t dst = isfinite(src0); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int32_t dst = isfinite(src0); _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fisnormal(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); bool1_t dst = isnormal(src0); _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; bool1_t dst = isnormal(src0); _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; bool1_t dst = isnormal(src0); _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_flog2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = log2f(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = log2f(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = log2f(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_flrp(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); const float src2 = _mesa_half_to_float(_src[2][_i].u16); float16_t dst = src0 * (1 - src2) + src1 * src2; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; const float32_t src2 = _src[2][_i].f32; float32_t dst = src0 * (1 - src2) + src1 * src2; 
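/* flrp is a linear interpolation: src2 == 0 yields src0 and src2 == 1
 * yields src1 (for finite inputs), which the two-multiply form above
 * guarantees at the cost of a little extra rounding in between.
 */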
_dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; const float64_t src2 = _src[2][_i].f64; float64_t dst = src0 * (1 - src2) + src1 * src2; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_flt(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_flt16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_flt32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_flt8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = 
_mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fmax(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = fmax(src0, src1); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = fmax(src0, src1); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = fmax(src0, src1); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fmin(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = fmin(src0, src1); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = fmin(src0, src1); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = fmin(src0, src1); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fmod(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { 
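/* fmod follows the GLSL definition, src0 - src1 * floor(src0 / src1),
 * so a non-zero result takes the sign of src1. Note that all three
 * arms below call floorf(), including the 64-bit one.
 */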
case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = src0 - src1 * floorf(src0 / src1); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = src0 - src1 * floorf(src0 / src1); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = src0 - src1 * floorf(src0 / src1); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fmul(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_mul_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 * (double)src1); } else { dst = src0 * src1; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_mul_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 * (double)src1); } else { dst = src0 * src1; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_mul_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 * (double)src1); } else { dst = src0 * src1; } _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fneg(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = -src0; if 
(nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = -src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = -src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fneu(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fneu16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fneu32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } default: 
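/* Only 16-, 32- and 64-bit floats can be constant-folded here. */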
unreachable("unknown bit width"); } } static void evaluate_fneu8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_fpow(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fquantize2f16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); _dst_val[_i].f32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? 
copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); _dst_val[_i].f64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_frcp(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_frem(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = src0 - src1 * truncf(src0 / src1); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = src0 - src1 * truncf(src0 / src1); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = src0 - src1 * truncf(src0 / src1); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_frexp_exp(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); int32_t dst; frexp(src0, &dst); _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; int32_t dst; frexp(src0, &dst); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; int32_t dst; frexp(src0, 
&dst); _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_frexp_sig(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst; int n; dst = frexp(src0, &n); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst; int n; dst = frexp(src0, &n); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst; int n; dst = frexp(src0, &n); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fround_even(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_frsq(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? 
1.0 / sqrt(src0) : 1.0f / sqrtf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } }
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
static void evaluate_fsat(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = fmin(fmax(src0, 0.0), 1.0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = fmin(fmax(src0, 0.0), 1.0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = fmin(fmax(src0, 0.0), 1.0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } }
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
static void evaluate_fsat_signed_mali(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = fmin(fmax(src0, -1.0), 1.0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = fmin(fmax(src0, -1.0), 1.0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = fmin(fmax(src0, -1.0), 1.0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsign(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED 
nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? (isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : (isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? (isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : (isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f)); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? (isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : (isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f)); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsin(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? sin(src0) : sinf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? sin(src0) : sinf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? 
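/* Constant folding of fsin defers to the host libm; the result may differ in
 * the last ulp from what the target GPU would produce at run time. */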
sin(src0) : sinf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsin_agx(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = sinf(src0 * (6.2831853/4.0)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = sinf(src0 * (6.2831853/4.0)); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = sinf(src0 * (6.2831853/4.0)); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsin_r600(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = sinf(6.2831853 * src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } } static void evaluate_fsqrt(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); float16_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? 
sqrt(src0) : sqrtf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsub(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_sub_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 - (double)src1); } else { dst = src0 - src1; } if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_sub_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 - (double)src1); } else { dst = src0 - src1; } _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst; if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) dst = _mesa_double_sub_rtz(src0, src1); else dst = _mesa_double_to_float_rtz((double)src0 - (double)src1); } else { dst = src0 - src1; } _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsum2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 
64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsum3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_fsum4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { const struct float16_vec src0 = { _mesa_half_to_float(_src[0][0].u16), _mesa_half_to_float(_src[0][1].u16), _mesa_half_to_float(_src[0][2].u16), _mesa_half_to_float(_src[0][3].u16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float16_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w)); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[0].u16 = _mesa_float_to_float16_rtz(dst.x); } else { _dst_val[0].u16 = _mesa_float_to_float16_rtne(dst.x); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[0], 16); } break; } case 32: { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float32_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w)); _dst_val[0].f32 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[0], 32); } break; } case 64: { const struct float64_vec src0 = { _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct float64_vec dst; dst.x = dst.y = dst.z = dst.w = ((src0.x) + (src0.y) + (src0.z) + (src0.w)); _dst_val[0].f64 = dst.x; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[0], 64); } break; } default: unreachable("unknown bit width"); } } static void evaluate_ftrunc(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); 
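/* The half source is widened to float, truncated toward zero, and then
 * narrowed back below using the rounding mode requested by execution_mode. */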
float16_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; float32_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; float64_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2b1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; bool1_t dst = src0 != 0; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2b16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; bool16_t dst = src0 != 0; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2b32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = 
-(int1_t)_src[0][_i].b; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; bool32_t dst = src0 != 0; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2b8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; bool8_t dst = src0 != 0; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2f16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { 
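/* int32 magnitudes above 2^11 are not exactly representable in fp16, so the
 * final narrowing honors the RTZ execution mode here. */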
_dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2f32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2f64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { 
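/* An int32-to-double conversion is exact and can never produce a denorm, so
 * this flush appears to be a no-op kept for template uniformity. */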
constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2fmp(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } } static void evaluate_i2i1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2i16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int16_t dst = src0; _dst_val[_i].i16 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2i32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = 
-(int1_t)_src[0][_i].b; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int32_t dst = src0; _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2i64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int64_t dst = src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2i8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int8_t dst = src0; _dst_val[_i].i8 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_i2imp(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int16_t dst = src0; _dst_val[_i].i16 = dst; } } static void evaluate_i32csel_ge(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst = (src0 >= 0.0f) ? 
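/* The integer src0 is promoted to float for the >= 0.0f test, which behaves
 * the same as a plain integer sign check. */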
src1 : src2; _dst_val[_i].i32 = dst; } } static void evaluate_i32csel_gt(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst = (src0 > 0.0f) ? src1 : src2; _dst_val[_i].i32 = dst; } } static void evaluate_iabs(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int1_t dst = (src0 < 0) ? -src0 : src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int8_t dst = (src0 < 0) ? -src0 : src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int16_t dst = (src0 < 0) ? -src0 : src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst = (src0 < 0) ? -src0 : src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int64_t dst = (src0 < 0) ? -src0 : src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_iadd(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = (uint64_t)src0 + (uint64_t)src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = (uint64_t)src0 + (uint64_t)src1; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = (uint64_t)src0 + (uint64_t)src1; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (uint64_t)src0 + (uint64_t)src1; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = (uint64_t)src0 + (uint64_t)src1; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_iadd3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; /* 1-bit 
integers use a 0/-1 convention */ const int1_t src2 = -(int1_t)_src[2][_i].b; int1_t dst = src0 + src1 + src2; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; const int8_t src2 = _src[2][_i].i8; int8_t dst = src0 + src1 + src2; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; const int16_t src2 = _src[2][_i].i16; int16_t dst = src0 + src1 + src2; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst = src0 + src1 + src2; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; const int64_t src2 = _src[2][_i].i64; int64_t dst = src0 + src1 + src2; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_iadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 > 0 ? (src0 + src1 < src0 ? u_intN_max(bit_size) : src0 + src1) : (src0 < src0 + src1 ? u_intN_min(bit_size) : src0 + src1) ; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 > 0 ? (src0 + src1 < src0 ? u_intN_max(bit_size) : src0 + src1) : (src0 < src0 + src1 ? u_intN_min(bit_size) : src0 + src1) ; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 > 0 ? (src0 + src1 < src0 ? u_intN_max(bit_size) : src0 + src1) : (src0 < src0 + src1 ? u_intN_min(bit_size) : src0 + src1) ; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 > 0 ? (src0 + src1 < src0 ? u_intN_max(bit_size) : src0 + src1) : (src0 < src0 + src1 ? u_intN_min(bit_size) : src0 + src1) ; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 > 0 ? (src0 + src1 < src0 ? u_intN_max(bit_size) : src0 + src1) : (src0 < src0 + src1 ? 
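/* Negative-src1 arm: if the sum came out larger than src0, the addition
 * wrapped past the minimum, so clamp. */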
u_intN_min(bit_size) : src0 + src1) ; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_iand(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 & src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 & src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 & src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 & src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 & src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ibfe(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; int32_t dst; int base = src0; unsigned offset = src1 & 0x1F; unsigned bits = src2 & 0x1F; if (bits == 0) { dst = 0; } else if (offset + bits < 32) { dst = (base << (32 - bits - offset)) >> (32 - bits); } else { dst = base >> offset; } _dst_val[_i].i32 = dst; } } static void evaluate_ibitfield_extract(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst; int base = src0; int offset = src1, bits = src2; if (bits == 0) { dst = 0; } else if (offset < 0 || bits < 0 || offset + bits > 32) { dst = 0; } else { dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */ } _dst_val[_i].i32 = dst; } } static void evaluate_idiv(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 == 0 ? 0 : (src0 / src1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 == 0 ? 
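/* idiv folds division by zero to 0 instead of invoking undefined behavior. */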
0 : (src0 / src1); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ieq(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool1_t dst = src0 == src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ieq16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool16_t dst = src0 == src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ieq32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ 
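/* Comparison results are stored as 0 / -1 masks: -(int)dst sign-extends true
 * to all ones, matching NIR's boolean convention. */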
const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool32_t dst = src0 == src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ieq8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool8_t dst = src0 == src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ifind_msb(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst; dst = -1; for (int bit = 31; bit >= 0; bit--) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. */ if ((((src0 >> bit) & 1) && (src0 >= 0)) || (!((src0 >> bit) & 1) && (src0 < 0))) { dst = bit; break; } } _dst_val[_i].i32 = dst; } } static void evaluate_ifind_msb_rev(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int32_t dst; dst = -1; if (src0 != 0 && src0 != -1) { for (int bit = 0; bit < 31; bit++) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. 
*/ if ((((src0 << bit) & 0x40000000) && (src0 >= 0)) || ((!((src0 << bit) & 0x40000000)) && (src0 < 0))) { dst = bit; break; } } } _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int32_t dst; dst = -1; if (src0 != 0 && src0 != -1) { for (int bit = 0; bit < 31; bit++) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. */ if ((((src0 << bit) & 0x40000000) && (src0 >= 0)) || ((!((src0 << bit) & 0x40000000)) && (src0 < 0))) { dst = bit; break; } } } _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int32_t dst; dst = -1; if (src0 != 0 && src0 != -1) { for (int bit = 0; bit < 31; bit++) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. */ if ((((src0 << bit) & 0x40000000) && (src0 >= 0)) || ((!((src0 << bit) & 0x40000000)) && (src0 < 0))) { dst = bit; break; } } } _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst; dst = -1; if (src0 != 0 && src0 != -1) { for (int bit = 0; bit < 31; bit++) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. */ if ((((src0 << bit) & 0x40000000) && (src0 >= 0)) || ((!((src0 << bit) & 0x40000000)) && (src0 < 0))) { dst = bit; break; } } } _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int32_t dst; dst = -1; if (src0 != 0 && src0 != -1) { for (int bit = 0; bit < 31; bit++) { /* If src0 < 0, we're looking for the first 0 bit. * if src0 >= 0, we're looking for the first 1 bit. 
*/ if ((((src0 << bit) & 0x40000000) && (src0 >= 0)) || ((!((src0 << bit) & 0x40000000)) && (src0 < 0))) { dst = bit; break; } } } _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ige(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ige16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ige32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = 
_src[1][_i].i8; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ige8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ihadd(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].i64 = dst; } break; 
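/* The (a & b) + ((a ^ b) >> 1) form above computes floor((a + b) / 2) without
 * overflowing the intermediate sum, even at the int64 extremes. */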
} default: unreachable("unknown bit width"); } } static void evaluate_ilt(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ilt16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ilt32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = 
_src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ilt8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_imad24_ir3(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst = (((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8) + src2; _dst_val[_i].i32 = dst; } } static void evaluate_imadsh_mix16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; int32_t dst; dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2; _dst_val[_i].i32 = dst; } } static void evaluate_imax(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 > src0 ? src1 : src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 > src0 ? 
src1 : src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_imin(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 > src0 ? src0 : src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_imod(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 == 0 ? 
0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_imul(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst; /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst; /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst; /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst; /* Use 64-bit multiplies to prevent overflow of signed arithmetic */ dst = (uint64_t)src0 * (uint64_t)src1; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_imul24(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8); _dst_val[_i].i32 = dst; } } static void evaluate_imul24_relaxed(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src0 * src1; _dst_val[_i].i32 = dst; } } static void evaluate_imul_2x32_64(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int64_t dst = (int64_t)src0 * (int64_t)src1; _dst_val[_i].i64 = dst; } } static void evaluate_imul_32x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED 
unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src0 * (int16_t) src1; _dst_val[_i].i32 = dst; } } static void evaluate_imul_high(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst; if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign * extension to work properly. The casts are kind-of annoying but needed * to prevent compiler warnings. */ uint32_t src0_u32[4] = { src0, (int64_t)src0 >> 32, (int64_t)src0 >> 63, (int64_t)src0 >> 63, }; uint32_t src1_u32[4] = { src1, (int64_t)src1 >> 32, (int64_t)src1 >> 63, (int64_t)src1 >> 63, }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { /* First, sign-extend to 64-bit, then convert to unsigned to prevent * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst; if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign * extension to work properly. The casts are kind-of annoying but needed * to prevent compiler warnings. */ uint32_t src0_u32[4] = { src0, (int64_t)src0 >> 32, (int64_t)src0 >> 63, (int64_t)src0 >> 63, }; uint32_t src1_u32[4] = { src1, (int64_t)src1 >> 32, (int64_t)src1 >> 63, (int64_t)src1 >> 63, }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { /* First, sign-extend to 64-bit, then convert to unsigned to prevent * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst; if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign * extension to work properly. The casts are kind-of annoying but needed * to prevent compiler warnings. 
*/ uint32_t src0_u32[4] = { src0, (int64_t)src0 >> 32, (int64_t)src0 >> 63, (int64_t)src0 >> 63, }; uint32_t src1_u32[4] = { src1, (int64_t)src1 >> 32, (int64_t)src1 >> 63, (int64_t)src1 >> 63, }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { /* First, sign-extend to 64-bit, then convert to unsigned to prevent * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign * extension to work properly. The casts are kind-of annoying but needed * to prevent compiler warnings. */ uint32_t src0_u32[4] = { src0, (int64_t)src0 >> 32, (int64_t)src0 >> 63, (int64_t)src0 >> 63, }; uint32_t src1_u32[4] = { src1, (int64_t)src1 >> 32, (int64_t)src1 >> 63, (int64_t)src1 >> 63, }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { /* First, sign-extend to 64-bit, then convert to unsigned to prevent * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst; if (bit_size == 64) { /* We need to do a full 128-bit x 128-bit multiply in order for the sign * extension to work properly. The casts are kind-of annoying but needed * to prevent compiler warnings. 
*/ uint32_t src0_u32[4] = { src0, (int64_t)src0 >> 32, (int64_t)src0 >> 63, (int64_t)src0 >> 63, }; uint32_t src1_u32[4] = { src1, (int64_t)src1 >> 32, (int64_t)src1 >> 63, (int64_t)src1 >> 63, }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { /* First, sign-extend to 64-bit, then convert to unsigned to prevent * potential overflow of signed multiply */ dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size; } _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ine(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool1_t dst = src0 != src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ine16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool16_t dst = src0 != src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ine32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < 
num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool32_t dst = src0 != src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ine8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; bool8_t dst = src0 != src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ineg(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int1_t dst = -src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int8_t dst = -src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int16_t dst = -src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst = -src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int64_t dst = -src0; _dst_val[_i].i64 = dst; } break; } 
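/* Note on the negation above: for the 8/16/32-bit cases the arithmetic
 * happens in the promoted integer type and is then truncated back to
 * the source width, so the result follows two's-complement wrapping and
 * the most negative value negates to itself, e.g.
 *
 *    src0 = INT8_MIN (-128)  ->  dst = (int8_t)128 = -128
 *
 * The 64-bit case has no wider type to promote to; in practice it
 * relies on the same wrapping behavior on the two's-complement targets
 * these evaluators are built for.
 */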
default: unreachable("unknown bit width"); } } static void evaluate_inot(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int1_t dst = ~src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int8_t dst = ~src0; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int16_t dst = ~src0; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst = ~src0; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int64_t dst = ~src0; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_insert_u16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (src0 & 0xffff) << (src1 * 16); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (src0 & 0xffff) << (src1 * 16); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (src0 & 0xffff) << (src1 * 16); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (src0 & 0xffff) << (src1 * 16); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (src0 & 0xffff) << (src1 * 16); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_insert_u8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (src0 & 0xff) << (src1 * 8); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (src0 & 0xff) << (src1 * 8); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (src0 & 0xff) << (src1 * 8); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (src0 & 
0xff) << (src1 * 8); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (src0 & 0xff) << (src1 * 8); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ior(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 | src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 | src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 | src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 | src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 | src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_irem(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 == 0 ? 0 : src0 % src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 == 0 ? 
0 : src0 % src1; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_irhadd(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = (src0 | src1) - ((src0 ^ src1) >> 1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = (src0 | src1) - ((src0 ^ src1) >> 1); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = (src0 | src1) - ((src0 ^ src1) >> 1); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (src0 | src1) - ((src0 ^ src1) >> 1); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = (src0 | src1) - ((src0 ^ src1) >> 1); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ishl(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; const uint32_t src1 = _src[1][_i].u32; int1_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const uint32_t src1 = _src[1][_i].u32; int8_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const uint32_t src1 = _src[1][_i].u32; int16_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const uint32_t src1 = _src[1][_i].u32; int32_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const uint32_t src1 = _src[1][_i].u32; int64_t dst = (uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ishr(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; const uint32_t src1 = _src[1][_i].u32; int1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); /* 1-bit integers get 
truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const uint32_t src1 = _src[1][_i].u32; int8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const uint32_t src1 = _src[1][_i].u32; int16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const uint32_t src1 = _src[1][_i].u32; int32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const uint32_t src1 = _src[1][_i].u32; int64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_isign(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; int1_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; int8_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; int16_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; int32_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; int64_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 
1 : -1); _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_isub(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src0 - src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src0 - src1; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src0 - src1; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src0 - src1; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src0 - src1; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_isub_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; int1_t dst = src1 < 0 ? (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) : (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1) ; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; int8_t dst = src1 < 0 ? (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) : (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1) ; _dst_val[_i].i8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; int16_t dst = src1 < 0 ? (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) : (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1) ; _dst_val[_i].i16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = src1 < 0 ? (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) : (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1) ; _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; int64_t dst = src1 < 0 ? (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) : (src0 < src0 - src1 ? 
u_intN_min(bit_size) : src0 - src1) ; _dst_val[_i].i64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ixor(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 ^ src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 ^ src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 ^ src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 ^ src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 ^ src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ldexp(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const int32_t src1 = _src[1][_i].i32; float16_t dst; dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); /* flush denormals to zero. */ if (!isnormal(dst)) dst = copysignf(0.0f, src0); if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const int32_t src1 = _src[1][_i].i32; float32_t dst; dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); /* flush denormals to zero. */ if (!isnormal(dst)) dst = copysignf(0.0f, src0); _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const int32_t src1 = _src[1][_i].i32; float64_t dst; dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); /* flush denormals to zero. 
*/ if (!isnormal(dst)) dst = copysignf(0.0f, src0); _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_mov(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_pack_32_2x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint16_vec src0 = { _src[0][0].u16, _src[0][1].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x | ((uint32_t)src0.y << 16); _dst_val[0].u32 = dst.x; } static void evaluate_pack_32_2x16_split(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint32_t dst = src0 | ((uint32_t)src1 << 16); _dst_val[_i].u32 = dst; } } static void evaluate_pack_32_4x8(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint8_vec src0 = { _src[0][0].u8, _src[0][1].u8, _src[0][2].u8, _src[0][3].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x | ((uint32_t)src0.y << 8) | ((uint32_t)src0.z << 16) | ((uint32_t)src0.w << 24); _dst_val[0].u32 = dst.x; } static void evaluate_pack_32_4x8_split(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; const uint8_t src2 = _src[2][_i].u8; const uint8_t src3 = _src[3][_i].u8; uint32_t dst = src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24); _dst_val[_i].u32 = dst; } } static void evaluate_pack_64_2x32(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint32_vec src0 = { _src[0][0].u32, _src[0][1].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x | ((uint64_t)src0.y << 32); _dst_val[0].u64 = dst.x; } static void evaluate_pack_64_2x32_split(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, 
UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint64_t dst = src0 | ((uint64_t)src1 << 32); _dst_val[_i].u64 = dst; } } static void evaluate_pack_64_4x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint16_vec src0 = { _src[0][0].u16, _src[0][1].u16, _src[0][2].u16, _src[0][3].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x | ((uint64_t)src0.y << 16) | ((uint64_t)src0.z << 32) | ((uint64_t)src0.w << 48); _dst_val[0].u64 = dst.x; } static void evaluate_pack_double_2x32_dxil(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint32_vec src0 = { _src[0][0].u32, _src[0][1].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x | ((uint64_t)src0.y << 32); _dst_val[0].u64 = dst.x; } static void evaluate_pack_half_2x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (uint32_t) pack_half_1x16(src0.x); dst.x |= ((uint32_t) pack_half_1x16(src0.y)) << 16; _dst_val[0].u32 = dst.x; } static void evaluate_pack_half_2x16_split(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct float32_vec src1 = { _src[1][0].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = dst.y = dst.z = dst.w = pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16); _dst_val[0].u32 = dst.x; } static void evaluate_pack_snorm_2x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (uint32_t) pack_snorm_1x16(src0.x); dst.x |= ((uint32_t) pack_snorm_1x16(src0.y)) << 16; _dst_val[0].u32 = dst.x; } static void evaluate_pack_snorm_4x8(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (uint32_t) pack_snorm_1x8(src0.x); dst.x |= ((uint32_t) pack_snorm_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_snorm_1x8(src0.z)) << 16; dst.x |= ((uint32_t) pack_snorm_1x8(src0.w)) << 24; _dst_val[0].u32 = dst.x; } static void evaluate_pack_unorm_2x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (uint32_t) pack_unorm_1x16(src0.x); dst.x |= ((uint32_t) pack_unorm_1x16(src0.y)) << 16; _dst_val[0].u32 = 
dst.x; } static void evaluate_pack_unorm_4x8(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct float32_vec src0 = { _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (uint32_t) pack_unorm_1x8(src0.x); dst.x |= ((uint32_t) pack_unorm_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_unorm_1x8(src0.z)) << 16; dst.x |= ((uint32_t) pack_unorm_1x8(src0.w)) << 24; _dst_val[0].u32 = dst.x; } static void evaluate_pack_uvec2_to_uint(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint32_vec src0 = { _src[0][0].u32, _src[0][1].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (src0.x & 0xffff) | (src0.y << 16); _dst_val[0].u32 = dst.x; } static void evaluate_pack_uvec4_to_uint(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint32_vec src0 = { _src[0][0].u32, _src[0][1].u32, _src[0][2].u32, _src[0][3].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = (src0.x << 0) | (src0.y << 8) | (src0.z << 16) | (src0.w << 24); _dst_val[0].u32 = dst.x; } static void evaluate_sad_u8x4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0; uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8; uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16; uint8_t s0_b3 = (src0.x & 0xff000000) >> 24; uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0; uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8; uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16; uint8_t s1_b3 = (src1.x & 0xff000000) >> 24; dst.x = src2.x + (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) + (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) + (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) + (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3)); /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0; uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8; uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16; uint8_t s0_b3 = (src0.x & 0xff000000) >> 24; uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0; uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8; uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16; uint8_t s1_b3 = (src1.x & 0xff000000) >> 24; dst.x = src2.x + (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) + (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) + (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) + (s0_b3 > s1_b3 ? 
(s0_b3 - s1_b3) : (s1_b3 - s0_b3)); _dst_val[0].u8 = dst.x; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0; uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8; uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16; uint8_t s0_b3 = (src0.x & 0xff000000) >> 24; uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0; uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8; uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16; uint8_t s1_b3 = (src1.x & 0xff000000) >> 24; dst.x = src2.x + (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) + (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) + (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) + (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3)); _dst_val[0].u16 = dst.x; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0; uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8; uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16; uint8_t s0_b3 = (src0.x & 0xff000000) >> 24; uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0; uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8; uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16; uint8_t s1_b3 = (src1.x & 0xff000000) >> 24; dst.x = src2.x + (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) + (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) + (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) + (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3)); _dst_val[0].u32 = dst.x; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; uint8_t s0_b0 = (src0.x & 0x000000ff) >> 0; uint8_t s0_b1 = (src0.x & 0x0000ff00) >> 8; uint8_t s0_b2 = (src0.x & 0x00ff0000) >> 16; uint8_t s0_b3 = (src0.x & 0xff000000) >> 24; uint8_t s1_b0 = (src1.x & 0x000000ff) >> 0; uint8_t s1_b1 = (src1.x & 0x0000ff00) >> 8; uint8_t s1_b2 = (src1.x & 0x00ff0000) >> 16; uint8_t s1_b3 = (src1.x & 0xff000000) >> 24; dst.x = src2.x + (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) + (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) + (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) + (s0_b3 > s1_b3 ? 
(s0_b3 - s1_b3) : (s1_b3 - s0_b3)); _dst_val[0].u64 = dst.x; break; } default: unreachable("unknown bit width"); } } static void evaluate_sdot_2x16_iadd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int32_t v0x = (int16_t)(src0 ); const int32_t v0y = (int16_t)(src0 >> 16); const int32_t v1x = (int16_t)(src1 ); const int32_t v1y = (int16_t)(src1 >> 16); dst = (v0x * v1x) + (v0y * v1y) + src2; _dst_val[_i].i32 = dst; } } static void evaluate_sdot_2x16_iadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int64_t v0x = (int16_t)(src0 ); const int64_t v0y = (int16_t)(src0 >> 16); const int64_t v1x = (int16_t)(src1 ); const int64_t v1y = (int16_t)(src1 >> 16); const int64_t tmp = (v0x * v1x) + (v0y * v1y) + src2; dst = tmp >= INT32_MAX ? INT32_MAX : (tmp <= INT32_MIN ? INT32_MIN : tmp); _dst_val[_i].i32 = dst; } } static void evaluate_sdot_4x8_iadd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int32_t v0x = (int8_t)(src0 ); const int32_t v0y = (int8_t)(src0 >> 8); const int32_t v0z = (int8_t)(src0 >> 16); const int32_t v0w = (int8_t)(src0 >> 24); const int32_t v1x = (int8_t)(src1 ); const int32_t v1y = (int8_t)(src1 >> 8); const int32_t v1z = (int8_t)(src1 >> 16); const int32_t v1w = (int8_t)(src1 >> 24); dst = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; _dst_val[_i].i32 = dst; } } static void evaluate_sdot_4x8_iadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int64_t v0x = (int8_t)(src0 ); const int64_t v0y = (int8_t)(src0 >> 8); const int64_t v0z = (int8_t)(src0 >> 16); const int64_t v0w = (int8_t)(src0 >> 24); const int64_t v1x = (int8_t)(src1 ); const int64_t v1y = (int8_t)(src1 >> 8); const int64_t v1z = (int8_t)(src1 >> 16); const int64_t v1w = (int8_t)(src1 >> 24); const int64_t tmp = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; dst = tmp >= INT32_MAX ? INT32_MAX : (tmp <= INT32_MIN ? INT32_MIN : tmp); _dst_val[_i].i32 = dst; } } static void evaluate_seq(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = (src0 == src1) ? 
1.0f : 0.0f; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = (src0 == src1) ? 1.0f : 0.0f; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = (src0 == src1) ? 1.0f : 0.0f; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_sge(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = (src0 >= src1) ? 1.0f : 0.0f; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = (src0 >= src1) ? 1.0f : 0.0f; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = (src0 >= src1) ? 1.0f : 0.0f; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_slt(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = (src0 < src1) ? 1.0f : 0.0f; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = (src0 < src1) ? 
1.0f : 0.0f; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = (src0 < src1) ? 1.0f : 0.0f; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_sne(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const float src0 = _mesa_half_to_float(_src[0][_i].u16); const float src1 = _mesa_half_to_float(_src[1][_i].u16); float16_t dst = (src0 != src1) ? 1.0f : 0.0f; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const float32_t src0 = _src[0][_i].f32; const float32_t src1 = _src[1][_i].f32; float32_t dst = (src0 != src1) ? 1.0f : 0.0f; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const float64_t src0 = _src[0][_i].f64; const float64_t src1 = _src[1][_i].f64; float64_t dst = (src0 != src1) ? 1.0f : 0.0f; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_sudot_4x8_iadd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int32_t v0x = (int8_t)(src0 ); const int32_t v0y = (int8_t)(src0 >> 8); const int32_t v0z = (int8_t)(src0 >> 16); const int32_t v0w = (int8_t)(src0 >> 24); const uint32_t v1x = (uint8_t)(src1 ); const uint32_t v1y = (uint8_t)(src1 >> 8); const uint32_t v1z = (uint8_t)(src1 >> 16); const uint32_t v1w = (uint8_t)(src1 >> 24); dst = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; _dst_val[_i].i32 = dst; } } static void evaluate_sudot_4x8_iadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const int32_t src2 = _src[2][_i].i32; int32_t dst; const int64_t v0x = (int8_t)(src0 ); const int64_t v0y = (int8_t)(src0 >> 8); const int64_t v0z = (int8_t)(src0 >> 16); const int64_t v0w = (int8_t)(src0 >> 24); const uint64_t v1x = (uint8_t)(src1 ); const uint64_t v1y = (uint8_t)(src1 >> 8); const uint64_t v1z = (uint8_t)(src1 >> 16); const uint64_t v1w = (uint8_t)(src1 >> 24); const int64_t tmp = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; dst = tmp >= INT32_MAX ? 
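/* Saturating accumulate: the mixed-signedness dot product is formed in 64 bits, where it cannot overflow (four products of at most 128 * 255, plus a 32-bit addend), and only then clamped to [INT32_MIN, INT32_MAX], so a tmp of 0x80000000 stores as INT32_MAX instead of wrapping negative. */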
INT32_MAX : (tmp <= INT32_MIN ? INT32_MIN : tmp); _dst_val[_i].i32 = dst; } } static void evaluate_u2f16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2f32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { 
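/* When the execution mode requests flush-to-zero at the result width, denormal results are squashed to a signed zero. Integer-to-float conversions never produce denormals, so this is a no-op here in practice; the generated code simply applies the check uniformly to every float result. */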
constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; float32_t dst = src0; _dst_val[_i].f32 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 32)) { constant_denorm_flush_to_zero(&_dst_val[_i], 32); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2f64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; float64_t dst = src0; _dst_val[_i].f64 = dst; if (nir_is_denorm_flush_to_zero(execution_mode, 64)) { constant_denorm_flush_to_zero(&_dst_val[_i], 64); } } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2fmp(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; float16_t dst = src0; if (nir_is_rounding_mode_rtz(execution_mode, 16)) { _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst); } else { _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst); } if (nir_is_denorm_flush_to_zero(execution_mode, 16)) { constant_denorm_flush_to_zero(&_dst_val[_i], 16); } } } static void evaluate_u2u1(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; 
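/* Narrowing to the 1-bit type is a plain C assignment; the "& 1" in the store below then keeps only bit 0, the canonical form for 1-bit integers. */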
uint1_t dst = src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2u16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; uint16_t dst = src0; _dst_val[_i].u16 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2u32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; uint32_t dst = src0; _dst_val[_i].u32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2u64(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; uint64_t dst = src0; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_u2u8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 8: { for (unsigned _i = 
0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; uint8_t dst = src0; _dst_val[_i].u8 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uabs_isub(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { /* 1-bit integers use a 0/-1 convention */ const int1_t src0 = -(int1_t)_src[0][_i].b; /* 1-bit integers use a 0/-1 convention */ const int1_t src1 = -(int1_t)_src[1][_i].b; uint1_t dst = src1 > src0 ? (uint64_t) src1 - (uint64_t) src0 : (uint64_t) src0 - (uint64_t) src1 ; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const int8_t src0 = _src[0][_i].i8; const int8_t src1 = _src[1][_i].i8; uint8_t dst = src1 > src0 ? (uint64_t) src1 - (uint64_t) src0 : (uint64_t) src0 - (uint64_t) src1 ; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const int16_t src0 = _src[0][_i].i16; const int16_t src1 = _src[1][_i].i16; uint16_t dst = src1 > src0 ? (uint64_t) src1 - (uint64_t) src0 : (uint64_t) src0 - (uint64_t) src1 ; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; uint32_t dst = src1 > src0 ? (uint64_t) src1 - (uint64_t) src0 : (uint64_t) src0 - (uint64_t) src1 ; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const int64_t src0 = _src[0][_i].i64; const int64_t src1 = _src[1][_i].i64; uint64_t dst = src1 > src0 ? (uint64_t) src1 - (uint64_t) src0 : (uint64_t) src0 - (uint64_t) src1 ; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uabs_usub(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (src1 > src0) ? 
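/* Unsigned absolute difference: comparing first and always subtracting the smaller value from the larger avoids modular underflow, so uabs_usub(3, 5) yields 2 rather than 0xfffffffe. */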
(src1 - src0) : (src0 - src1); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (src1 > src0) ? (src1 - src0) : (src0 - src1); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uadd_carry(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 + src1 < src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 + src1 < src0; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 + src1 < src0; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 + src1 < src0; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 + src1 < src0; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (src0 + src1) < src0 ? u_uintN_max(sizeof(src0) * 8) : (src0 + src1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (src0 + src1) < src0 ? u_uintN_max(sizeof(src0) * 8) : (src0 + src1); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (src0 + src1) < src0 ? u_uintN_max(sizeof(src0) * 8) : (src0 + src1); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (src0 + src1) < src0 ? u_uintN_max(sizeof(src0) * 8) : (src0 + src1); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (src0 + src1) < src0 ? 
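/* Unsigned overflow is detected by wraparound: (a + b) < a holds exactly when the addition carried out of the type (the same test uadd_carry uses above), in which case the result clamps to the type's maximum via u_uintN_max. At 8 bits, 0xff + 0x02 wraps to 0x01 < 0xff, so the saturated result is 0xff. */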
u_uintN_max(sizeof(src0) * 8) : (src0 + src1); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ubfe(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst; unsigned base = src0; unsigned offset = src1 & 0x1F; unsigned bits = src2 & 0x1F; if (bits == 0) { dst = 0; } else if (offset + bits < 32) { dst = (base << (32 - bits - offset)) >> (32 - bits); } else { dst = base >> offset; } _dst_val[_i].u32 = dst; } } static void evaluate_ubitfield_extract(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const int32_t src1 = _src[1][_i].i32; const int32_t src2 = _src[2][_i].i32; uint32_t dst; unsigned base = src0; int offset = src1, bits = src2; if (bits == 0) { dst = 0; } else if (bits < 0 || offset < 0 || offset + bits > 32) { dst = 0; /* undefined per the spec */ } else { dst = (base >> offset) & ((1ull << bits) - 1); } _dst_val[_i].u32 = dst; } } static void evaluate_uclz(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint32_t dst; int bit; for (bit = bit_size - 1; bit >= 0; bit--) { if ((src0 & (1u << bit)) != 0) break; } dst = (unsigned)(31 - bit); _dst_val[_i].u32 = dst; } } static void evaluate_udiv(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src1 == 0 ? 0 : (src0 / src1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src1 == 0 ? 0 : (src0 / src1); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src1 == 0 ? 
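/* A zero divisor would fault at constant-folding time, so the evaluator defines x / 0 as 0; umod further below guards the same way. */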
0 : (src0 / src1); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_udot_2x16_uadd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst; const uint32_t v0x = (uint16_t)(src0 ); const uint32_t v0y = (uint16_t)(src0 >> 16); const uint32_t v1x = (uint16_t)(src1 ); const uint32_t v1y = (uint16_t)(src1 >> 16); dst = (v0x * v1x) + (v0y * v1y) + src2; _dst_val[_i].u32 = dst; } } static void evaluate_udot_2x16_uadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst; const uint64_t v0x = (uint16_t)(src0 ); const uint64_t v0y = (uint16_t)(src0 >> 16); const uint64_t v1x = (uint16_t)(src1 ); const uint64_t v1y = (uint16_t)(src1 >> 16); const uint64_t tmp = (v0x * v1x) + (v0y * v1y) + src2; dst = tmp >= UINT32_MAX ? UINT32_MAX : tmp; _dst_val[_i].u32 = dst; } } static void evaluate_udot_4x8_uadd(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst; const uint32_t v0x = (uint8_t)(src0 ); const uint32_t v0y = (uint8_t)(src0 >> 8); const uint32_t v0z = (uint8_t)(src0 >> 16); const uint32_t v0w = (uint8_t)(src0 >> 24); const uint32_t v1x = (uint8_t)(src1 ); const uint32_t v1y = (uint8_t)(src1 >> 8); const uint32_t v1z = (uint8_t)(src1 >> 16); const uint32_t v1w = (uint8_t)(src1 >> 24); dst = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; _dst_val[_i].u32 = dst; } } static void evaluate_udot_4x8_uadd_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst; const uint64_t v0x = (uint8_t)(src0 ); const uint64_t v0y = (uint8_t)(src0 >> 8); const uint64_t v0z = (uint8_t)(src0 >> 16); const uint64_t v0w = (uint8_t)(src0 >> 24); const uint64_t v1x = (uint8_t)(src1 ); const uint64_t v1y = (uint8_t)(src1 >> 8); const uint64_t v1z = (uint8_t)(src1 >> 16); const uint64_t v1w = (uint8_t)(src1 >> 24); const uint64_t tmp = (v0x * v1x) + (v0y * v1y) + (v0z * v1z) + (v0w * v1w) + src2; dst = tmp >= UINT32_MAX ?
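/* As with the signed dot products, the sum is formed in 64 bits: four 8-bit products reach at most 4 * 255 * 255 = 260100, and even adding a UINT32_MAX addend cannot overflow uint64_t, so clamping to UINT32_MAX afterward is exact. */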
UINT32_MAX : tmp; _dst_val[_i].u32 = dst; } } static void evaluate_ufind_msb(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; int32_t dst; dst = -1; for (int bit = bit_size - 1; bit >= 0; bit--) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; int32_t dst; dst = -1; for (int bit = bit_size - 1; bit >= 0; bit--) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; int32_t dst; dst = -1; for (int bit = bit_size - 1; bit >= 0; bit--) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; int32_t dst; dst = -1; for (int bit = bit_size - 1; bit >= 0; bit--) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; int32_t dst; dst = -1; for (int bit = bit_size - 1; bit >= 0; bit--) { if ((src0 >> bit) & 1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ufind_msb_rev(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; int32_t dst; dst = -1; /* scan from the most significant bit of the source width downward */ for (int bit = 0; bit < bit_size; bit++) { if ((src0 << bit) & 0x1) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; int32_t dst; dst = -1; for (int bit = 0; bit < bit_size; bit++) { if ((src0 << bit) & 0x80) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; int32_t dst; dst = -1; for (int bit = 0; bit < bit_size; bit++) { if ((src0 << bit) & 0x8000) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; int32_t dst; dst = -1; for (int bit = 0; bit < bit_size; bit++) { if ((src0 << bit) & 0x80000000) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; int32_t dst; dst = -1; for (int bit = 0; bit < bit_size; bit++) { if ((src0 << bit) & 0x8000000000000000ull) { dst = bit; break; } } _dst_val[_i].i32 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uge(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 =
_src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool1_t dst = src0 >= src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uge16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool16_t dst = src0 >= src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uge32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool32_t dst = src0 >= src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uge8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i 
= 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool8_t dst = src0 >= src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_uhadd(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ult(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const 
uint64_t src1 = _src[1][_i].u64; bool1_t dst = src0 < src1; _dst_val[_i].b = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ult16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool16_t dst = src0 < src1; _dst_val[_i].i16 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ult32(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool32_t dst = src0 < src1; _dst_val[_i].i32 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ult8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const 
uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; bool8_t dst = src0 < src1; _dst_val[_i].i8 = -(int)dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_umad24(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = (((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8) + src2; _dst_val[_i].u32 = dst; } } static void evaluate_umad24_relaxed(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; const uint32_t src2 = _src[2][_i].u32; uint32_t dst = src0 * src1 + src2; _dst_val[_i].u32 = dst; } } static void evaluate_umax(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src1 > src0 ? src1 : src0; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src1 > src0 ? src1 : src0; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_umax_4x8_vc4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; } _dst_val[_i].i32 = dst; } } static void evaluate_umin(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src1 > src0 ? 
src0 : src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src1 > src0 ? src0 : src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_umin_4x8_vc4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; } _dst_val[_i].i32 = dst; } } static void evaluate_umod(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src1 == 0 ? 0 : src0 % src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src1 == 0 ? 0 : src0 % src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src1 == 0 ? 
0 : src0 % src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_umul24(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst = (((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8); _dst_val[_i].i32 = dst; } } static void evaluate_umul24_relaxed(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 * src1; _dst_val[_i].u32 = dst; } } static void evaluate_umul_2x32_64(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint64_t dst = (uint64_t)src0 * (uint64_t)src1; _dst_val[_i].u64 = dst; } } static void evaluate_umul_32x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 * (uint16_t) src1; _dst_val[_i].u32 = dst; } } static void evaluate_umul_high(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst; if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. */ uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst; if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. */ uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst; if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. 
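For 64-bit sources there is no 128-bit intermediate type to widen into, so the product is built from 32-bit limbs with ubm_mul_u32arr and the top two limbs form the high half; for narrower sources the branch below simply widens to 64 bits and shifts the full product right by bit_size.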
*/ uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst; if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. */ uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst; if (bit_size == 64) { /* The casts are kind-of annoying but needed to prevent compiler warnings. */ uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; uint32_t prod_u32[4]; ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); } else { dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; } _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_umul_low(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst; uint64_t mask = (1 << (bit_size / 2)) - 1; dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask); _dst_val[_i].u32 = dst; } } static void evaluate_umul_unorm_4x8_vc4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; dst = 0; for (int i = 0; i < 32; i += 8) { int src0_chan = (src0 >> i) & 0xff; int src1_chan = (src1 >> i) & 0xff; dst |= ((src0_chan * src1_chan) / 255) << i; } _dst_val[_i].i32 = dst; } } static void evaluate_unpack_32_2x16(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src0.x >> 16; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; } static void evaluate_unpack_32_2x16_split_x(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; uint16_t dst = src0; _dst_val[_i].u16 = dst; } } static void evaluate_unpack_32_2x16_split_y(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = 
static void
evaluate_unpack_32_2x16(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        UNUSED unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct uint16_vec dst;
   dst.x = src0.x;
   dst.y = src0.x >> 16;

   _dst_val[0].u16 = dst.x;
   _dst_val[1].u16 = dst.y;
}

static void
evaluate_unpack_32_2x16_split_x(nir_const_value *_dst_val,
                                UNUSED unsigned num_components,
                                UNUSED unsigned bit_size,
                                UNUSED nir_const_value **_src,
                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      uint16_t dst = src0;
      _dst_val[_i].u16 = dst;
   }
}

static void
evaluate_unpack_32_2x16_split_y(nir_const_value *_dst_val,
                                UNUSED unsigned num_components,
                                UNUSED unsigned bit_size,
                                UNUSED nir_const_value **_src,
                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      uint16_t dst = src0 >> 16;
      _dst_val[_i].u16 = dst;
   }
}

static void
evaluate_unpack_32_4x8(nir_const_value *_dst_val,
                       UNUSED unsigned num_components,
                       UNUSED unsigned bit_size,
                       UNUSED nir_const_value **_src,
                       UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct uint8_vec dst;
   dst.x = src0.x;
   dst.y = src0.x >> 8;
   dst.z = src0.x >> 16;
   dst.w = src0.x >> 24;

   _dst_val[0].u8 = dst.x;
   _dst_val[1].u8 = dst.y;
   _dst_val[2].u8 = dst.z;
   _dst_val[3].u8 = dst.w;
}

static void
evaluate_unpack_64_2x32(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        UNUSED unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct uint32_vec dst;
   dst.x = src0.x;
   dst.y = src0.x >> 32;

   _dst_val[0].u32 = dst.x;
   _dst_val[1].u32 = dst.y;
}

static void
evaluate_unpack_64_2x32_split_x(nir_const_value *_dst_val,
                                UNUSED unsigned num_components,
                                UNUSED unsigned bit_size,
                                UNUSED nir_const_value **_src,
                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint64_t src0 = _src[0][_i].u64;
      uint32_t dst = src0;
      _dst_val[_i].u32 = dst;
   }
}

static void
evaluate_unpack_64_2x32_split_y(nir_const_value *_dst_val,
                                UNUSED unsigned num_components,
                                UNUSED unsigned bit_size,
                                UNUSED nir_const_value **_src,
                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint64_t src0 = _src[0][_i].u64;
      uint32_t dst = src0 >> 32;
      _dst_val[_i].u32 = dst;
   }
}

static void
evaluate_unpack_64_4x16(nir_const_value *_dst_val,
                        UNUSED unsigned num_components,
                        UNUSED unsigned bit_size,
                        UNUSED nir_const_value **_src,
                        UNUSED unsigned execution_mode)
{
   const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct uint16_vec dst;
   dst.x = src0.x;
   dst.y = src0.x >> 16;
   dst.z = src0.x >> 32;
   dst.w = src0.x >> 48;

   _dst_val[0].u16 = dst.x;
   _dst_val[1].u16 = dst.y;
   _dst_val[2].u16 = dst.z;
   _dst_val[3].u16 = dst.w;
}

static void
evaluate_unpack_double_2x32_dxil(nir_const_value *_dst_val,
                                 UNUSED unsigned num_components,
                                 UNUSED unsigned bit_size,
                                 UNUSED nir_const_value **_src,
                                 UNUSED unsigned execution_mode)
{
   const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct uint32_vec dst;
   dst.x = src0.x;
   dst.y = src0.x >> 32;

   _dst_val[0].u32 = dst.x;
   _dst_val[1].u32 = dst.y;
}

static void
evaluate_unpack_half_2x16(nir_const_value *_dst_val,
                          UNUSED unsigned num_components,
                          UNUSED unsigned bit_size,
                          UNUSED nir_const_value **_src,
                          UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff));
   dst.y = unpack_half_1x16((uint16_t)(src0.x >> 16));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
}
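/* Identical to unpack_half_2x16 except that denormal f16 inputs (all
 * exponent bits zero) are flushed to +/-0.0 before conversion: 0x0001, the
 * smallest f16 subnormal (2^-24, about 5.96e-8), unpacks to 0.0 here.
 */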
static void
evaluate_unpack_half_2x16_flush_to_zero(nir_const_value *_dst_val,
                                        UNUSED unsigned num_components,
                                        UNUSED unsigned bit_size,
                                        UNUSED nir_const_value **_src,
                                        UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x & 0xffff));
   dst.y = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x >> 16));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
}

static void
evaluate_unpack_half_2x16_split_x(nir_const_value *_dst_val,
                                  UNUSED unsigned num_components,
                                  UNUSED unsigned bit_size,
                                  UNUSED nir_const_value **_src,
                                  UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      float32_t dst = unpack_half_1x16((uint16_t)(src0 & 0xffff));
      _dst_val[_i].f32 = dst;
      if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
         constant_denorm_flush_to_zero(&_dst_val[_i], 32);
      }
   }
}

static void
evaluate_unpack_half_2x16_split_x_flush_to_zero(nir_const_value *_dst_val,
                                                UNUSED unsigned num_components,
                                                UNUSED unsigned bit_size,
                                                UNUSED nir_const_value **_src,
                                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      float32_t dst = unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff));
      _dst_val[_i].f32 = dst;
      if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
         constant_denorm_flush_to_zero(&_dst_val[_i], 32);
      }
   }
}

static void
evaluate_unpack_half_2x16_split_y(nir_const_value *_dst_val,
                                  UNUSED unsigned num_components,
                                  UNUSED unsigned bit_size,
                                  UNUSED nir_const_value **_src,
                                  UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      float32_t dst = unpack_half_1x16((uint16_t)(src0 >> 16));
      _dst_val[_i].f32 = dst;
      if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
         constant_denorm_flush_to_zero(&_dst_val[_i], 32);
      }
   }
}

static void
evaluate_unpack_half_2x16_split_y_flush_to_zero(nir_const_value *_dst_val,
                                                UNUSED unsigned num_components,
                                                UNUSED unsigned bit_size,
                                                UNUSED nir_const_value **_src,
                                                UNUSED unsigned execution_mode)
{
   for (unsigned _i = 0; _i < num_components; _i++) {
      const uint32_t src0 = _src[0][_i].u32;
      float32_t dst = unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16));
      _dst_val[_i].f32 = dst;
      if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
         constant_denorm_flush_to_zero(&_dst_val[_i], 32);
      }
   }
}

static void
evaluate_unpack_snorm_2x16(nir_const_value *_dst_val,
                           UNUSED unsigned num_components,
                           UNUSED unsigned bit_size,
                           UNUSED nir_const_value **_src,
                           UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_snorm_1x16((uint16_t)(src0.x & 0xffff));
   dst.y = unpack_snorm_1x16((uint16_t)(src0.x >> 16));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
}

static void
evaluate_unpack_snorm_4x8(nir_const_value *_dst_val,
                          UNUSED unsigned num_components,
                          UNUSED unsigned bit_size,
                          UNUSED nir_const_value **_src,
                          UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_snorm_1x8((uint8_t)(src0.x & 0xff));
   dst.y = unpack_snorm_1x8((uint8_t)((src0.x >> 8) & 0xff));
   dst.z = unpack_snorm_1x8((uint8_t)((src0.x >> 16) & 0xff));
   dst.w = unpack_snorm_1x8((uint8_t)(src0.x >> 24));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
   _dst_val[2].f32 = dst.z;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[2], 32);
   }
   _dst_val[3].f32 = dst.w;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[3], 32);
   }
}

static void
evaluate_unpack_unorm_2x16(nir_const_value *_dst_val,
                           UNUSED unsigned num_components,
                           UNUSED unsigned bit_size,
                           UNUSED nir_const_value **_src,
                           UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_unorm_1x16((uint16_t)(src0.x & 0xffff));
   dst.y = unpack_unorm_1x16((uint16_t)(src0.x >> 16));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
}

static void
evaluate_unpack_unorm_4x8(nir_const_value *_dst_val,
                          UNUSED unsigned num_components,
                          UNUSED unsigned bit_size,
                          UNUSED nir_const_value **_src,
                          UNUSED unsigned execution_mode)
{
   const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

   struct float32_vec dst;
   dst.x = unpack_unorm_1x8((uint8_t)(src0.x & 0xff));
   dst.y = unpack_unorm_1x8((uint8_t)((src0.x >> 8) & 0xff));
   dst.z = unpack_unorm_1x8((uint8_t)((src0.x >> 16) & 0xff));
   dst.w = unpack_unorm_1x8((uint8_t)(src0.x >> 24));

   _dst_val[0].f32 = dst.x;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[0], 32);
   }
   _dst_val[1].f32 = dst.y;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[1], 32);
   }
   _dst_val[2].f32 = dst.z;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[2], 32);
   }
   _dst_val[3].f32 = dst.w;
   if (nir_is_denorm_flush_to_zero(execution_mode, 32)) {
      constant_denorm_flush_to_zero(&_dst_val[3], 32);
   }
}
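/* urhadd is the unsigned rounding (round-up) halving add.  Because
 * a + b == (a | b) + (a & b) and a ^ b == (a | b) - (a & b), the expression
 * (a | b) - ((a ^ b) >> 1) equals (a + b + 1) >> 1 without the intermediate
 * overflow that computing a + b directly could produce, e.g.
 * urhadd(5, 6) = (5 | 6) - ((5 ^ 6) >> 1) = 7 - 1 = 6.
 */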
static void
evaluate_urhadd(nir_const_value *_dst_val,
                UNUSED unsigned num_components,
                unsigned bit_size,
                UNUSED nir_const_value **_src,
                UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint1_t src0 = _src[0][_i].b;
         const uint1_t src1 = _src[1][_i].b;
         uint1_t dst = (src0 | src1) - ((src0 ^ src1) >> 1);
         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;
      }
      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint8_t src0 = _src[0][_i].u8;
         const uint8_t src1 = _src[1][_i].u8;
         uint8_t dst = (src0 | src1) - ((src0 ^ src1) >> 1);
         _dst_val[_i].u8 = dst;
      }
      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint16_t src0 = _src[0][_i].u16;
         const uint16_t src1 = _src[1][_i].u16;
         uint16_t dst = (src0 | src1) - ((src0 ^ src1) >> 1);
         _dst_val[_i].u16 = dst;
      }
      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint32_t src0 = _src[0][_i].u32;
         const uint32_t src1 = _src[1][_i].u32;
         uint32_t dst = (src0 | src1) - ((src0 ^ src1) >> 1);
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint64_t src0 = _src[0][_i].u64;
         const uint64_t src1 = _src[1][_i].u64;
         uint64_t dst = (src0 | src1) - ((src0 ^ src1) >> 1);
         _dst_val[_i].u64 = dst;
      }
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}
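/* urol/uror rotate with the standard portable idiom: the rotate count is
 * masked to the type width, and the "-src1 & rotate_mask" term supplies the
 * complementary shift without ever shifting by the full bit width (which
 * would be undefined behaviour), so a rotate count of 0 works correctly.
 */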
static void
evaluate_urol(nir_const_value *_dst_val,
              UNUSED unsigned num_components,
              unsigned bit_size,
              UNUSED nir_const_value **_src,
              UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint1_t src0 = _src[0][_i].b;
         const uint32_t src1 = _src[1][_i].u32;
         uint1_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 << (src1 & rotate_mask)) | (src0 >> (-src1 & rotate_mask));
         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;
      }
      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint8_t src0 = _src[0][_i].u8;
         const uint32_t src1 = _src[1][_i].u32;
         uint8_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 << (src1 & rotate_mask)) | (src0 >> (-src1 & rotate_mask));
         _dst_val[_i].u8 = dst;
      }
      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint16_t src0 = _src[0][_i].u16;
         const uint32_t src1 = _src[1][_i].u32;
         uint16_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 << (src1 & rotate_mask)) | (src0 >> (-src1 & rotate_mask));
         _dst_val[_i].u16 = dst;
      }
      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint32_t src0 = _src[0][_i].u32;
         const uint32_t src1 = _src[1][_i].u32;
         uint32_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 << (src1 & rotate_mask)) | (src0 >> (-src1 & rotate_mask));
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint64_t src0 = _src[0][_i].u64;
         const uint32_t src1 = _src[1][_i].u32;
         uint64_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 << (src1 & rotate_mask)) | (src0 >> (-src1 & rotate_mask));
         _dst_val[_i].u64 = dst;
      }
      break;
   }
   default:
      unreachable("unknown bit width");
   }
}

static void
evaluate_uror(nir_const_value *_dst_val,
              UNUSED unsigned num_components,
              unsigned bit_size,
              UNUSED nir_const_value **_src,
              UNUSED unsigned execution_mode)
{
   switch (bit_size) {
   case 1: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint1_t src0 = _src[0][_i].b;
         const uint32_t src1 = _src[1][_i].u32;
         uint1_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 >> (src1 & rotate_mask)) | (src0 << (-src1 & rotate_mask));
         /* 1-bit integers get truncated */
         _dst_val[_i].b = dst & 1;
      }
      break;
   }
   case 8: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint8_t src0 = _src[0][_i].u8;
         const uint32_t src1 = _src[1][_i].u32;
         uint8_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 >> (src1 & rotate_mask)) | (src0 << (-src1 & rotate_mask));
         _dst_val[_i].u8 = dst;
      }
      break;
   }
   case 16: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint16_t src0 = _src[0][_i].u16;
         const uint32_t src1 = _src[1][_i].u32;
         uint16_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 >> (src1 & rotate_mask)) | (src0 << (-src1 & rotate_mask));
         _dst_val[_i].u16 = dst;
      }
      break;
   }
   case 32: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint32_t src0 = _src[0][_i].u32;
         const uint32_t src1 = _src[1][_i].u32;
         uint32_t dst;
         uint32_t rotate_mask = sizeof(src0) * 8 - 1;
         dst = (src0 >> (src1 & rotate_mask)) | (src0 << (-src1 & rotate_mask));
         _dst_val[_i].u32 = dst;
      }
      break;
   }
   case 64: {
      for (unsigned _i = 0; _i < num_components; _i++) {
         const uint64_t src0 = _src[0][_i].u64;
         const uint32_t src1 = _src[1][_i].u32;
         uint64_t dst;
         uint32_t
rotate_mask = sizeof(src0) * 8 - 1; dst = (src0 >> (src1 & rotate_mask)) | (src0 << (-src1 & rotate_mask)); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_usadd_4x8_vc4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; } _dst_val[_i].i32 = dst; } } static void evaluate_ushr(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint32_t src1 = _src[1][_i].u32; uint1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint32_t src1 = _src[1][_i].u32; uint8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint32_t src1 = _src[1][_i].u32; uint16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint32_t src1 = _src[1][_i].u32; uint64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_ussub_4x8_vc4(nir_const_value *_dst_val, UNUSED unsigned num_components, UNUSED unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { for (unsigned _i = 0; _i < num_components; _i++) { const int32_t src0 = _src[0][_i].i32; const int32_t src1 = _src[1][_i].i32; int32_t dst; dst = 0; for (int i = 0; i < 32; i += 8) { int src0_chan = (src0 >> i) & 0xff; int src1_chan = (src1 >> i) & 0xff; if (src0_chan > src1_chan) dst |= (src0_chan - src1_chan) << i; } _dst_val[_i].i32 = dst; } } static void evaluate_usub_borrow(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 < src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 < src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 < src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t 
src1 = _src[1][_i].u32; uint32_t dst = src0 < src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 < src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_usub_sat(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { for (unsigned _i = 0; _i < num_components; _i++) { const uint1_t src0 = _src[0][_i].b; const uint1_t src1 = _src[1][_i].b; uint1_t dst = src0 < src1 ? 0 : src0 - src1; /* 1-bit integers get truncated */ _dst_val[_i].b = dst & 1; } break; } case 8: { for (unsigned _i = 0; _i < num_components; _i++) { const uint8_t src0 = _src[0][_i].u8; const uint8_t src1 = _src[1][_i].u8; uint8_t dst = src0 < src1 ? 0 : src0 - src1; _dst_val[_i].u8 = dst; } break; } case 16: { for (unsigned _i = 0; _i < num_components; _i++) { const uint16_t src0 = _src[0][_i].u16; const uint16_t src1 = _src[1][_i].u16; uint16_t dst = src0 < src1 ? 0 : src0 - src1; _dst_val[_i].u16 = dst; } break; } case 32: { for (unsigned _i = 0; _i < num_components; _i++) { const uint32_t src0 = _src[0][_i].u32; const uint32_t src1 = _src[1][_i].u32; uint32_t dst = src0 < src1 ? 0 : src0 - src1; _dst_val[_i].u32 = dst; } break; } case 64: { for (unsigned _i = 0; _i < num_components; _i++) { const uint64_t src0 = _src[0][_i].u64; const uint64_t src1 = _src[1][_i].u64; uint64_t dst = src0 < src1 ? 0 : src0 - src1; _dst_val[_i].u64 = dst; } break; } default: unreachable("unknown bit width"); } } static void evaluate_vec16(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src3 = { _src[3][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src4 = { _src[4][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src5 = { _src[5][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src6 = { _src[6][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src7 = { _src[7][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src8 = { _src[8][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src9 = { _src[9][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src10 = { _src[10][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src11 = { _src[11][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src12 = { _src[12][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src13 = { _src[13][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src14 = { _src[14][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src15 = { _src[15][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; 
dst.h = src7.x; dst.i = src8.x; dst.j = src9.x; dst.k = src10.x; dst.l = src11.x; dst.m = src12.x; dst.n = src13.x; dst.o = src14.x; dst.p = src15.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; /* 1-bit integers get truncated */ _dst_val[2].b = dst.z & 1; /* 1-bit integers get truncated */ _dst_val[3].b = dst.w & 1; /* 1-bit integers get truncated */ _dst_val[4].b = dst.e & 1; /* 1-bit integers get truncated */ _dst_val[5].b = dst.f & 1; /* 1-bit integers get truncated */ _dst_val[6].b = dst.g & 1; /* 1-bit integers get truncated */ _dst_val[7].b = dst.h & 1; /* 1-bit integers get truncated */ _dst_val[8].b = dst.i & 1; /* 1-bit integers get truncated */ _dst_val[9].b = dst.j & 1; /* 1-bit integers get truncated */ _dst_val[10].b = dst.k & 1; /* 1-bit integers get truncated */ _dst_val[11].b = dst.l & 1; /* 1-bit integers get truncated */ _dst_val[12].b = dst.m & 1; /* 1-bit integers get truncated */ _dst_val[13].b = dst.n & 1; /* 1-bit integers get truncated */ _dst_val[14].b = dst.o & 1; /* 1-bit integers get truncated */ _dst_val[15].b = dst.p & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src3 = { _src[3][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src4 = { _src[4][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src5 = { _src[5][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src6 = { _src[6][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src7 = { _src[7][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src8 = { _src[8][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src9 = { _src[9][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src10 = { _src[10][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src11 = { _src[11][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src12 = { _src[12][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src13 = { _src[13][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src14 = { _src[14][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src15 = { _src[15][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; dst.i = src8.x; dst.j = src9.x; dst.k = src10.x; dst.l = src11.x; dst.m = src12.x; dst.n = src13.x; dst.o = src14.x; dst.p = src15.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; _dst_val[2].u8 = dst.z; _dst_val[3].u8 = dst.w; _dst_val[4].u8 = dst.e; _dst_val[5].u8 = dst.f; _dst_val[6].u8 = dst.g; _dst_val[7].u8 = dst.h; _dst_val[8].u8 = dst.i; _dst_val[9].u8 = dst.j; _dst_val[10].u8 = dst.k; _dst_val[11].u8 = dst.l; _dst_val[12].u8 = dst.m; _dst_val[13].u8 = dst.n; _dst_val[14].u8 = dst.o; _dst_val[15].u8 = dst.p; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src3 = { _src[3][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src4 = { _src[4][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src5 = { _src[5][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src6 = { _src[6][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src7 = { _src[7][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src8 = { _src[8][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src9 = { _src[9][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src10 = { _src[10][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src11 = { _src[11][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src12 = { _src[12][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src13 = { _src[13][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src14 = { _src[14][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src15 = { _src[15][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; dst.i = src8.x; dst.j = src9.x; dst.k = src10.x; dst.l = src11.x; dst.m = src12.x; dst.n = src13.x; dst.o = src14.x; dst.p = src15.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; _dst_val[2].u16 = dst.z; _dst_val[3].u16 = dst.w; _dst_val[4].u16 = dst.e; _dst_val[5].u16 = dst.f; _dst_val[6].u16 = dst.g; _dst_val[7].u16 = dst.h; _dst_val[8].u16 = dst.i; _dst_val[9].u16 = dst.j; _dst_val[10].u16 = dst.k; _dst_val[11].u16 = dst.l; _dst_val[12].u16 = dst.m; _dst_val[13].u16 = dst.n; _dst_val[14].u16 = dst.o; _dst_val[15].u16 = dst.p; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src3 = { _src[3][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src4 = { _src[4][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src5 = { _src[5][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src6 = { _src[6][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src7 = { _src[7][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src8 = { _src[8][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src9 = { _src[9][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src10 = { _src[10][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src11 = { _src[11][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src12 = { _src[12][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src13 = { _src[13][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src14 = { _src[14][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src15 = { _src[15][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; dst.i = src8.x; dst.j = src9.x; dst.k = src10.x; dst.l = src11.x; dst.m = src12.x; dst.n = src13.x; dst.o = src14.x; dst.p = src15.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; _dst_val[2].u32 = dst.z; _dst_val[3].u32 = dst.w; _dst_val[4].u32 = dst.e; _dst_val[5].u32 = dst.f; _dst_val[6].u32 = dst.g; _dst_val[7].u32 = dst.h; _dst_val[8].u32 = dst.i; _dst_val[9].u32 = dst.j; _dst_val[10].u32 = dst.k; _dst_val[11].u32 = dst.l; _dst_val[12].u32 = dst.m; _dst_val[13].u32 = dst.n; _dst_val[14].u32 = dst.o; _dst_val[15].u32 = dst.p; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src3 = { _src[3][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src4 = { _src[4][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src5 = { _src[5][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src6 = { _src[6][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src7 = { _src[7][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src8 = { _src[8][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src9 = { _src[9][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src10 = { _src[10][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src11 = { _src[11][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src12 = { _src[12][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src13 = { _src[13][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src14 = { _src[14][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src15 = { _src[15][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; dst.i = src8.x; dst.j = src9.x; dst.k = src10.x; dst.l = src11.x; dst.m = src12.x; dst.n = src13.x; dst.o = src14.x; dst.p = src15.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; _dst_val[2].u64 = dst.z; _dst_val[3].u64 = dst.w; _dst_val[4].u64 = dst.e; _dst_val[5].u64 = dst.f; _dst_val[6].u64 = dst.g; _dst_val[7].u64 = dst.h; _dst_val[8].u64 = dst.i; _dst_val[9].u64 = dst.j; _dst_val[10].u64 = dst.k; _dst_val[11].u64 = dst.l; _dst_val[12].u64 = dst.m; _dst_val[13].u64 = dst.n; _dst_val[14].u64 = dst.o; _dst_val[15].u64 = dst.p; break; } default: unreachable("unknown bit width"); } } static void evaluate_vec2(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; 
dst.x = src0.x; dst.y = src1.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; break; } default: unreachable("unknown bit width"); } } static void evaluate_vec3(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; /* 1-bit integers get truncated */ _dst_val[2].b = dst.z & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; _dst_val[2].u8 = dst.z; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; _dst_val[2].u16 = dst.z; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; 
_dst_val[2].u32 = dst.z; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; _dst_val[2].u64 = dst.z; break; } default: unreachable("unknown bit width"); } } static void evaluate_vec4(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src3 = { _src[3][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; /* 1-bit integers get truncated */ _dst_val[2].b = dst.z & 1; /* 1-bit integers get truncated */ _dst_val[3].b = dst.w & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src3 = { _src[3][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; _dst_val[2].u8 = dst.z; _dst_val[3].u8 = dst.w; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src3 = { _src[3][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; _dst_val[2].u16 = dst.z; _dst_val[3].u16 = dst.w; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src3 = { _src[3][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; _dst_val[2].u32 = dst.z; _dst_val[3].u32 = dst.w; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec 
src3 = { _src[3][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; _dst_val[2].u64 = dst.z; _dst_val[3].u64 = dst.w; break; } default: unreachable("unknown bit width"); } } static void evaluate_vec5(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src3 = { _src[3][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src4 = { _src[4][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; /* 1-bit integers get truncated */ _dst_val[2].b = dst.z & 1; /* 1-bit integers get truncated */ _dst_val[3].b = dst.w & 1; /* 1-bit integers get truncated */ _dst_val[4].b = dst.e & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src3 = { _src[3][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src4 = { _src[4][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; _dst_val[2].u8 = dst.z; _dst_val[3].u8 = dst.w; _dst_val[4].u8 = dst.e; break; } case 16: { const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src3 = { _src[3][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src4 = { _src[4][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; _dst_val[2].u16 = dst.z; _dst_val[3].u16 = dst.w; _dst_val[4].u16 = dst.e; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src3 = { _src[3][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src4 = { _src[4][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; _dst_val[2].u32 = dst.z; _dst_val[3].u32 = dst.w; 
_dst_val[4].u32 = dst.e; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src3 = { _src[3][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src4 = { _src[4][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; _dst_val[2].u64 = dst.z; _dst_val[3].u64 = dst.w; _dst_val[4].u64 = dst.e; break; } default: unreachable("unknown bit width"); } } static void evaluate_vec8(nir_const_value *_dst_val, UNUSED unsigned num_components, unsigned bit_size, UNUSED nir_const_value **_src, UNUSED unsigned execution_mode) { switch (bit_size) { case 1: { const struct uint1_vec src0 = { _src[0][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src1 = { _src[1][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src2 = { _src[2][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src3 = { _src[3][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src4 = { _src[4][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src5 = { _src[5][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src6 = { _src[6][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint1_vec src7 = { _src[7][0].b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint1_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; /* 1-bit integers get truncated */ _dst_val[0].b = dst.x & 1; /* 1-bit integers get truncated */ _dst_val[1].b = dst.y & 1; /* 1-bit integers get truncated */ _dst_val[2].b = dst.z & 1; /* 1-bit integers get truncated */ _dst_val[3].b = dst.w & 1; /* 1-bit integers get truncated */ _dst_val[4].b = dst.e & 1; /* 1-bit integers get truncated */ _dst_val[5].b = dst.f & 1; /* 1-bit integers get truncated */ _dst_val[6].b = dst.g & 1; /* 1-bit integers get truncated */ _dst_val[7].b = dst.h & 1; break; } case 8: { const struct uint8_vec src0 = { _src[0][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src1 = { _src[1][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src2 = { _src[2][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src3 = { _src[3][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src4 = { _src[4][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src5 = { _src[5][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src6 = { _src[6][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint8_vec src7 = { _src[7][0].u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint8_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; _dst_val[0].u8 = dst.x; _dst_val[1].u8 = dst.y; _dst_val[2].u8 = dst.z; _dst_val[3].u8 = dst.w; _dst_val[4].u8 = dst.e; _dst_val[5].u8 = dst.f; _dst_val[6].u8 = dst.g; _dst_val[7].u8 = dst.h; break; } case 16: { 
const struct uint16_vec src0 = { _src[0][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src1 = { _src[1][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src2 = { _src[2][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src3 = { _src[3][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src4 = { _src[4][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src5 = { _src[5][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src6 = { _src[6][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint16_vec src7 = { _src[7][0].u16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint16_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; _dst_val[0].u16 = dst.x; _dst_val[1].u16 = dst.y; _dst_val[2].u16 = dst.z; _dst_val[3].u16 = dst.w; _dst_val[4].u16 = dst.e; _dst_val[5].u16 = dst.f; _dst_val[6].u16 = dst.g; _dst_val[7].u16 = dst.h; break; } case 32: { const struct uint32_vec src0 = { _src[0][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src1 = { _src[1][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src2 = { _src[2][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src3 = { _src[3][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src4 = { _src[4][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src5 = { _src[5][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src6 = { _src[6][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint32_vec src7 = { _src[7][0].u32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint32_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; _dst_val[0].u32 = dst.x; _dst_val[1].u32 = dst.y; _dst_val[2].u32 = dst.z; _dst_val[3].u32 = dst.w; _dst_val[4].u32 = dst.e; _dst_val[5].u32 = dst.f; _dst_val[6].u32 = dst.g; _dst_val[7].u32 = dst.h; break; } case 64: { const struct uint64_vec src0 = { _src[0][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src1 = { _src[1][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src2 = { _src[2][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src3 = { _src[3][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src4 = { _src[4][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src5 = { _src[5][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src6 = { _src[6][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; const struct uint64_vec src7 = { _src[7][0].u64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; struct uint64_vec dst; dst.x = src0.x; dst.y = src1.x; dst.z = src2.x; dst.w = src3.x; dst.e = src4.x; dst.f = src5.x; dst.g = src6.x; dst.h = src7.x; _dst_val[0].u64 = dst.x; _dst_val[1].u64 = dst.y; _dst_val[2].u64 = dst.z; _dst_val[3].u64 = dst.w; _dst_val[4].u64 = dst.e; _dst_val[5].u64 = dst.f; _dst_val[6].u64 = dst.g; _dst_val[7].u64 = dst.h; break; } default: unreachable("unknown bit width"); } } void nir_eval_const_opcode(nir_op op, nir_const_value *dest, unsigned 
num_components, unsigned bit_width, nir_const_value **src, unsigned float_controls_execution_mode) { switch (op) { case nir_op_amul: evaluate_amul(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal16: evaluate_b16all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal2: evaluate_b16all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal3: evaluate_b16all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal4: evaluate_b16all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal5: evaluate_b16all_fequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_fequal8: evaluate_b16all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal16: evaluate_b16all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal2: evaluate_b16all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal3: evaluate_b16all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal4: evaluate_b16all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal5: evaluate_b16all_iequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16all_iequal8: evaluate_b16all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal16: evaluate_b16any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal2: evaluate_b16any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal3: evaluate_b16any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal4: evaluate_b16any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal5: evaluate_b16any_fnequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_fnequal8: evaluate_b16any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal16: evaluate_b16any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal2: evaluate_b16any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal3: evaluate_b16any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal4: evaluate_b16any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal5: evaluate_b16any_inequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16any_inequal8: evaluate_b16any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b16csel: evaluate_b16csel(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2b1: evaluate_b2b1(dest, num_components, 
bit_width, src, float_controls_execution_mode); return; case nir_op_b2b16: evaluate_b2b16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2b32: evaluate_b2b32(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2b8: evaluate_b2b8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2f16: evaluate_b2f16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2f32: evaluate_b2f32(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2f64: evaluate_b2f64(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2i1: evaluate_b2i1(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2i16: evaluate_b2i16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2i32: evaluate_b2i32(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2i64: evaluate_b2i64(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b2i8: evaluate_b2i8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal16: evaluate_b32all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal2: evaluate_b32all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal3: evaluate_b32all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal4: evaluate_b32all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal5: evaluate_b32all_fequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_fequal8: evaluate_b32all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal16: evaluate_b32all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal2: evaluate_b32all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal3: evaluate_b32all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal4: evaluate_b32all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal5: evaluate_b32all_iequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32all_iequal8: evaluate_b32all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal16: evaluate_b32any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal2: evaluate_b32any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal3: evaluate_b32any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal4: evaluate_b32any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal5: evaluate_b32any_fnequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_fnequal8: 
evaluate_b32any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal16: evaluate_b32any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal2: evaluate_b32any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal3: evaluate_b32any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal4: evaluate_b32any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal5: evaluate_b32any_inequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32any_inequal8: evaluate_b32any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b32csel: evaluate_b32csel(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal16: evaluate_b8all_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal2: evaluate_b8all_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal3: evaluate_b8all_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal4: evaluate_b8all_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal5: evaluate_b8all_fequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_fequal8: evaluate_b8all_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal16: evaluate_b8all_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal2: evaluate_b8all_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal3: evaluate_b8all_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal4: evaluate_b8all_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal5: evaluate_b8all_iequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8all_iequal8: evaluate_b8all_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal16: evaluate_b8any_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal2: evaluate_b8any_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal3: evaluate_b8any_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal4: evaluate_b8any_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal5: evaluate_b8any_fnequal5(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_fnequal8: evaluate_b8any_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_inequal16: evaluate_b8any_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode); return; case nir_op_b8any_inequal2: evaluate_b8any_inequal2(dest, num_components, bit_width, 
      evaluate_b8any_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_b8any_inequal3:
      evaluate_b8any_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_b8any_inequal4:
      evaluate_b8any_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_b8any_inequal5:
      evaluate_b8any_inequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_b8any_inequal8:
      evaluate_b8any_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_b8csel:
      evaluate_b8csel(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal16:
      evaluate_ball_fequal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal2:
      evaluate_ball_fequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal3:
      evaluate_ball_fequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal4:
      evaluate_ball_fequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal5:
      evaluate_ball_fequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_fequal8:
      evaluate_ball_fequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal16:
      evaluate_ball_iequal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal2:
      evaluate_ball_iequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal3:
      evaluate_ball_iequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal4:
      evaluate_ball_iequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal5:
      evaluate_ball_iequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ball_iequal8:
      evaluate_ball_iequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal16:
      evaluate_bany_fnequal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal2:
      evaluate_bany_fnequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal3:
      evaluate_bany_fnequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal4:
      evaluate_bany_fnequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal5:
      evaluate_bany_fnequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_fnequal8:
      evaluate_bany_fnequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal16:
      evaluate_bany_inequal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal2:
      evaluate_bany_inequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal3:
      evaluate_bany_inequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal4:
      evaluate_bany_inequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal5:
      evaluate_bany_inequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bany_inequal8:
      evaluate_bany_inequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bcsel:
      evaluate_bcsel(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bfi:
      evaluate_bfi(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bfm:
      evaluate_bfm(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bit_count:
      evaluate_bit_count(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bitfield_insert:
      evaluate_bitfield_insert(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bitfield_reverse:
      evaluate_bitfield_reverse(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_bitfield_select:
      evaluate_bitfield_select(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_cube_face_coord_amd:
      evaluate_cube_face_coord_amd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_cube_face_index_amd:
      evaluate_cube_face_index_amd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_cube_r600:
      evaluate_cube_r600(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_extract_i16:
      evaluate_extract_i16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_extract_i8:
      evaluate_extract_i8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_extract_u16:
      evaluate_extract_u16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_extract_u8:
      evaluate_extract_u8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2b1:
      evaluate_f2b1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2b16:
      evaluate_f2b16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2b32:
      evaluate_f2b32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2b8:
      evaluate_f2b8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2f16:
      evaluate_f2f16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2f16_rtne:
      evaluate_f2f16_rtne(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2f16_rtz:
      evaluate_f2f16_rtz(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2f32:
      evaluate_f2f32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2f64:
      evaluate_f2f64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2fmp:
      evaluate_f2fmp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2i1:
      evaluate_f2i1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2i16:
      evaluate_f2i16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2i32:
      evaluate_f2i32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2i64:
      evaluate_f2i64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2i8:
      evaluate_f2i8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2imp:
      evaluate_f2imp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2u1:
      evaluate_f2u1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2u16:
      evaluate_f2u16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2u32:
      evaluate_f2u32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2u64:
      evaluate_f2u64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2u8:
      evaluate_f2u8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_f2ump:
      evaluate_f2ump(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fabs:
      evaluate_fabs(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fadd:
      evaluate_fadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal16:
      evaluate_fall_equal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal2:
      evaluate_fall_equal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal3:
      evaluate_fall_equal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal4:
      evaluate_fall_equal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal5:
      evaluate_fall_equal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fall_equal8:
      evaluate_fall_equal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal16:
      evaluate_fany_nequal16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal2:
      evaluate_fany_nequal2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal3:
      evaluate_fany_nequal3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal4:
      evaluate_fany_nequal4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal5:
      evaluate_fany_nequal5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fany_nequal8:
      evaluate_fany_nequal8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fceil:
      evaluate_fceil(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fclamp_pos_mali:
      evaluate_fclamp_pos_mali(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fcos:
      evaluate_fcos(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fcos_r600:
      evaluate_fcos_r600(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fcsel:
      evaluate_fcsel(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fcsel_ge:
      evaluate_fcsel_ge(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fcsel_gt:
      evaluate_fcsel_gt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddx:
      evaluate_fddx(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddx_coarse:
      evaluate_fddx_coarse(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddx_fine:
      evaluate_fddx_fine(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddx_must_abs_mali:
      evaluate_fddx_must_abs_mali(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddy:
      evaluate_fddy(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddy_coarse:
      evaluate_fddy_coarse(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddy_fine:
      evaluate_fddy_fine(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fddy_must_abs_mali:
      evaluate_fddy_must_abs_mali(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdiv:
      evaluate_fdiv(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot16:
      evaluate_fdot16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot16_replicated:
      evaluate_fdot16_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot2:
      evaluate_fdot2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot2_replicated:
      evaluate_fdot2_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot3:
      evaluate_fdot3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot3_replicated:
      evaluate_fdot3_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot4:
      evaluate_fdot4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot4_replicated:
      evaluate_fdot4_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot5:
      evaluate_fdot5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot5_replicated:
      evaluate_fdot5_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot8:
      evaluate_fdot8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdot8_replicated:
      evaluate_fdot8_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdph:
      evaluate_fdph(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fdph_replicated:
      evaluate_fdph_replicated(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_feq:
      evaluate_feq(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_feq16:
      evaluate_feq16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_feq32:
      evaluate_feq32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_feq8:
      evaluate_feq8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fexp2:
      evaluate_fexp2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ffloor:
      evaluate_ffloor(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ffma:
      evaluate_ffma(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ffract:
      evaluate_ffract(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fge:
      evaluate_fge(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fge16:
      evaluate_fge16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fge32:
      evaluate_fge32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fge8:
      evaluate_fge8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_find_lsb:
      evaluate_find_lsb(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fisfinite:
      evaluate_fisfinite(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fisfinite32:
      evaluate_fisfinite32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fisnormal:
      evaluate_fisnormal(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flog2:
      evaluate_flog2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flrp:
      evaluate_flrp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flt:
      evaluate_flt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flt16:
      evaluate_flt16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flt32:
      evaluate_flt32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_flt8:
      evaluate_flt8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fmax:
      evaluate_fmax(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fmin:
      evaluate_fmin(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fmod:
      evaluate_fmod(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fmul:
      evaluate_fmul(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fneg:
      evaluate_fneg(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fneu:
      evaluate_fneu(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fneu16:
      evaluate_fneu16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fneu32:
      evaluate_fneu32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fneu8:
      evaluate_fneu8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fpow:
      evaluate_fpow(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fquantize2f16:
      evaluate_fquantize2f16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_frcp:
      evaluate_frcp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_frem:
      evaluate_frem(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_frexp_exp:
      evaluate_frexp_exp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_frexp_sig:
      evaluate_frexp_sig(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fround_even:
      evaluate_fround_even(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_frsq:
      evaluate_frsq(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsat:
      evaluate_fsat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsat_signed_mali:
      evaluate_fsat_signed_mali(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsign:
      evaluate_fsign(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsin:
      evaluate_fsin(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsin_agx:
      evaluate_fsin_agx(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsin_r600:
      evaluate_fsin_r600(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsqrt:
      evaluate_fsqrt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsub:
      evaluate_fsub(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsum2:
      evaluate_fsum2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsum3:
      evaluate_fsum3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_fsum4:
      evaluate_fsum4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ftrunc:
      evaluate_ftrunc(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2b1:
      evaluate_i2b1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2b16:
      evaluate_i2b16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2b32:
      evaluate_i2b32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2b8:
      evaluate_i2b8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2f16:
      evaluate_i2f16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2f32:
      evaluate_i2f32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2f64:
      evaluate_i2f64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2fmp:
      evaluate_i2fmp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2i1:
      evaluate_i2i1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2i16:
      evaluate_i2i16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2i32:
      evaluate_i2i32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2i64:
      evaluate_i2i64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2i8:
      evaluate_i2i8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i2imp:
      evaluate_i2imp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i32csel_ge:
      evaluate_i32csel_ge(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_i32csel_gt:
      evaluate_i32csel_gt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_iabs:
      evaluate_iabs(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_iadd:
      evaluate_iadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_iadd3:
      evaluate_iadd3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_iadd_sat:
      evaluate_iadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_iand:
      evaluate_iand(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ibfe:
      evaluate_ibfe(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ibitfield_extract:
      evaluate_ibitfield_extract(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_idiv:
      evaluate_idiv(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ieq:
      evaluate_ieq(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ieq16:
      evaluate_ieq16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ieq32:
      evaluate_ieq32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ieq8:
      evaluate_ieq8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ifind_msb:
      evaluate_ifind_msb(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ifind_msb_rev:
      evaluate_ifind_msb_rev(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ige:
      evaluate_ige(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ige16:
      evaluate_ige16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ige32:
      evaluate_ige32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ige8:
      evaluate_ige8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ihadd:
      evaluate_ihadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ilt:
      evaluate_ilt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ilt16:
      evaluate_ilt16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ilt32:
      evaluate_ilt32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ilt8:
      evaluate_ilt8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imad24_ir3:
      evaluate_imad24_ir3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imadsh_mix16:
      evaluate_imadsh_mix16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imax:
      evaluate_imax(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imin:
      evaluate_imin(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imod:
      evaluate_imod(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul:
      evaluate_imul(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul24:
      evaluate_imul24(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul24_relaxed:
      evaluate_imul24_relaxed(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul_2x32_64:
      evaluate_imul_2x32_64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul_32x16:
      evaluate_imul_32x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_imul_high:
      evaluate_imul_high(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ine:
      evaluate_ine(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ine16:
      evaluate_ine16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ine32:
      evaluate_ine32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ine8:
      evaluate_ine8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ineg:
      evaluate_ineg(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_inot:
      evaluate_inot(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_insert_u16:
      evaluate_insert_u16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_insert_u8:
      evaluate_insert_u8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ior:
      evaluate_ior(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_irem:
      evaluate_irem(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_irhadd:
      evaluate_irhadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ishl:
      evaluate_ishl(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ishr:
      evaluate_ishr(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_isign:
      evaluate_isign(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_isub:
      evaluate_isub(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_isub_sat:
      evaluate_isub_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ixor:
      evaluate_ixor(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ldexp:
      evaluate_ldexp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_mov:
      evaluate_mov(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_32_2x16:
      evaluate_pack_32_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_32_2x16_split:
      evaluate_pack_32_2x16_split(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_32_4x8:
      evaluate_pack_32_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_32_4x8_split:
      evaluate_pack_32_4x8_split(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_64_2x32:
      evaluate_pack_64_2x32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_64_2x32_split:
      evaluate_pack_64_2x32_split(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_64_4x16:
      evaluate_pack_64_4x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_double_2x32_dxil:
      evaluate_pack_double_2x32_dxil(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_half_2x16:
      evaluate_pack_half_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_half_2x16_split:
      evaluate_pack_half_2x16_split(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_snorm_2x16:
      evaluate_pack_snorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_snorm_4x8:
      evaluate_pack_snorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_unorm_2x16:
      evaluate_pack_unorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_unorm_4x8:
      evaluate_pack_unorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_uvec2_to_uint:
      evaluate_pack_uvec2_to_uint(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_pack_uvec4_to_uint:
      evaluate_pack_uvec4_to_uint(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sad_u8x4:
      evaluate_sad_u8x4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sdot_2x16_iadd:
      evaluate_sdot_2x16_iadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sdot_2x16_iadd_sat:
      evaluate_sdot_2x16_iadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sdot_4x8_iadd:
      evaluate_sdot_4x8_iadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sdot_4x8_iadd_sat:
      evaluate_sdot_4x8_iadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_seq:
      evaluate_seq(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sge:
      evaluate_sge(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_slt:
      evaluate_slt(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sne:
      evaluate_sne(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sudot_4x8_iadd:
      evaluate_sudot_4x8_iadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_sudot_4x8_iadd_sat:
      evaluate_sudot_4x8_iadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2f16:
      evaluate_u2f16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2f32:
      evaluate_u2f32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2f64:
      evaluate_u2f64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2fmp:
      evaluate_u2fmp(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2u1:
      evaluate_u2u1(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2u16:
      evaluate_u2u16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2u32:
      evaluate_u2u32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2u64:
      evaluate_u2u64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_u2u8:
      evaluate_u2u8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uabs_isub:
      evaluate_uabs_isub(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uabs_usub:
      evaluate_uabs_usub(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uadd_carry:
      evaluate_uadd_carry(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uadd_sat:
      evaluate_uadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ubfe:
      evaluate_ubfe(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ubitfield_extract:
      evaluate_ubitfield_extract(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uclz:
      evaluate_uclz(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_udiv:
      evaluate_udiv(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_udot_2x16_uadd:
      evaluate_udot_2x16_uadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_udot_2x16_uadd_sat:
      evaluate_udot_2x16_uadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_udot_4x8_uadd:
      evaluate_udot_4x8_uadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_udot_4x8_uadd_sat:
      evaluate_udot_4x8_uadd_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ufind_msb:
      evaluate_ufind_msb(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ufind_msb_rev:
      evaluate_ufind_msb_rev(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uge:
      evaluate_uge(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uge16:
      evaluate_uge16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uge32:
      evaluate_uge32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uge8:
      evaluate_uge8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uhadd:
      evaluate_uhadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ult:
      evaluate_ult(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ult16:
      evaluate_ult16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ult32:
      evaluate_ult32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ult8:
      evaluate_ult8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umad24:
      evaluate_umad24(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umad24_relaxed:
      evaluate_umad24_relaxed(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umax:
      evaluate_umax(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umax_4x8_vc4:
      evaluate_umax_4x8_vc4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umin:
      evaluate_umin(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umin_4x8_vc4:
      evaluate_umin_4x8_vc4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umod:
      evaluate_umod(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul24:
      evaluate_umul24(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul24_relaxed:
      evaluate_umul24_relaxed(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul_2x32_64:
      evaluate_umul_2x32_64(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul_32x16:
      evaluate_umul_32x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul_high:
      evaluate_umul_high(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul_low:
      evaluate_umul_low(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_umul_unorm_4x8_vc4:
      evaluate_umul_unorm_4x8_vc4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_32_2x16:
      evaluate_unpack_32_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_32_2x16_split_x:
      evaluate_unpack_32_2x16_split_x(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_32_2x16_split_y:
      evaluate_unpack_32_2x16_split_y(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_32_4x8:
      evaluate_unpack_32_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_64_2x32:
      evaluate_unpack_64_2x32(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_64_2x32_split_x:
      evaluate_unpack_64_2x32_split_x(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_64_2x32_split_y:
      evaluate_unpack_64_2x32_split_y(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_64_4x16:
      evaluate_unpack_64_4x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_double_2x32_dxil:
      evaluate_unpack_double_2x32_dxil(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16:
      evaluate_unpack_half_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16_flush_to_zero:
      evaluate_unpack_half_2x16_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16_split_x:
      evaluate_unpack_half_2x16_split_x(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16_split_x_flush_to_zero:
      evaluate_unpack_half_2x16_split_x_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16_split_y:
      evaluate_unpack_half_2x16_split_y(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_half_2x16_split_y_flush_to_zero:
      evaluate_unpack_half_2x16_split_y_flush_to_zero(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_snorm_2x16:
      evaluate_unpack_snorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_snorm_4x8:
      evaluate_unpack_snorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_unorm_2x16:
      evaluate_unpack_unorm_2x16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_unpack_unorm_4x8:
      evaluate_unpack_unorm_4x8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_urhadd:
      evaluate_urhadd(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_urol:
      evaluate_urol(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_uror:
      evaluate_uror(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_usadd_4x8_vc4:
      evaluate_usadd_4x8_vc4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ushr:
      evaluate_ushr(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_ussub_4x8_vc4:
      evaluate_ussub_4x8_vc4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_usub_borrow:
      evaluate_usub_borrow(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_usub_sat:
      evaluate_usub_sat(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec16:
      evaluate_vec16(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec2:
      evaluate_vec2(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec3:
      evaluate_vec3(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec4:
      evaluate_vec4(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec5:
      evaluate_vec5(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   case nir_op_vec8:
      evaluate_vec8(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
   /* Every nir_op value must be handled by one of the cases above. */
   default:
      unreachable("shouldn't get here");
   }
}
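
/*
 * Usage sketch (illustrative only, not a call site from this file): the
 * dispatch above routes a nir_op plus its constant sources to the matching
 * evaluate_*() helper.  A caller such as NIR's constant-folding pass would
 * set things up roughly like this; the exact setup below is an assumption
 * for illustration.
 *
 *    nir_const_value srcs[2][NIR_MAX_VEC_COMPONENTS];
 *    srcs[0][0] = nir_const_value_for_float(1.5, 32);
 *    srcs[1][0] = nir_const_value_for_float(2.5, 32);
 *    nir_const_value *src[2] = { srcs[0], srcs[1] };
 *    nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
 *
 *    nir_eval_const_opcode(nir_op_fadd, dest, 1, 32, src,
 *                          FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE);
 *    // dest[0].f32 is now 4.0f
 */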