/* * Copyright © 2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef GEN_MI_BUILDER_H #define GEN_MI_BUILDER_H #include "util/bitscan.h" #include "util/fast_idiv_by_const.h" #include "util/u_math.h" #ifndef GEN_MI_BUILDER_NUM_ALLOC_GPRS /** The number of GPRs the MI builder is allowed to allocate * * This may be set by a user of this API so that it can reserve some GPRs at * the top end for its own use. */ #define GEN_MI_BUILDER_NUM_ALLOC_GPRS 16 #endif /** These must be defined by the user of the builder * * void *__gen_get_batch_dwords(__gen_user_data *user_data, * unsigned num_dwords); * * __gen_address_type * __gen_address_offset(__gen_address_type addr, uint64_t offset); * */ /* * Start of the actual MI builder */ #define __genxml_cmd_length(cmd) cmd ## _length #define __genxml_cmd_header(cmd) cmd ## _header #define __genxml_cmd_pack(cmd) cmd ## _pack #define gen_mi_builder_pack(b, cmd, dst, name) \ for (struct cmd name = { __genxml_cmd_header(cmd) }, \ *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \ __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name), \ _dst = NULL) #define gen_mi_builder_emit(b, cmd, name) \ gen_mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name) enum gen_mi_value_type { GEN_MI_VALUE_TYPE_IMM, GEN_MI_VALUE_TYPE_MEM32, GEN_MI_VALUE_TYPE_MEM64, GEN_MI_VALUE_TYPE_REG32, GEN_MI_VALUE_TYPE_REG64, }; struct gen_mi_value { enum gen_mi_value_type type; union { uint64_t imm; __gen_address_type addr; uint32_t reg; }; #if GEN_GEN >= 7 || GEN_IS_HASWELL bool invert; #endif }; #if GEN_GEN >= 9 #define GEN_MI_BUILDER_MAX_MATH_DWORDS 256 #else #define GEN_MI_BUILDER_MAX_MATH_DWORDS 64 #endif struct gen_mi_builder { __gen_user_data *user_data; #if GEN_GEN >= 8 || GEN_IS_HASWELL uint32_t gprs; uint8_t gpr_refs[GEN_MI_BUILDER_NUM_ALLOC_GPRS]; unsigned num_math_dwords; uint32_t math_dwords[GEN_MI_BUILDER_MAX_MATH_DWORDS]; #endif }; static inline void gen_mi_builder_init(struct gen_mi_builder *b, __gen_user_data *user_data) { memset(b, 0, sizeof(*b)); b->user_data = user_data; #if GEN_GEN >= 8 || GEN_IS_HASWELL b->gprs = 0; b->num_math_dwords = 0; #endif } static inline void gen_mi_builder_flush_math(struct gen_mi_builder *b) { #if GEN_GEN >= 8 || GEN_IS_HASWELL if (b->num_math_dwords == 0) return; uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 1 + b->num_math_dwords); gen_mi_builder_pack(b, GENX(MI_MATH), dw, math) { math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias); } memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t)); b->num_math_dwords = 0; #endif } #define _GEN_MI_BUILDER_GPR_BASE 0x2600 /* The actual hardware limit on GPRs */ #define _GEN_MI_BUILDER_NUM_HW_GPRS 16 #if GEN_GEN >= 8 || GEN_IS_HASWELL static inline bool gen_mi_value_is_gpr(struct gen_mi_value val) { return (val.type == GEN_MI_VALUE_TYPE_REG32 || val.type == GEN_MI_VALUE_TYPE_REG64) && val.reg >= _GEN_MI_BUILDER_GPR_BASE && val.reg < _GEN_MI_BUILDER_GPR_BASE + _GEN_MI_BUILDER_NUM_HW_GPRS * 8; } static inline bool _gen_mi_value_is_allocated_gpr(struct gen_mi_value val) { return (val.type == GEN_MI_VALUE_TYPE_REG32 || val.type == GEN_MI_VALUE_TYPE_REG64) && val.reg >= _GEN_MI_BUILDER_GPR_BASE && val.reg < _GEN_MI_BUILDER_GPR_BASE + GEN_MI_BUILDER_NUM_ALLOC_GPRS * 8; } static inline uint32_t _gen_mi_value_as_gpr(struct gen_mi_value val) { assert(gen_mi_value_is_gpr(val)); assert(val.reg % 8 == 0); return (val.reg - _GEN_MI_BUILDER_GPR_BASE) / 8; } static inline struct gen_mi_value gen_mi_new_gpr(struct gen_mi_builder *b) { unsigned gpr = ffs(~b->gprs) - 1; assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS); assert(b->gpr_refs[gpr] == 0); b->gprs |= (1u << gpr); b->gpr_refs[gpr] = 1; return (struct gen_mi_value) { .type = GEN_MI_VALUE_TYPE_REG64, .reg = _GEN_MI_BUILDER_GPR_BASE + gpr * 8, }; } #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */ /** Take a reference to a gen_mi_value * * The MI builder uses reference counting to automatically free ALU GPRs for * re-use in calculations. All gen_mi_* math functions consume the reference * they are handed for each source and return a reference to a value which the * caller must consume. In particular, if you pas the same value into a * single gen_mi_* math function twice (say to add a number to itself), you * are responsible for calling gen_mi_value_ref() to get a second reference * because the gen_mi_* math function will consume it twice. */ static inline struct gen_mi_value gen_mi_value_ref(struct gen_mi_builder *b, struct gen_mi_value val) { #if GEN_GEN >= 8 || GEN_IS_HASWELL if (_gen_mi_value_is_allocated_gpr(val)) { unsigned gpr = _gen_mi_value_as_gpr(val); assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS); assert(b->gprs & (1u << gpr)); assert(b->gpr_refs[gpr] < UINT8_MAX); b->gpr_refs[gpr]++; } #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */ return val; } /** Drop a reference to a gen_mi_value * * See also gen_mi_value_ref. */ static inline void gen_mi_value_unref(struct gen_mi_builder *b, struct gen_mi_value val) { #if GEN_GEN >= 8 || GEN_IS_HASWELL if (_gen_mi_value_is_allocated_gpr(val)) { unsigned gpr = _gen_mi_value_as_gpr(val); assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS); assert(b->gprs & (1u << gpr)); assert(b->gpr_refs[gpr] > 0); if (--b->gpr_refs[gpr] == 0) b->gprs &= ~(1u << gpr); } #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */ } static inline struct gen_mi_value gen_mi_imm(uint64_t imm) { return (struct gen_mi_value) { .type = GEN_MI_VALUE_TYPE_IMM, .imm = imm, }; } static inline struct gen_mi_value gen_mi_reg32(uint32_t reg) { struct gen_mi_value val = { .type = GEN_MI_VALUE_TYPE_REG32, .reg = reg, }; #if GEN_GEN >= 8 || GEN_IS_HASWELL assert(!_gen_mi_value_is_allocated_gpr(val)); #endif return val; } static inline struct gen_mi_value gen_mi_reg64(uint32_t reg) { struct gen_mi_value val = { .type = GEN_MI_VALUE_TYPE_REG64, .reg = reg, }; #if GEN_GEN >= 8 || GEN_IS_HASWELL assert(!_gen_mi_value_is_allocated_gpr(val)); #endif return val; } static inline struct gen_mi_value gen_mi_mem32(__gen_address_type addr) { return (struct gen_mi_value) { .type = GEN_MI_VALUE_TYPE_MEM32, .addr = addr, }; } static inline struct gen_mi_value gen_mi_mem64(__gen_address_type addr) { return (struct gen_mi_value) { .type = GEN_MI_VALUE_TYPE_MEM64, .addr = addr, }; } static inline struct gen_mi_value gen_mi_value_half(struct gen_mi_value value, bool top_32_bits) { switch (value.type) { case GEN_MI_VALUE_TYPE_IMM: if (top_32_bits) value.imm >>= 32; else value.imm &= 0xffffffffu; return value; case GEN_MI_VALUE_TYPE_MEM32: assert(!top_32_bits); return value; case GEN_MI_VALUE_TYPE_MEM64: if (top_32_bits) value.addr = __gen_address_offset(value.addr, 4); value.type = GEN_MI_VALUE_TYPE_MEM32; return value; case GEN_MI_VALUE_TYPE_REG32: assert(!top_32_bits); return value; case GEN_MI_VALUE_TYPE_REG64: if (top_32_bits) value.reg += 4; value.type = GEN_MI_VALUE_TYPE_REG32; return value; } unreachable("Invalid gen_mi_value type"); } static inline void _gen_mi_copy_no_unref(struct gen_mi_builder *b, struct gen_mi_value dst, struct gen_mi_value src) { #if GEN_GEN >= 7 || GEN_IS_HASWELL /* TODO: We could handle src.invert by emitting a bit of math if we really * wanted to. */ assert(!dst.invert && !src.invert); #endif gen_mi_builder_flush_math(b); switch (dst.type) { case GEN_MI_VALUE_TYPE_IMM: unreachable("Cannot copy to an immediate"); case GEN_MI_VALUE_TYPE_MEM64: case GEN_MI_VALUE_TYPE_REG64: /* If the destination is 64 bits, we have to copy in two halves */ _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false), gen_mi_value_half(src, false)); switch (src.type) { case GEN_MI_VALUE_TYPE_IMM: case GEN_MI_VALUE_TYPE_MEM64: case GEN_MI_VALUE_TYPE_REG64: /* TODO: Use MI_STORE_DATA_IMM::StoreQWord when we have it */ _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_value_half(src, true)); break; default: _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_imm(0)); break; } break; case GEN_MI_VALUE_TYPE_MEM32: switch (src.type) { case GEN_MI_VALUE_TYPE_IMM: gen_mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) { sdi.Address = dst.addr; sdi.ImmediateData = src.imm; } break; case GEN_MI_VALUE_TYPE_MEM32: case GEN_MI_VALUE_TYPE_MEM64: #if GEN_GEN >= 8 gen_mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) { cmm.DestinationMemoryAddress = dst.addr; cmm.SourceMemoryAddress = src.addr; } #elif GEN_IS_HASWELL { struct gen_mi_value tmp = gen_mi_new_gpr(b); _gen_mi_copy_no_unref(b, tmp, src); _gen_mi_copy_no_unref(b, dst, tmp); gen_mi_value_unref(b, tmp); } #else unreachable("Cannot do mem <-> mem copy on IVB and earlier"); #endif break; case GEN_MI_VALUE_TYPE_REG32: case GEN_MI_VALUE_TYPE_REG64: gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { srm.RegisterAddress = src.reg; srm.MemoryAddress = dst.addr; } break; default: unreachable("Invalid gen_mi_value type"); } break; case GEN_MI_VALUE_TYPE_REG32: switch (src.type) { case GEN_MI_VALUE_TYPE_IMM: gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) { lri.RegisterOffset = dst.reg; lri.DataDWord = src.imm; } break; case GEN_MI_VALUE_TYPE_MEM32: case GEN_MI_VALUE_TYPE_MEM64: gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) { lrm.RegisterAddress = dst.reg; lrm.MemoryAddress = src.addr; } break; case GEN_MI_VALUE_TYPE_REG32: case GEN_MI_VALUE_TYPE_REG64: #if GEN_GEN >= 8 || GEN_IS_HASWELL gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) { lrr.SourceRegisterAddress = src.reg; lrr.DestinationRegisterAddress = dst.reg; } #else unreachable("Cannot do reg <-> reg copy on IVB and earlier"); #endif break; default: unreachable("Invalid gen_mi_value type"); } break; default: unreachable("Invalid gen_mi_value type"); } } /** Store the value in src to the value represented by dst * * If the bit size of src and dst mismatch, this function does an unsigned * integer cast. If src has more bits than dst, it takes the bottom bits. If * src has fewer bits then dst, it fills the top bits with zeros. * * This function consumes one reference for each of src and dst. */ static inline void gen_mi_store(struct gen_mi_builder *b, struct gen_mi_value dst, struct gen_mi_value src) { _gen_mi_copy_no_unref(b, dst, src); gen_mi_value_unref(b, src); gen_mi_value_unref(b, dst); } static inline void gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst, uint32_t value, uint32_t size) { #if GEN_GEN >= 8 || GEN_IS_HASWELL assert(b->num_math_dwords == 0); #endif /* This memset operates in units of dwords. */ assert(size % 4 == 0); for (uint32_t i = 0; i < size; i += 4) { gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)), gen_mi_imm(value)); } } /* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */ static inline void gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst, __gen_address_type src, uint32_t size) { #if GEN_GEN >= 8 || GEN_IS_HASWELL assert(b->num_math_dwords == 0); #endif /* This memcpy operates in units of dwords. */ assert(size % 4 == 0); for (uint32_t i = 0; i < size; i += 4) { struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i)); struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i)); #if GEN_GEN >= 8 || GEN_IS_HASWELL gen_mi_store(b, dst_val, src_val); #else /* IVB does not have a general purpose register for command streamer * commands. Therefore, we use an alternate temporary register. */ struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */ gen_mi_store(b, tmp_reg, src_val); gen_mi_store(b, dst_val, tmp_reg); #endif } } /* * MI_MATH Section. Only available on Haswell+ */ #if GEN_GEN >= 8 || GEN_IS_HASWELL static inline void _gen_mi_builder_push_math(struct gen_mi_builder *b, const uint32_t *dwords, unsigned num_dwords) { assert(num_dwords < GEN_MI_BUILDER_MAX_MATH_DWORDS); if (b->num_math_dwords + num_dwords > GEN_MI_BUILDER_MAX_MATH_DWORDS) gen_mi_builder_flush_math(b); memcpy(&b->math_dwords[b->num_math_dwords], dwords, num_dwords * sizeof(*dwords)); b->num_math_dwords += num_dwords; } static inline uint32_t _gen_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2) { struct GENX(MI_MATH_ALU_INSTRUCTION) instr = { .Operand2 = operand2, .Operand1 = operand1, .ALUOpcode = opcode, }; uint32_t dw; GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr); return dw; } static inline struct gen_mi_value gen_mi_value_to_gpr(struct gen_mi_builder *b, struct gen_mi_value val) { if (gen_mi_value_is_gpr(val)) return val; /* Save off the invert flag because it makes copy() grumpy */ bool invert = val.invert; val.invert = false; struct gen_mi_value tmp = gen_mi_new_gpr(b); _gen_mi_copy_no_unref(b, tmp, val); tmp.invert = invert; return tmp; } static inline uint32_t _gen_mi_math_load_src(struct gen_mi_builder *b, unsigned src, struct gen_mi_value *val) { if (val->type == GEN_MI_VALUE_TYPE_IMM && (val->imm == 0 || val->imm == UINT64_MAX)) { uint64_t imm = val->invert ? ~val->imm : val->imm; return _gen_mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0); } else { *val = gen_mi_value_to_gpr(b, *val); return _gen_mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD, src, _gen_mi_value_as_gpr(*val)); } } static inline struct gen_mi_value gen_mi_math_binop(struct gen_mi_builder *b, uint32_t opcode, struct gen_mi_value src0, struct gen_mi_value src1, uint32_t store_op, uint32_t store_src) { struct gen_mi_value dst = gen_mi_new_gpr(b); uint32_t dw[4]; dw[0] = _gen_mi_math_load_src(b, MI_ALU_SRCA, &src0); dw[1] = _gen_mi_math_load_src(b, MI_ALU_SRCB, &src1); dw[2] = _gen_mi_pack_alu(opcode, 0, 0); dw[3] = _gen_mi_pack_alu(store_op, _gen_mi_value_as_gpr(dst), store_src); _gen_mi_builder_push_math(b, dw, 4); gen_mi_value_unref(b, src0); gen_mi_value_unref(b, src1); return dst; } static inline struct gen_mi_value gen_mi_inot(struct gen_mi_builder *b, struct gen_mi_value val) { /* TODO These currently can't be passed into gen_mi_copy */ val.invert = !val.invert; return val; } static inline struct gen_mi_value gen_mi_iadd(struct gen_mi_builder *b, struct gen_mi_value src0, struct gen_mi_value src1) { return gen_mi_math_binop(b, MI_ALU_ADD, src0, src1, MI_ALU_STORE, MI_ALU_ACCU); } static inline struct gen_mi_value gen_mi_iadd_imm(struct gen_mi_builder *b, struct gen_mi_value src, uint64_t N) { if (N == 0) return src; return gen_mi_iadd(b, src, gen_mi_imm(N)); } static inline struct gen_mi_value gen_mi_isub(struct gen_mi_builder *b, struct gen_mi_value src0, struct gen_mi_value src1) { return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1, MI_ALU_STORE, MI_ALU_ACCU); } static inline struct gen_mi_value gen_mi_ult(struct gen_mi_builder *b, struct gen_mi_value src0, struct gen_mi_value src1) { /* Compute "less than" by subtracting and storing the carry bit */ return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1, MI_ALU_STORE, MI_ALU_CF); } static inline struct gen_mi_value gen_mi_uge(struct gen_mi_builder *b, struct gen_mi_value src0, struct gen_mi_value src1) { /* Compute "less than" by subtracting and storing the carry bit */ return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1, MI_ALU_STOREINV, MI_ALU_CF); } static inline struct gen_mi_value gen_mi_iand(struct gen_mi_builder *b, struct gen_mi_value src0, struct gen_mi_value src1) { return gen_mi_math_binop(b, MI_ALU_AND, src0, src1, MI_ALU_STORE, MI_ALU_ACCU); } static inline struct gen_mi_value gen_mi_imul_imm(struct gen_mi_builder *b, struct gen_mi_value src, uint32_t N) { if (N == 0) { gen_mi_value_unref(b, src); return gen_mi_imm(0); } if (N == 1) return src; src = gen_mi_value_to_gpr(b, src); struct gen_mi_value res = gen_mi_value_ref(b, src); unsigned top_bit = 31 - __builtin_clz(N); for (int i = top_bit - 1; i >= 0; i--) { res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res)); if (N & (1 << i)) res = gen_mi_iadd(b, res, gen_mi_value_ref(b, src)); } gen_mi_value_unref(b, src); return res; } static inline struct gen_mi_value gen_mi_ishl_imm(struct gen_mi_builder *b, struct gen_mi_value src, uint32_t shift) { struct gen_mi_value res = gen_mi_value_to_gpr(b, src); for (unsigned i = 0; i < shift; i++) res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res)); return res; } static inline struct gen_mi_value gen_mi_ushr32_imm(struct gen_mi_builder *b, struct gen_mi_value src, uint32_t shift) { /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits * of the result. This assumes the top 32 bits are zero. */ if (shift > 64) return gen_mi_imm(0); if (shift > 32) { struct gen_mi_value tmp = gen_mi_new_gpr(b); _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, false), gen_mi_value_half(src, true)); _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, true), gen_mi_imm(0)); gen_mi_value_unref(b, src); src = tmp; shift -= 32; } assert(shift <= 32); struct gen_mi_value tmp = gen_mi_ishl_imm(b, src, 32 - shift); struct gen_mi_value dst = gen_mi_new_gpr(b); _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false), gen_mi_value_half(tmp, true)); _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_imm(0)); gen_mi_value_unref(b, tmp); return dst; } static inline struct gen_mi_value gen_mi_udiv32_imm(struct gen_mi_builder *b, struct gen_mi_value N, uint32_t D) { /* We implicitly assume that N is only a 32-bit value */ if (D == 0) { /* This is invalid but we should do something */ return gen_mi_imm(0); } else if (util_is_power_of_two_or_zero(D)) { return gen_mi_ushr32_imm(b, N, util_logbase2(D)); } else { struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32); assert(m.multiplier <= UINT32_MAX); if (m.pre_shift) N = gen_mi_ushr32_imm(b, N, m.pre_shift); /* Do the 32x32 multiply into gpr0 */ N = gen_mi_imul_imm(b, N, m.multiplier); if (m.increment) N = gen_mi_iadd(b, N, gen_mi_imm(m.multiplier)); N = gen_mi_ushr32_imm(b, N, 32); if (m.post_shift) N = gen_mi_ushr32_imm(b, N, m.post_shift); return N; } } #endif /* MI_MATH section */ #endif /* GEN_MI_BUILDER_H */