/* * Copyright © 2017 Gert Wollny * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* A short overview on how the array merging works: * * Inputs: * - per array information: live range, access mask, size * - the program * * Output: * - the program with updated array addressing * * Pseudo algorithm: * * repeat * for all pairs of arrays: * if they have non-overlapping live ranges and equal access masks: * - pick shorter array * - merge its live range into the longer array * - set its merge target array to the longer array * - mark the shorter array as processed * * for all pairs of arrays: * if they have overlapping live ranges use in sum at most four components: * - pick shorter array * - evaluate reswizzle map to move its components into the components * that are not used by the longer array * - set its merge target array to the longer array * - mark the shorter array as processed * - bail out loop * until no more successfull merges were found * * for all pairs of arrays: * if they have non-overlapping live ranges: * - pick shorter array * - merge its live range into the longer array * - set its merge target array to the longer array * - mark the shorter array as processed * * Finalize remapping map so that target arrays are always final, i.e. have * themselfes no merge target set. * * Example: * ID | Length | Live range | access mask | target id | reswizzle * ================================================================ * 1 3 3-10 x___ 0 ____ * 2 4 13-20 x___ 0 ____ * 3 8 3-20 x___ 0 ____ * 4 6 21-40 xy__ 0 ____ * 5 7 12-30 xy__ 0 ____ * * 1. merge live ranges 1 and 2 * * ID | Length | Live range | access mask | target id | reswizzle * ================================================================ * 1 - - x___ 2 ____ * 2 4 3-20 x___ 0 ____ * 3 8 3-20 x___ 0 ____ * 4 6 21-40 xy__ 0 ____ * 5 7 12-30 xy__ 0 ____ * * * 3. interleave 2 and 3 * * ID | Length | Live range | access mask | target id | reswizzle * ================================================================ * 1 - - x___ 2 ____ * 2 - - x___ 3 _x__ * 3 8 3-20 xy__ 0 ____ * 4 6 21-40 xy__ 0 ____ * 5 7 12-30 xy__ 0 ____ * * 3. merge live ranges 3 and 4 * * ID | Length | Live range | access mask | target id | reswizzle * ================================================================ * 1 - - x___ 2 ____ * 2 - - x___ 3 _x__ * 3 8 3-40 xy__ 0 ____ * 4 - - xy__ 3 ____ * 5 7 3-21 xy__ 0 ____ * * 4. interleave 3 and 5 * * ID | Length | Live range | access mask | target id | reswizzle * ================================================================ * 1 - - x___ 2 ____ * 2 - - x___ 3 _x__ * 3 8 3-40 xy__ 0 ____ * 4 - - xy__ 3 ____ * 5 - - xy__ 3 __xy * * 5. finalize remapping * (Array 1 has been merged with 2 that was later interleaved, so * the reswizzeling must be propagated. * * ID | Length | Live range | new access mask | target id | reswizzle * ================================================================ * 1 - - _y__ 3 _x__ * 2 - - _y__ 3 _x__ * 3 8 3-40 xy__ 0 ____ * 4 - - xy__ 3 ____ * 5 - - __zw 3 __xy * */ #include "program/prog_instruction.h" #include "util/u_math.h" #include #include #include #include #include "st_glsl_to_tgsi_array_merge.h" #if __cplusplus >= 201402L #include using std::unique_ptr; using std::make_unique; #endif #define ARRAY_MERGE_DEBUG 0 #if ARRAY_MERGE_DEBUG > 0 #define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0) #define ARRAY_MERGE_DUMP_BLOCK(x) do { x } while (0) #else #define ARRAY_MERGE_DUMP(x) #define ARRAY_MERGE_DUMP_BLOCK(x) #endif static const char xyzw[] = "xyzw"; array_live_range::array_live_range(): id(0), length(0), first_access(0), last_access(0), component_access_mask(0), used_component_count(0), target_array(nullptr) { init_swizzles(); } array_live_range::array_live_range(unsigned aid, unsigned alength): id(aid), length(alength), first_access(0), last_access(0), component_access_mask(0), used_component_count(0), target_array(nullptr) { init_swizzles(); } array_live_range::array_live_range(unsigned aid, unsigned alength, int begin, int end, int sw): id(aid), length(alength), first_access(begin), last_access(end), component_access_mask(sw), used_component_count(util_bitcount(sw)), target_array(nullptr) { init_swizzles(); } void array_live_range::init_swizzles() { for (int i = 0; i < 4; ++i) swizzle_map[i] = i; } void array_live_range::set_live_range(int _begin, int _end) { set_begin(_begin); set_end(_end); } void array_live_range::set_access_mask(int mask) { component_access_mask = mask; used_component_count = util_bitcount(mask); } void array_live_range::merge(array_live_range *a, array_live_range *b) { if (a->array_length() < b->array_length()) b->merge_live_range_from(a); else a->merge_live_range_from(b); } void array_live_range::interleave(array_live_range *a, array_live_range *b) { if (a->array_length() < b->array_length()) a->interleave_into(b); else b->interleave_into(a); } void array_live_range::interleave_into(array_live_range *other) { for (int i = 0; i < 4; ++i) { swizzle_map[i] = -1; } int trgt_access_mask = other->access_mask(); int summary_access_mask = trgt_access_mask; int src_swizzle_bit = 1; int next_free_swizzle_bit = 1; int k = 0; unsigned i; unsigned last_src_bit = util_last_bit(component_access_mask); for (i = 0; i <= last_src_bit ; ++i, src_swizzle_bit <<= 1) { /* Jump over empty src component slots (e.g. x__w). This is just a * safety measure and it is tested for, but it is very likely that the * emitted code always uses slots staring from x without leaving holes * (i.e. always xy__ not x_z_ or _yz_ etc). */ if (!(src_swizzle_bit & component_access_mask)) continue; /* Find the next free access slot in the target. */ while ((trgt_access_mask & next_free_swizzle_bit) && k < 4) { next_free_swizzle_bit <<= 1; ++k; } assert(k < 4 && "Interleaved array would have more then four components"); /* Set the mapping for this component. */ swizzle_map[i] = k; trgt_access_mask |= next_free_swizzle_bit; /* Update the joined access mask if we didn't just fill the mapping.*/ if (src_swizzle_bit & component_access_mask) summary_access_mask |= next_free_swizzle_bit; } other->set_access_mask(summary_access_mask); other->merge_live_range_from(this); ARRAY_MERGE_DUMP_BLOCK( std::cerr << "Interleave " << id << " into " << other->id << ", swz:"; for (unsigned i = 0; i < 4; ++i) { std::cerr << ((swizzle_map[i] >= 0) ? xyzw[swizzle_map[i]] : '_'); } std::cerr << '\n'; ); } void array_live_range::merge_live_range_from(array_live_range *other) { other->set_target(this); if (other->begin() < first_access) first_access = other->begin(); if (other->end() > last_access) last_access = other->end(); } int8_t array_live_range::remap_one_swizzle(int8_t idx) const { // needs testing if (target_array) { idx = swizzle_map[idx]; if (idx >= 0) idx = target_array->remap_one_swizzle(idx); } return idx; } void array_live_range::set_target(array_live_range *target) { target_array = target; } void array_live_range::print(std::ostream& os) const { os << "[id:" << id << ", length:" << length << ", (b:" << first_access << ", e:" << last_access << "), sw:" << (int)component_access_mask << ", nc:" << (int)used_component_count << "]"; } bool array_live_range::time_doesnt_overlap(const array_live_range& other) const { return (other.last_access < first_access || last_access < other.first_access); } namespace tgsi_array_merge { array_remapping::array_remapping(): target_id(0) { for (int i = 0; i < 4; ++i) { read_swizzle_map[i] = i; } } array_remapping::array_remapping(int trgt_array_id, const int8_t swizzle[]): target_id(trgt_array_id) { for (int i = 0; i < 4; ++i) { read_swizzle_map[i] = swizzle[i]; } } void array_remapping::init_from(const array_live_range& range) { target_id = range.is_mapped() ? range.final_target()->array_id(): 0; for (int i = 0; i < 4; ++i) read_swizzle_map[i] = range.remap_one_swizzle(i); } int array_remapping::map_writemask(int write_mask) const { assert(is_valid()); int result_write_mask = 0; for (int i = 0; i < 4; ++i) { if (1 << i & write_mask) { assert(read_swizzle_map[i] >= 0); result_write_mask |= 1 << read_swizzle_map[i]; } } return result_write_mask; } uint16_t array_remapping::move_read_swizzles(uint16_t original_swizzle) const { assert(is_valid()); /* Since * * dst.zw = src.xy in glsl actually is MOV dst.__zw src.__xy * * when interleaving the arrays the source swizzles must be moved * according to the changed dst write mask. */ uint16_t out_swizzle = 0; for (int idx = 0; idx < 4; ++idx) { uint16_t orig_swz = GET_SWZ(original_swizzle, idx); int new_idx = read_swizzle_map[idx]; if (new_idx >= 0) out_swizzle |= orig_swz << 3 * new_idx; } return out_swizzle; } uint16_t array_remapping::map_swizzles(uint16_t old_swizzle) const { uint16_t out_swizzle = 0; for (int idx = 0; idx < 4; ++idx) { uint16_t swz = read_swizzle_map[GET_SWZ(old_swizzle, idx)]; out_swizzle |= swz << 3 * idx; } return out_swizzle; } void array_remapping::print(std::ostream& os) const { if (is_valid()) { os << "[aid: " << target_id << " swz: "; for (int i = 0; i < 4; ++i) os << (read_swizzle_map[i] >= 0 ? xyzw[read_swizzle_map[i]] : '_'); os << "]"; } else { os << "[unused]"; } } /* Required by the unit tests */ bool operator == (const array_remapping& lhs, const array_remapping& rhs) { if (lhs.target_id != rhs.target_id) return false; if (lhs.target_id == 0) return true; for (int i = 0; i < 4; ++i) { if (lhs.read_swizzle_map[i] != rhs.read_swizzle_map[i]) return false; } return true; } static bool sort_by_begin(const array_live_range& lhs, const array_live_range& rhs) { return lhs.begin() < rhs.begin(); } /* Helper class to evaluate merging and interleaving of arrays */ class array_merge_evaluator { public: typedef int (*array_merger)(array_live_range& range_1, array_live_range& range_2); array_merge_evaluator(int _narrays, array_live_range *_ranges, bool _restart); /** Run the merge strategy on all arrays * @returns number of successfull merges */ int run(); private: virtual int do_run(array_live_range& range_1, array_live_range& range_2) = 0; int narrays; array_live_range *ranges; bool restart; }; array_merge_evaluator::array_merge_evaluator(int _narrays, array_live_range *_ranges, bool _restart): narrays(_narrays), ranges(_ranges), restart(_restart) { } int array_merge_evaluator::run() { int remaps = 0; for (int i = 0; i < narrays; ++i) { if (ranges[i].is_mapped()) continue; for (int j = i + 1; j < narrays; ++j) { if (!ranges[j].is_mapped()) { ARRAY_MERGE_DUMP("try merge " << i << " id:" << ranges[i].array_id() << " and " << j << " id: "<< ranges[j].array_id() << "\n"); int n = do_run(ranges[i], ranges[j]); if (restart && n) return n; remaps += n; } } } return remaps; } /* Merge live ranges if possible at all */ class merge_live_range_always: public array_merge_evaluator { public: merge_live_range_always(int _narrays, array_live_range *_ranges): array_merge_evaluator(_narrays, _ranges, false) { } protected: int do_run(array_live_range& range_1, array_live_range& range_2){ if (range_2.time_doesnt_overlap(range_1)) { ARRAY_MERGE_DUMP("merge " << range_2 << " into " << range_1 << "\n"); array_live_range::merge(&range_1,&range_2); return 1; } return 0; } }; /* Merge live ranges only if they use the same swizzle */ class merge_live_range_equal_swizzle: public merge_live_range_always { public: merge_live_range_equal_swizzle(int _narrays, array_live_range *_ranges): merge_live_range_always(_narrays, _ranges) { } private: int do_run(array_live_range& range_1, array_live_range& range_2){ if (range_1.access_mask() == range_2.access_mask()) { return merge_live_range_always::do_run(range_1, range_2); } return 0; } }; /* Interleave arrays if possible */ class interleave_live_range: public array_merge_evaluator { public: interleave_live_range(int _narrays, array_live_range *_ranges): array_merge_evaluator(_narrays, _ranges, true) { } private: int do_run(array_live_range& range_1, array_live_range& range_2){ if ((range_2.used_components() + range_1.used_components() <= 4) && !range_1.time_doesnt_overlap(range_2)) { ARRAY_MERGE_DUMP("Interleave " << range_2 << " into " << range_1 << "\n"); array_live_range::interleave(&range_1, &range_2); return 1; } return 0; } }; /* Estimate the array merging: First in a loop, arrays with equal access mask * are merged, then interleave arrays that together use at most four components, * and have overlapping live ranges. Finally arrays are merged regardless of * access mask. * @param[in] narrays number of arrays * @param[in,out] alt array life times, the merge target life time will be * updated with the new life time. * @param[in,out] remapping track the arraay index remapping and reswizzeling. * @returns number of merged arrays */ bool get_array_remapping(int narrays, array_live_range *ranges, array_remapping *remapping) { int total_remapped = 0; int n_remapped; /* Sort by "begin of live range" so that we don't have to restart searching * after every merge. */ std::sort(ranges, ranges + narrays, sort_by_begin); merge_live_range_equal_swizzle merge_evaluator_es(narrays, ranges); interleave_live_range interleave_lr(narrays, ranges); do { n_remapped = merge_evaluator_es.run(); /* try only one array interleave, if successfull, another * live_range merge is tried. The test MergeAndInterleave5 * (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp) * shows that this can result in more arrays being merged/interleaved. */ n_remapped += interleave_lr.run(); total_remapped += n_remapped; ARRAY_MERGE_DUMP("Remapped " << n_remapped << " arrays\n"); } while (n_remapped > 0); total_remapped += merge_live_range_always(narrays, ranges).run(); ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped << " arrays\n"); /* Resolve the remapping chain */ for (int i = 1; i <= narrays; ++i) { ARRAY_MERGE_DUMP("Map " << i << ":"); remapping[ranges[i-1].array_id()].init_from(ranges[i-1]); } return total_remapped > 0; } /* Remap the arrays in a TGSI program according to the given mapping. * @param narrays number of arrays * @param array_sizes array of arrays sizes * @param map the array remapping information * @param instructions TGSI program * @returns number of arrays after remapping */ int remap_arrays(int narrays, unsigned *array_sizes, exec_list *instructions, array_remapping *map) { /* re-calculate arrays */ #if __cplusplus < 201402L int *idx_map = new int[narrays + 1]; unsigned *old_sizes = new unsigned[narrays]; #else unique_ptr idx_map = make_unique(narrays + 1); unique_ptr old_sizes = make_unique(narrays); #endif memcpy(&old_sizes[0], &array_sizes[0], sizeof(unsigned) * narrays); /* Evaluate mapping for the array indices and update array sizes */ int new_narrays = 0; for (int i = 1; i <= narrays; ++i) { if (!map[i].is_valid()) { ++new_narrays; array_sizes[new_narrays-1] = old_sizes[i-1]; idx_map[i] = new_narrays; } } /* Map the array ids of merged arrays. */ for (int i = 1; i <= narrays; ++i) { if (map[i].is_valid()) { map[i].set_target_id(idx_map[map[i].target_array_id()]); } } /* Map the array ids of merge targets that got only renumbered. */ for (int i = 1; i <= narrays; ++i) { if (!map[i].is_valid()) { map[i].set_target_id(idx_map[i]); } } /* Update the array ids and swizzles in the registers */ foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { st_src_reg& src = inst->src[j]; if (src.file == PROGRAM_ARRAY && src.array_id > 0) { array_remapping& m = map[src.array_id]; if (m.is_valid()) { src.array_id = m.target_array_id(); src.swizzle = m.map_swizzles(src.swizzle); } } } for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { st_src_reg& src = inst->tex_offsets[j]; if (src.file == PROGRAM_ARRAY && src.array_id > 0) { array_remapping& m = map[src.array_id]; if (m.is_valid()) { src.array_id = m.target_array_id(); src.swizzle = m.map_swizzles(src.swizzle); } } } for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { st_dst_reg& dst = inst->dst[j]; if (dst.file == PROGRAM_ARRAY && dst.array_id > 0) { array_remapping& m = map[dst.array_id]; if (m.is_valid()) { assert(j == 0 && "remapping can only be done for single dest ops"); dst.array_id = m.target_array_id(); dst.writemask = m.map_writemask(dst.writemask); /* If the target component is moved, then the source swizzles * must be moved accordingly. */ for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { st_src_reg& src = inst->src[j]; src.swizzle = m.move_read_swizzles(src.swizzle); } } } } st_src_reg& res = inst->resource; if (res.file == PROGRAM_ARRAY && res.array_id > 0) { array_remapping& m = map[res.array_id]; if (m.is_valid()) { res.array_id = m.target_array_id(); res.swizzle = m.map_swizzles(res.swizzle); } } } #if __cplusplus < 201402L delete[] old_sizes; delete[] idx_map; #endif return new_narrays; } } using namespace tgsi_array_merge; int merge_arrays(int narrays, unsigned *array_sizes, exec_list *instructions, class array_live_range *arr_live_ranges) { array_remapping *map= new array_remapping[narrays + 1]; if (get_array_remapping(narrays, arr_live_ranges, map)) narrays = remap_arrays(narrays, array_sizes, instructions, map); delete[] map; return narrays; }