38 void transpose(
const Input_* input,
size_t nrow,
size_t ncol,
size_t input_stride, Output_* output,
size_t output_stride) {
39 if ((nrow == 1 && output_stride == 1) || (ncol == 1 && input_stride == 1)) {
40 std::copy_n(input, nrow * ncol, output);
46 constexpr size_t block = 16;
48 while (col_start < ncol) {
49 size_t col_end = col_start + std::min(block, ncol - col_start);
52 while (row_start < nrow) {
53 size_t row_end = row_start + std::min(block, nrow - row_start);
54 for (
size_t c = col_start; c < col_end; ++c) {
55 for (
size_t r = row_start; r < row_end; ++r) {
56 output[c * output_stride + r] = input[r * input_stride + c];
void transpose(const Input_ *input, size_t nrow, size_t ncol, size_t input_stride, Output_ *output, size_t output_stride)
Definition transpose.hpp:38