// Writes the transpose of a strided matrix: for every row r in [0, nrow) and
// column c in [0, ncol), output[c * output_stride + r] receives
// input[r * input_stride + c].
//
// input         - source buffer; element (r, c) is read from r * input_stride + c.
// nrow, ncol    - number of rows and columns of the source matrix.
// input_stride  - distance (in elements) between the starts of consecutive source rows.
// output        - destination buffer; element (r, c) is written to c * output_stride + r.
// output_stride - distance (in elements) between the starts of consecutive destination rows.
//
// NOTE(review): Input_ and Output_ are presumably template parameters declared
// just above this listing; the declaration is not visible here -- confirm.
38void transpose(
const Input_* input, std::size_t nrow, std::size_t ncol, std::size_t input_stride, Output_* output, std::size_t output_stride) {
// Shortcut: with a single row and unit output stride, or a single column and
// unit input stride, the index mapping above reduces to output[i] = input[i],
// so a plain element-wise copy gives the same result.
39 if ((nrow == 1 && output_stride == 1) || (ncol == 1 && input_stride == 1)) {
40 std::copy_n(input, nrow * ncol, output);
// NOTE(review): the listing jumps from line 40 to line 46 here; presumably an
// early return and the closing brace of the `if` follow the copy -- confirm
// against the full file.
// Tile edge length: the matrix is walked in tiles of at most block x block
// elements (presumably for cache locality -- confirm).
46 constexpr std::size_t block = 16;
47 std::size_t col_start = 0;
48 while (col_start < ncol) {
// Clamp the tile's column range so the final, partial tile stops at ncol.
49 std::size_t col_end = col_start + std::min(block, ncol - col_start);
51 std::size_t row_start = 0;
52 while (row_start < nrow) {
// Same clamping for the tile's row range.
53 std::size_t row_end = row_start + std::min(block, nrow - row_start);
// Within a tile, r is the innermost index: writes land on consecutive output
// positions (c * output_stride + r) while reads step through the input by
// input_stride.
54 for (std::size_t c = col_start; c < col_end; ++c) {
55 for (std::size_t r = row_start; r < row_end; ++r) {
56 output[c * output_stride + r] = input[r * input_stride + c];
// NOTE(review): the listing ends here; the closing braces and the statements
// that advance row_start / col_start to the next tile are not visible --
// confirm against the full file.
void transpose(const Input_ *input, std::size_t nrow, std::size_t ncol, std::size_t input_stride, Output_ *output, std::size_t output_stride)
Definition transpose.hpp:38