tatami/transpose_8hpp_source.html

#ifndef TATAMI_TRANSPOSE_HPP

#define TATAMI_TRANSPOSE_HPP


#include <algorithm>


namespace tatami {


/**
 * @brief Transpose a strided row-major matrix into another strided array.
 *
 * Element `(r, c)` of the input is read from `input[r * input_stride + c]`
 * and written to `output[c * output_stride + r]`, with the value converted
 * from `Input_` to `Output_` by assignment.
 *
 * @tparam Input_ Type of the values being read.
 * @tparam Output_ Type of the values being written.
 *
 * @param[in] input Pointer to the first element of the input matrix.
 * @param nrow Number of rows in the input matrix.
 * @param ncol Number of columns in the input matrix.
 * @param input_stride Distance, in elements, between the starts of consecutive input rows.
 * @param[out] output Pointer to the first element of the transposed matrix.
 * @param output_stride Distance, in elements, between the starts of consecutive output rows.
 */
template<typename Input_, typename Output_>
void transpose(const Input_* input, std::size_t nrow, std::size_t ncol, std::size_t input_stride, Output_* output, std::size_t output_stride) {
    // A lone row written to unit-stride output, or a lone column read from unit-stride input,
    // has the same element order before and after transposition, so a flat copy is enough.
    const bool lone_row_into_packed_output = (nrow == 1 && output_stride == 1);
    const bool lone_column_from_packed_input = (ncol == 1 && input_stride == 1);
    if (lone_row_into_packed_output || lone_column_from_packed_input) {
        std::copy_n(input, nrow * ncol, output);
        return;
    }

    // Process the matrix one square tile at a time, so that the reads and writes
    // of a tile stay close together in memory (i.e., for cache-friendliness).
    constexpr std::size_t tile = 16;

    for (std::size_t col_lo = 0, col_hi = 0; col_lo < ncol; col_lo = col_hi) {
        // The last tile along each dimension may be narrower than 'tile'.
        col_hi = col_lo + std::min(tile, ncol - col_lo);

        for (std::size_t row_lo = 0, row_hi = 0; row_lo < nrow; row_lo = row_hi) {
            row_hi = row_lo + std::min(tile, nrow - row_lo);

            for (std::size_t c = col_lo; c < col_hi; ++c) {
                // Input column 'c' becomes output row 'c'.
                const std::size_t out_row_offset = c * output_stride;
                for (std::size_t r = row_lo; r < row_hi; ++r) {
                    output[out_row_offset + r] = input[r * input_stride + c];
                }
            }
        }
    }
}

void transpose(const Input_* input, std::size_t nrow, std::size_t ncol, std::size_t input_stride, Output_* output, std::size_t output_stride) {…}


/**
 * @brief Transpose a row-major matrix with no padding between rows.
 *
 * Convenience overload that forwards to the strided `transpose()` overload,
 * using `ncol` as the input stride and `nrow` as the output stride.
 *
 * @tparam Input_ Type of the values being read.
 * @tparam Output_ Type of the values being written.
 *
 * @param[in] input Pointer to the first element of the input matrix.
 * @param nrow Number of rows in the input matrix.
 * @param ncol Number of columns in the input matrix.
 * @param[out] output Pointer to the first element of the transposed matrix.
 */
template<typename Input_, typename Output_>
void transpose(const Input_* input, std::size_t nrow, std::size_t ncol, Output_* output) {
    // Without padding, consecutive input rows are 'ncol' elements apart,
    // and consecutive rows of the transposed output are 'nrow' elements apart.
    const std::size_t packed_input_stride = ncol;
    const std::size_t packed_output_stride = nrow;
    transpose(input, nrow, ncol, packed_input_stride, output, packed_output_stride);
}

void transpose(const Input_* input, std::size_t nrow, std::size_t ncol, Output_* output) {…}


// COMMENT:

// I tried really hard to make an in-place version, but it's too frigging complicated for non-square matrices.

// It can be done, but I can't see a way to do it efficiently as you end up hopping all over the matrix (a la in-place reordering).

// There doesn't seem to be any opportunity to do it in blocks for cache-friendliness;

// the displaced values from the original matrix don't form a corresponding block in the transposed matrix.

// Perhaps this is a skill issue but at least Eigen agrees with my assessment, as they just make a new copy when the matrix is not square.

// (See https://gitlab.com/libeigen/eigen/-/blob/master/Eigen/src/Core/Transpose.h#L293 for the relevant code.)

// I'm not going to optimize the square case because tatami rarely, if ever, deals in situations with square matrices.


}


#endif

tatami
Flexible representations for matrix data.
Definition Extractor.hpp:15

tatami::transpose
void transpose(const Input_ *input, std::size_t nrow, std::size_t ncol, std::size_t input_stride, Output_ *output, std::size_t output_stride)
Definition transpose.hpp:38