46 const Index_ dim = (row ? mat.
nrow() : mat.
ncol());
47 const Index_ otherdim = (row ? mat.
ncol() : mat.
nrow());
53 const bool count_zero = condition(0);
60 for (Index_ x = 0; x < len; ++x) {
61 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
63 for (Index_ j = 0; j < range.number; ++j) {
64 target += condition(range.value[j]);
67 target += otherdim - range.number;
69 output[x + start] = target;
78 for (Index_ x = 0; x < len; ++x) {
79 auto ptr = ext->fetch(xbuffer.data());
81 for (Index_ j = 0; j < otherdim; ++j) {
82 target += condition(ptr[j]);
84 output[x + start] = target;
91 auto threaded_output = sanisizer::create<std::vector<std::optional<std::vector<Output_> > > >(num_threads <= 1 ? 0 : num_threads - 1);
92 const auto get_output_ptr = [&](
const int thread, std::optional<std::vector<Output_> >& tmp_output) -> Output_* {
95 return tmp_output->data();
100 const auto save_output = [&](
const int thread, std::optional<std::vector<Output_> >& tmp_output) ->
void {
102 threaded_output[thread - 1] = std::move(tmp_output);
110 bool count_zero = condition(0);
113 std::optional<std::vector<Output_> > tmp_output;
114 auto curoutput = get_output_ptr(thread, tmp_output);
121 for (Index_ x = 0; x < len; ++x) {
122 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
123 for (Index_ j = 0; j < range.number; ++j) {
124 auto idx = range.index[j];
125 curoutput[idx] += condition(range.value[j]);
131 for (Index_ d = 0; d < dim; ++d) {
132 curoutput[d] += len - nonzeros[d];
136 save_output(thread, tmp_output);
137 }, otherdim, num_threads);
141 std::optional<std::vector<Output_> > tmp_output;
142 auto curoutput = get_output_ptr(thread, tmp_output);
147 for (Index_ x = 0; x < len; ++x) {
148 auto ptr = ext->fetch(xbuffer.data());
149 for (Index_ j = 0; j < dim; ++j) {
150 curoutput[j] += condition(ptr[j]);
154 save_output(thread, tmp_output);
155 }, otherdim, num_threads);
160 std::fill(output, output + dim, 0);
162 for (
int thread = 1; thread < num_used; ++thread) {
163 const auto& curout = *(threaded_output[thread - 1]);
164 for (Index_ d = 0; d < dim; ++d) {
165 output[d] += curout[d];