45 auto dim = (row ? p->
nrow() : p->
ncol());
46 auto otherdim = (row ? p->
ncol() : p->
nrow());
47 std::fill(output, output + dim, 0);
53 bool count_zero = condition(0);
56 std::vector<Value_> xbuffer(otherdim);
57 std::vector<Index_> ibuffer(otherdim);
60 for (Index_ x = 0; x < len; ++x) {
61 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
63 for (Index_ j = 0; j < range.number; ++j) {
64 target += condition(range.value[j]);
67 target += otherdim - range.number;
69 output[x + start] = target;
75 std::vector<Value_> xbuffer(otherdim);
78 for (Index_ x = 0; x < len; ++x) {
79 auto ptr = ext->fetch(xbuffer.data());
81 for (Index_ j = 0; j < otherdim; ++j) {
82 target += condition(ptr[j]);
84 output[x + start] = target;
90 num_threads = subpar::sanitize_num_workers(num_threads, otherdim);
91 std::vector<Output_*> threaded_output_ptrs(num_threads, output);
92 std::vector<std::vector<Output_> > threaded_output(num_threads - 1);
93 for (
int t = 1; t < num_threads; ++t) {
94 auto& curout = threaded_output[t - 1];
96 threaded_output_ptrs[t] = curout.data();
102 bool count_zero = condition(0);
105 std::vector<Value_> xbuffer(dim);
106 std::vector<Index_> ibuffer(dim);
109 auto curoutput = threaded_output_ptrs[thread];
110 std::vector<Index_> nonzeros(dim);
112 for (Index_ x = 0; x < len; ++x) {
113 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
114 for (Index_ j = 0; j < range.number; ++j) {
115 auto idx = range.index[j];
116 curoutput[idx] += condition(range.value[j]);
122 for (
int d = 0; d < dim; ++d) {
123 curoutput[d] += len - nonzeros[d];
126 }, otherdim, num_threads);
130 std::vector<Value_> xbuffer(dim);
132 auto curoutput = threaded_output_ptrs[thread];
134 for (Index_ x = 0; x < len; ++x) {
135 auto ptr = ext->fetch(xbuffer.data());
136 for (Index_ j = 0; j < dim; ++j) {
137 curoutput[j] += condition(ptr[j]);
140 }, otherdim, num_threads);
143 for (
int t = 1; t < num_threads; ++t) {
144 auto curoutput = threaded_output_ptrs[t];
145 for (Index_ d = 0; d < dim; ++d) {
146 output[d] += curoutput[d];