66 Index_ dim = (row ? mat.
nrow() : mat.
ncol());
67 Index_ otherdim = (row ? mat.
ncol() : mat.
nrow());
73 std::vector<Value_> xbuffer(otherdim);
74 std::vector<Index_> ibuffer(otherdim);
75 std::vector<Output_> tmp(num_groups);
77 for (Index_ i = 0; i < len; ++i) {
78 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
79 std::fill(tmp.begin(), tmp.end(),
static_cast<Output_
>(0));
81 internal::nanable_ifelse<Value_>(
84 for (int j = 0; j < range.number; ++j) {
85 auto val = range.value[j];
86 if (!std::isnan(val)) {
87 tmp[group[range.index[j]]] += val;
92 for (
int j = 0; j < range.number; ++j) {
93 tmp[group[range.index[j]]] += range.value[j];
98 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
99 output[g][i + start] = tmp[g];
112 std::vector<sums::RunningSparse<Output_, Value_, Index_> > runners;
113 runners.reserve(num_groups);
114 std::vector<LocalOutputBuffer<Output_> > local_output;
115 local_output.reserve(num_groups);
117 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
118 local_output.emplace_back(thread, start, len, output[g]);
119 runners.emplace_back(local_output.back().data(), sopt.
skip_nan, start);
123 std::vector<Value_> xbuffer(len);
124 std::vector<Index_> ibuffer(len);
126 for (
int i = 0; i < otherdim; ++i) {
127 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
128 runners[group[i]].add(range.value, range.index, range.number);
131 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
132 local_output[g].transfer();
138 if (mat.prefer_rows() == row) {
141 std::vector<Value_> xbuffer(otherdim);
142 std::vector<Output_> tmp(num_groups);
144 for (Index_ i = 0; i < len; ++i) {
145 auto ptr = ext->fetch(xbuffer.data());
146 std::fill(tmp.begin(), tmp.end(),
static_cast<Output_
>(0));
148 internal::nanable_ifelse<Value_>(
151 for (Index_ j = 0; j < otherdim; ++j) {
153 if (!std::isnan(val)) {
154 tmp[group[j]] += val;
159 for (Index_ j = 0; j < otherdim; ++j) {
160 tmp[group[j]] += ptr[j];
165 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
166 output[g][i + start] = tmp[g];
169 }, dim, sopt.num_threads);
173 std::vector<sums::RunningDense<Output_, Value_, Index_> > runners;
174 runners.reserve(num_groups);
175 std::vector<LocalOutputBuffer<Output_> > local_output;
176 local_output.reserve(num_groups);
178 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
179 local_output.emplace_back(thread, start, len, output[g]);
180 runners.emplace_back(len, local_output.back().data(), sopt.skip_nan);
183 std::vector<Value_> xbuffer(len);
186 for (
int i = 0; i < otherdim; ++i) {
187 auto ptr = ext->fetch(xbuffer.data());
188 runners[group[i]].add(ptr);
191 for (
decltype(num_groups) g = 0; g < num_groups; ++g) {
192 local_output[g].transfer();
194 }, dim, sopt.num_threads);