// Row and column extents; the constructor picks one of them as the primary
// dimension depending on my_csr.
73 Index_ my_nrow, my_ncol;
// Path of the HDF5 file and names of the value and index datasets inside it.
74 std::string my_file_name, my_value_name, my_index_name;
// Index pointers loaded from the pointer dataset as hsize_t values; validated
// in the constructor to start at zero, be non-decreasing and end at the
// number of non-zero elements.
75 std::vector<hsize_t> pointers;
// Set from options.maximum_cache_size in the constructor.
78 std::size_t my_slab_cache_size;
// Largest difference between two consecutive pointers seen during validation.
79 Index_ my_max_non_zeros;
// Chunk cache sizes for the value and index datasets; filled in by the
// constructor only when the corresponding dataset uses a chunked layout.
80 CompressedSparseMatrix_internal::ChunkCacheSizes my_chunk_cache_sizes;
// String arguments naming the HDF5 file and its datasets. The first three are
// moved into members; pointer_name is only used locally to open the pointer
// dataset.
99 std::string file_name,
100 std::string value_name,
101 std::string index_name,
102 std::string pointer_name,
// Member initializers: take ownership of the strings and record the cache
// size limit supplied through the options.
108 my_file_name(std::move(file_name)),
109 my_value_name(std::move(value_name)),
110 my_index_name(std::move(index_name)),
112 my_slab_cache_size(options.maximum_cache_size)
// The primary dimension is the one the pointers are checked against below:
// rows when my_csr is true, columns otherwise.
116 Index_ primary_dim = my_csr ? my_nrow : my_ncol;
117 Index_ secondary_dim = my_csr ? my_ncol : my_nrow;
// Helper that takes a flag named 'row' and returns a string; it is used
// below to build the dimension name inside error messages.
119 auto dim_as_str = [](
bool row) -> std::string {
// Open the file read-only; the length of the value dataset is taken as the
// number of non-zero elements.
// NOTE(review): the boolean template argument of open_and_check_dataset
// presumably selects an integer-type check - confirm against its definition.
128 H5::H5File file_handle(my_file_name, H5F_ACC_RDONLY);
129 auto dhandle = open_and_check_dataset<false>(file_handle, my_value_name);
130 hsize_t nonzeros = get_array_dimensions<1>(dhandle,
"value_name")[0];
// The index dataset must have the same length as the value dataset.
132 auto ihandle = open_and_check_dataset<true>(file_handle, my_index_name);
133 if (get_array_dimensions<1>(ihandle,
"index_name")[0] != nonzeros) {
134 throw std::runtime_error(
"number of non-zero elements is not consistent between 'value_name' and 'index_name'");
// The pointer dataset must hold exactly primary_dim + 1 entries. The explicit
// zero check rejects an empty dataset before 'ptr_size - 1' is evaluated.
137 auto phandle = open_and_check_dataset<true>(file_handle, pointer_name);
138 auto ptr_size = get_array_dimensions<1>(phandle,
"pointer_name")[0];
139 if (ptr_size == 0 || !sanisizer::is_equal(ptr_size - 1, primary_dim)) {
140 throw std::runtime_error(
"'pointer_name' dataset should have length equal to the number of " + dim_as_str(my_csr) +
" plus 1");
// For a chunked value dataset, compute its chunk cache size from the number
// of non-zeros, the chunk length and the byte size of the stored data type.
143 auto dparms = dhandle.getCreatePlist();
144 if (dparms.getLayout() == H5D_CHUNKED) {
145 hsize_t dchunk_length;
146 dparms.getChunk(1, &dchunk_length);
147 my_chunk_cache_sizes.value = CompressedSparseMatrix_internal::compute_chunk_cache_size(nonzeros, dchunk_length, dhandle.getDataType().getSize());
// Same computation for the index dataset.
150 auto iparms = ihandle.getCreatePlist();
151 if (iparms.getLayout() == H5D_CHUNKED) {
152 hsize_t ichunk_length;
153 iparms.getChunk(1, &ichunk_length);
154 my_chunk_cache_sizes.index = CompressedSparseMatrix_internal::compute_chunk_cache_size(nonzeros, ichunk_length, ihandle.getDataType().getSize());
// Read every pointer into memory as hsize_t. sanisizer::cast converts
// ptr_size to the vector's size type (presumably with a range check - confirm).
158 pointers.resize(sanisizer::cast<
decltype(pointers.size())>(ptr_size));
159 phandle.read(pointers.data(), H5::PredType::NATIVE_HSIZE);
// Boundary checks: the first pointer must be zero and the last one must equal
// the number of non-zero elements.
160 if (pointers[0] != 0) {
161 throw std::runtime_error(
"first index pointer should be zero");
163 if (pointers.back() != nonzeros) {
164 throw std::runtime_error(
"last index pointer should be equal to the number of non-zero elements");
// Walk consecutive pointer pairs: reject decreasing pointers and any range
// wider than the secondary dimension, and keep the widest range seen in
// my_max_non_zeros. The ordering check runs first so that the subtraction
// below is never performed on a decreasing pair.
168 my_max_non_zeros = 0;
169 for (Index_ i = 0; i < primary_dim; ++i) {
170 if (pointers[i+1] < pointers[i]) {
171 throw std::runtime_error(
"pointers should be ordered");
173 auto diff = pointers[i+1] - pointers[i];
174 if (sanisizer::is_greater_than(diff, secondary_dim)) {
175 throw std::runtime_error(
"differences between pointers should be no greater than the number of " + dim_as_str(!my_csr));
177 if (sanisizer::is_greater_than(diff, my_max_non_zeros)) {
178 my_max_non_zeros = diff;
// Constructor overload whose parameter list ends at 'csr', i.e. it takes no
// options argument.
// NOTE(review): the initializer list is not visible here - presumably it
// delegates to the other constructor with default options; confirm.
193 CompressedSparseMatrix(Index_ nrow, Index_ ncol, std::string file_name, std::string value_name, std::string index_name, std::string pointer_name,
bool csr) :
// Row count accessor (presumably returns my_nrow - confirm).
197 Index_ nrow()
const {
// Column count accessor (presumably returns my_ncol - confirm).
201 Index_ ncol()
const {
// Reports whether the matrix is sparse.
205 bool is_sparse()
const {
// Sparsity expressed as a double.
209 double is_sparse_proportion()
const {
// Reports whether row access is preferred.
213 bool prefer_rows()
const {
// Row preference expressed as a double: my_csr converted to 1.0 (true) or
// 0.0 (false).
217 double prefer_rows_proportion()
const {
218 return static_cast<double>(my_csr);
// Reports whether an oracle is used. The bool parameter is unnamed, so its
// value cannot influence the result.
221 bool uses_oracle(
bool)
const {
// Builds the MatrixDetails object that is handed to every extractor created
// by this class. The primary dimension extent (rows when my_csr is true) is
// passed before the secondary one.
233 CompressedSparseMatrix_internal::MatrixDetails<Index_> details()
const {
234 return CompressedSparseMatrix_internal::MatrixDetails<Index_>(
238 (my_csr ? my_nrow : my_ncol),
239 (my_csr ? my_ncol : my_nrow),
// Dense extractor factory for the full extent, templated on whether an oracle
// is in use. It builds either a Primary* or a Secondary* extractor from
// details() and the oracle.
// NOTE(review): the condition choosing between the two is not visible here -
// presumably it compares the requested dimension with my_csr; confirm.
247 template<
bool oracle_>
250 return std::make_unique<CompressedSparseMatrix_internal::PrimaryFullDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
251 details(), std::move(oracle)
254 return std::make_unique<CompressedSparseMatrix_internal::SecondaryFullDense<oracle_, Value_, Index_, CachedValue_> >(
255 details(), std::move(oracle)
// Dense extractor factory for a contiguous block, described by block_start
// and block_length.
260 template<
bool oracle_>
263 return std::make_unique<CompressedSparseMatrix_internal::PrimaryBlockDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
264 details(), std::move(oracle), block_start, block_length
267 return std::make_unique<CompressedSparseMatrix_internal::SecondaryBlockDense<oracle_, Value_, Index_, CachedValue_> >(
268 details(), std::move(oracle), block_start, block_length
// Dense extractor factory for an indexed subset; ownership of indices_ptr is
// moved into the extractor.
273 template<
bool oracle_>
276 return std::make_unique<CompressedSparseMatrix_internal::PrimaryIndexDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
277 details(), std::move(oracle), std::move(indices_ptr)
280 return std::make_unique<CompressedSparseMatrix_internal::SecondaryIndexDense<oracle_, Value_, Index_, CachedValue_> >(
281 details(), std::move(oracle), std::move(indices_ptr)
// Myopic dense extraction over the full extent: forwards to
// populate_dense<false>, passing 'false' in the oracle position.
287 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
288 return populate_dense<false>(row,
false, opt);
// Myopic dense extraction over a contiguous block.
291 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
292 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Myopic dense extraction over an indexed subset; indices_ptr is moved into
// the call.
296 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Sparse extractor factory for the full extent, templated on whether an
// oracle is in use. It builds either a Primary* or a Secondary* extractor.
// NOTE(review): the selecting condition and the constructor arguments are not
// visible here - confirm against the full definition.
303 template<
bool oracle_>
306 return std::make_unique<CompressedSparseMatrix_internal::PrimaryFullSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
310 return std::make_unique<CompressedSparseMatrix_internal::SecondaryFullSparse<oracle_, Value_, Index_, CachedValue_> >(
// Sparse extractor factory for a contiguous block.
316 template<
bool oracle_>
319 return std::make_unique<CompressedSparseMatrix_internal::PrimaryBlockSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
323 return std::make_unique<CompressedSparseMatrix_internal::SecondaryBlockSparse<oracle_, Value_, Index_, CachedValue_> >(
// Sparse extractor factory for an indexed subset.
329 template<
bool oracle_>
332 return std::make_unique<CompressedSparseMatrix_internal::PrimaryIndexSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
336 return std::make_unique<CompressedSparseMatrix_internal::SecondaryIndexSparse<oracle_, Value_, Index_, CachedValue_> >(
// Myopic sparse extraction over the full extent: forwards to
// populate_sparse<false>, passing 'false' in the oracle position.
343 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
344 return populate_sparse<false>(row,
false, opt);
// Myopic sparse extraction over a contiguous block.
347 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
348 return populate_sparse<false>(row,
false, block_start, block_length, opt);
// Myopic sparse extraction over an indexed subset; indices_ptr is moved into
// the call.
352 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular dense extraction over the full extent: the oracle is moved into
// populate_dense<true>.
360 return populate_dense<true>(row, std::move(oracle), opt);
// Oracular dense extraction over a contiguous block.
363 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
364 return populate_dense<true>(row, std::move(oracle), block_start, block_length, opt);
// Oracular dense extraction over an indexed subset; both the oracle and
// indices_ptr are moved into the call.
368 return populate_dense<true>(row, std::move(oracle), std::move(indices_ptr), opt);
// Oracular sparse extraction over the full extent: the oracle is moved into
// populate_sparse<true>.
376 return populate_sparse<true>(row, std::move(oracle), opt);
// Oracular sparse extraction over a contiguous block.
379 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
380 return populate_sparse<true>(row, std::move(oracle), block_start, block_length, opt);
// Oracular sparse extraction over an indexed subset; both the oracle and
// indices_ptr are moved into the call.
384 return populate_sparse<true>(row, std::move(oracle), std::move(indices_ptr), opt);