// Matrix dimensions. The constructor picks one of them (via my_csr) to
// validate the length of the pointer dataset.
72 Index_ my_nrow, my_ncol;
// Path of the HDF5 file and the names of the datasets holding the non-zero
// values and their indices; moved in from the matching constructor arguments.
73 std::string my_file_name, my_value_name, my_index_name;
// Index pointers, read from the pointer dataset as hsize_t. The constructor
// checks that the first entry is 0 and the last equals the number of
// non-zero elements.
74 std::vector<hsize_t> pointers;
// Initialized from options.maximum_cache_size.
78 size_t my_slab_cache_size;
// Largest difference between two consecutive entries of `pointers`.
79 size_t my_max_non_zeros;
// Larger of the two per-dataset chunk footprints (chunk elements times
// element size) computed in the constructor for the value and index datasets.
80 size_t my_chunk_cache_size;
// Take ownership of the file path and dataset names supplied by the caller.
99 my_file_name(std::move(file_name)),
100 my_value_name(std::move(value_name)),
101 my_index_name(std::move(index_name)),
// The slab cache budget is copied directly from the supplied options.
103 my_slab_cache_size(options.maximum_cache_size)
// Open the file read-only; the three dataset handles below all come from it.
106 H5::H5File file_handle(my_file_name, H5F_ACC_RDONLY);
// Values dataset: its 1-D extent defines the number of non-zero elements.
107 auto dhandle = open_and_check_dataset<false>(file_handle, my_value_name);
108 hsize_t nonzeros = get_array_dimensions<1>(dhandle,
"value_name")[0];
// The index dataset must have exactly as many entries as the values dataset.
110 auto ihandle = open_and_check_dataset<true>(file_handle, my_index_name);
111 if (get_array_dimensions<1>(ihandle,
"index_name")[0] != nonzeros) {
112 throw std::runtime_error(
"number of non-zero elements is not consistent between 'value_name' and 'index_name'");
// The pointer dataset must hold one more entry than the number of rows
// (when my_csr is true) or columns (otherwise).
115 auto phandle = open_and_check_dataset<true>(file_handle, pointer_name);
116 size_t ptr_size = get_array_dimensions<1>(phandle,
"pointer_name")[0];
117 size_t dim_p1 =
static_cast<size_t>(my_csr ? my_nrow : my_ncol) + 1;
118 if (ptr_size != dim_p1) {
119 throw std::runtime_error(
"'pointer_name' dataset should have length equal to the number of " + (my_csr ? std::string(
"rows") : std::string(
"columns")) +
" plus 1");
// Chunk length and element size of the values dataset. Both start at zero
// and are only filled in when the dataset uses a chunked layout.
129 hsize_t dchunk_length = 0;
130 size_t dchunk_element_size = 0;
131 auto dparms = dhandle.getCreatePlist();
132 if (dparms.getLayout() == H5D_CHUNKED) {
133 dparms.getChunk(1, &dchunk_length);
134 dchunk_element_size = dhandle.getDataType().getSize();
// Same bookkeeping for the index dataset.
137 hsize_t ichunk_length = 0;
138 size_t ichunk_element_size = 0;
139 auto iparms = ihandle.getCreatePlist();
140 if (iparms.getLayout() == H5D_CHUNKED) {
141 iparms.getChunk(1, &ichunk_length);
142 ichunk_element_size = ihandle.getDataType().getSize();
// For a chunk shorter than the data, this yields min(2 * chunk_length, nonzeros).
// It is written as chunk_length + min(chunk_length, remainder), so the
// doubling is never evaluated directly. Going by the lambda's name, this is
// presumably to avoid overflow; confirm.
145 auto non_overflow_double_min = [nonzeros](hsize_t chunk_length) ->
size_t {
148 if (chunk_length < nonzeros) {
151 return chunk_length + std::min(chunk_length, nonzeros - chunk_length);
// Chunk cache budget: the larger of the two datasets' requirements, each being
// the element count from the lambda times that dataset's element size.
155 my_chunk_cache_size = std::max(
156 non_overflow_double_min(ichunk_length) * ichunk_element_size,
157 non_overflow_double_min(dchunk_length) * dchunk_element_size
// Load every pointer into memory, then verify the two boundary values.
161 pointers.resize(dim_p1);
162 phandle.read(pointers.data(), H5::PredType::NATIVE_HSIZE);
163 if (pointers[0] != 0) {
164 throw std::runtime_error(
"first index pointer should be zero");
166 if (pointers.back() != nonzeros) {
167 throw std::runtime_error(
"last index pointer should be equal to the number of non-zero elements");
// Track the largest gap between consecutive pointers.
// NOTE(review): the subtraction is unsigned, so it would wrap if the pointers
// were not non-decreasing. No monotonicity check is visible here; confirm
// whether one exists elsewhere.
171 my_max_non_zeros = 0;
172 for (
size_t i = 1; i < pointers.size(); ++i) {
173 hsize_t diff = pointers[i] - pointers[i-1];
174 if (diff > my_max_non_zeros) {
175 my_max_non_zeros = diff;
// Constructor overload taking the matrix dimensions, the file path, the three
// dataset names and the CSR flag, with no options argument.
// NOTE(review): the first parameter is named `ncsr` while its sibling is `ncol`.
// It presumably means the number of rows; confirm and consider renaming it
// together with its uses.
190 CompressedSparseMatrix(Index_ ncsr, Index_ ncol, std::string file_name, std::string value_name, std::string index_name, std::string pointer_name,
bool csr) :
// Row-count accessor; presumably backed by my_nrow (TODO confirm).
194 Index_ nrow()
const {
// Column-count accessor; presumably backed by my_ncol (TODO confirm).
198 Index_ ncol()
const {
// Reports whether the matrix is sparse.
202 bool is_sparse()
const {
// Floating-point counterpart of is_sparse().
206 double is_sparse_proportion()
const {
// Reports whether row access is preferred; presumably tied to my_csr, as the
// proportion variant below is (TODO confirm).
210 bool prefer_rows()
const {
// Converts the boolean my_csr into a double, i.e. 1.0 when it is true and
// 0.0 otherwise.
214 double prefer_rows_proportion()
const {
215 return static_cast<double>(my_csr);
// The boolean argument is unnamed, so its value cannot influence the result.
218 bool uses_oracle(
bool)
const {
// Builds the MatrixDetails object that is handed to every extractor.
// The two visible dimension arguments are ordered by my_csr: the row count
// comes first when my_csr is true, the column count first otherwise.
230 CompressedSparseMatrix_internal::MatrixDetails<Index_> details()
const {
231 return CompressedSparseMatrix_internal::MatrixDetails<Index_>(
235 (my_csr ? my_nrow : my_ncol),
236 (my_csr ? my_ncol : my_nrow),
// Dense extractor factories, templated on whether an oracle is in use.
// Each one builds either a Primary* or a Secondary* extractor from details()
// and the oracle. Only the Primary* variants take the CachedIndex_ template
// argument.
// NOTE(review): the selecting condition presumably compares the requested
// dimension against my_csr; confirm.

// Full-extent variant.
244 template<
bool oracle_>
247 return std::make_unique<CompressedSparseMatrix_internal::PrimaryFullDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
248 details(), std::move(oracle)
251 return std::make_unique<CompressedSparseMatrix_internal::SecondaryFullDense<oracle_, Value_, Index_, CachedValue_> >(
252 details(), std::move(oracle)
// Contiguous-block variant: also forwards block_start and block_length.
257 template<
bool oracle_>
260 return std::make_unique<CompressedSparseMatrix_internal::PrimaryBlockDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
261 details(), std::move(oracle), block_start, block_length
264 return std::make_unique<CompressedSparseMatrix_internal::SecondaryBlockDense<oracle_, Value_, Index_, CachedValue_> >(
265 details(), std::move(oracle), block_start, block_length
// Indexed variant: also moves indices_ptr into the extractor.
270 template<
bool oracle_>
273 return std::make_unique<CompressedSparseMatrix_internal::PrimaryIndexDense<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
274 details(), std::move(oracle), std::move(indices_ptr)
277 return std::make_unique<CompressedSparseMatrix_internal::SecondaryIndexDense<oracle_, Value_, Index_, CachedValue_> >(
278 details(), std::move(oracle), std::move(indices_ptr)
// Myopic dense extraction entry points. Each forwards to populate_dense<false>
// and passes a literal `false` in the argument position where the oracular
// overloads pass their oracle.

// Full-extent overload.
284 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
285 return populate_dense<false>(row,
false, opt);
// Contiguous-block overload.
288 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
289 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Indexed overload: hands over ownership of indices_ptr.
293 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Sparse extractor factories, templated on whether an oracle is in use.
// They mirror the dense ones: each builds either a Primary* or a Secondary*
// sparse extractor, and only the Primary* variants take the CachedIndex_
// template argument.
// NOTE(review): the selecting condition presumably compares the requested
// dimension against my_csr; confirm.

// Full-extent variant.
300 template<
bool oracle_>
303 return std::make_unique<CompressedSparseMatrix_internal::PrimaryFullSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
307 return std::make_unique<CompressedSparseMatrix_internal::SecondaryFullSparse<oracle_, Value_, Index_, CachedValue_> >(
// Contiguous-block variant.
313 template<
bool oracle_>
316 return std::make_unique<CompressedSparseMatrix_internal::PrimaryBlockSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
320 return std::make_unique<CompressedSparseMatrix_internal::SecondaryBlockSparse<oracle_, Value_, Index_, CachedValue_> >(
// Indexed variant.
326 template<
bool oracle_>
329 return std::make_unique<CompressedSparseMatrix_internal::PrimaryIndexSparse<oracle_, Value_, Index_, CachedValue_, CachedIndex_> >(
333 return std::make_unique<CompressedSparseMatrix_internal::SecondaryIndexSparse<oracle_, Value_, Index_, CachedValue_> >(
// Myopic sparse extraction entry points. Each forwards to
// populate_sparse<false> and passes a literal `false` in the argument position
// where the oracular overloads pass their oracle.

// Full-extent overload.
340 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
341 return populate_sparse<false>(row,
false, opt);
// Contiguous-block overload.
344 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
345 return populate_sparse<false>(row,
false, block_start, block_length, opt);
// Indexed overload: hands over ownership of indices_ptr.
349 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular dense extraction entry points. Each forwards to populate_dense<true>
// and moves the caller's oracle into the extractor.

// Full-extent overload.
357 return populate_dense<true>(row, std::move(oracle), opt);
// Contiguous-block overload.
360 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
361 return populate_dense<true>(row, std::move(oracle), block_start, block_length, opt);
// Indexed overload: also hands over ownership of indices_ptr.
365 return populate_dense<true>(row, std::move(oracle), std::move(indices_ptr), opt);
// Oracular sparse extraction entry points. Each forwards to
// populate_sparse<true> and moves the caller's oracle into the extractor.

// Full-extent overload.
373 return populate_sparse<true>(row, std::move(oracle), opt);
// Contiguous-block overload.
376 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
377 return populate_sparse<true>(row, std::move(oracle), block_start, block_length, opt);
// Indexed overload: also hands over ownership of indices_ptr.
381 return populate_sparse<true>(row, std::move(oracle), std::move(indices_ptr), opt);