Raptor 3.0.0-rc.1
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
 
index.hpp
Go to the documentation of this file.
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <sharg/exceptions.hpp>
16
20
21namespace raptor
22{
23
24namespace index_structure
25{
26
27using ibf = seqan3::interleaved_bloom_filter<seqan3::data_layout::uncompressed>;
28using ibf_compressed = seqan3::interleaved_bloom_filter<seqan3::data_layout::compressed>;
29using hibf = hierarchical_interleaved_bloom_filter<seqan3::data_layout::uncompressed>;
30using hibf_compressed = hierarchical_interleaved_bloom_filter<seqan3::data_layout::compressed>;
31
32template <typename return_t, typename input_t>
33concept compressible_from = (std::same_as<return_t, ibf_compressed> && std::same_as<input_t, ibf>)
34 || (std::same_as<return_t, hibf_compressed> && std::same_as<input_t, hibf>);
35
36template <typename index_t>
37concept is_ibf = std::same_as<index_t, index_structure::ibf> || std::same_as<index_t, index_structure::ibf_compressed>;
38
39template <typename index_t>
40concept is_hibf =
41 std::same_as<index_t, index_structure::hibf> || std::same_as<index_t, index_structure::hibf_compressed>;
42
43template <typename index_t>
45 std::same_as<index_t, index_structure::ibf_compressed> || std::same_as<index_t, index_structure::hibf_compressed>;
46
47} // namespace index_structure
48
49template <seqan3::data_layout data_layout_mode_>
50class index_upgrader;
51
52template <typename data_t = index_structure::ibf>
54{
55private:
56 template <typename friend_data_t>
57 friend class raptor_index;
58
59 uint64_t window_size_{};
60 seqan3::shape shape_{};
61 uint8_t parts_{};
64 double fpr_{};
66 data_t ibf_{};
67
68public:
69 static constexpr seqan3::data_layout data_layout_mode = data_t::data_layout_mode;
70 static constexpr uint32_t version{2u};
71
72 raptor_index() = default;
73 raptor_index(raptor_index const &) = default;
74 raptor_index(raptor_index &&) = default;
75 raptor_index & operator=(raptor_index const &) = default;
76 raptor_index & operator=(raptor_index &&) = default;
77 ~raptor_index() = default;
78
79 explicit raptor_index(window const window_size,
80 seqan3::shape const shape,
81 uint8_t const parts,
82 bool const compressed,
83 std::vector<std::vector<std::string>> const & bin_path,
84 double const fpr,
85 data_t && ibf) :
86 window_size_{window_size.v},
87 shape_{shape},
88 parts_{parts},
89 compressed_{compressed},
90 bin_path_{bin_path},
91 fpr_{fpr},
92 ibf_{std::move(ibf)}
93 {}
94
95 explicit raptor_index(build_arguments const & arguments) :
96 window_size_{arguments.window_size},
97 shape_{arguments.shape},
98 parts_{arguments.parts},
99 compressed_{arguments.compressed},
100 bin_path_{arguments.bin_path},
101 fpr_{arguments.fpr},
102 ibf_{seqan3::bin_count{arguments.bins},
103 seqan3::bin_size{arguments.bits / arguments.parts},
104 seqan3::hash_function_count{arguments.hash}}
105 {
106 static_assert(data_layout_mode == seqan3::data_layout::uncompressed);
107 }
108
109 template <typename other_data_t>
110 explicit raptor_index(raptor_index<other_data_t> const & other)
111 {
113 window_size_ = other.window_size_;
114 shape_ = other.shape_;
115 parts_ = other.parts_;
116 compressed_ = true;
117 bin_path_ = other.bin_path_;
118 ibf_ = data_t{other.ibf_};
119 fpr_ = other.fpr_;
120 }
121
122 template <typename other_data_t>
124 {
126 window_size_ = std::move(other.window_size_);
127 shape_ = std::move(other.shape_);
128 parts_ = std::move(other.parts_);
129 compressed_ = true;
130 bin_path_ = std::move(other.bin_path_);
131 fpr_ = std::move(other.fpr_);
132 ibf_ = std::move(data_t{std::move(other.ibf_)});
133 }
134
135 uint64_t window_size() const
136 {
137 return window_size_;
138 }
139
140 seqan3::shape shape() const
141 {
142 return shape_;
143 }
144
145 uint8_t parts() const
146 {
147 return parts_;
148 }
149
150 bool compressed() const
151 {
152 return compressed_;
153 }
154
155 std::vector<std::vector<std::string>> const & bin_path() const
156 {
157 return bin_path_;
158 }
159
160 double fpr() const
161 {
162 return fpr_;
163 }
164
165 bool is_hibf() const
166 {
167 return is_hibf_;
168 }
169
170 data_t & ibf()
171 {
172 return ibf_;
173 }
174
175 data_t const & ibf() const
176 {
177 return ibf_;
178 }
179
188 template <seqan3::cereal_archive archive_t>
189 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
190 {
191 uint32_t parsed_version{raptor_index<>::version};
192 archive(parsed_version);
193 if (parsed_version == raptor_index<>::version)
194 {
195 try
196 {
197 archive(window_size_);
198 archive(shape_);
199 archive(parts_);
200 bool const type_is_compressed{compressed_};
201 archive(compressed_);
202 if (type_is_compressed != compressed_)
203 throw sharg::parser_error{"Data layouts of serialised and specified index differ."};
204 archive(bin_path_);
205 archive(fpr_);
206 archive(is_hibf_);
207 archive(ibf_);
208 }
209 catch (std::exception const & e)
210 {
211 throw sharg::parser_error{"Cannot read index: " + std::string{e.what()}};
212 }
213 }
214 else
215 {
216 throw sharg::parser_error{"Unsupported index version. Check raptor upgrade."}; // GCOVR_EXCL_LINE
217 }
218 }
219
220 /* \brief Serialisation support function. Do not load the actual data.
221 * \tparam archive_t Type of `archive`; must satisfy seqan3::cereal_input_archive.
222 * \param[in] archive The archive being serialised from/to.
223 * \param[in] version Index version.
224 *
225 * \attention These functions are never called directly.
226 * \sa https://docs.seqan.de/seqan/3.2.0/group__io.html#serialisation
227 */
228 template <seqan3::cereal_input_archive archive_t>
229 void load_parameters(archive_t & archive)
230 {
231 uint32_t parsed_version{};
232 archive(parsed_version);
233 if (parsed_version == version)
234 {
235 try
236 {
237 archive(window_size_);
238 archive(shape_);
239 archive(parts_);
240 archive(compressed_);
241 archive(bin_path_);
242 archive(fpr_);
243 archive(is_hibf_);
244 }
245 // GCOVR_EXCL_START
246 catch (std::exception const & e)
247 {
248 throw sharg::parser_error{"Cannot read index: " + std::string{e.what()}};
249 }
250 // GCOVR_EXCL_STOP
251 }
252 else
253 {
254 throw sharg::parser_error{"Unsupported index version. Check raptor upgrade."}; // GCOVR_EXCL_LINE
255 }
256 }
257
259 template <seqan3::cereal_input_archive archive_t>
260 void load_old_parameters(archive_t & archive)
261 {
262 uint32_t parsed_version{};
263 archive(parsed_version);
264 if (parsed_version == 1u)
265 {
266 try
267 {
268 archive(window_size_);
269 archive(shape_);
270 archive(parts_);
271 archive(compressed_);
272 archive(bin_path_);
273 }
274 // GCOVR_EXCL_START
275 catch (std::exception const & e)
276 {
277 throw sharg::parser_error{"Cannot read index: " + std::string{e.what()}};
278 }
279 // GCOVR_EXCL_STOP
280 }
281 else
282 {
283 throw sharg::parser_error{"Unsupported index version. Use Raptor 2.0's upgrade first."}; // LCOV_EXCL_LINE
284 }
285 }
287
288private:
289 template <seqan3::data_layout data_layout_mode_>
290 friend class index_upgrader;
291
294 template <seqan3::cereal_archive archive_t>
295 void load_old_index(archive_t & archive)
296 {
297 uint32_t parsed_version{};
298 archive(parsed_version);
299 if (parsed_version == 1u)
300 {
301 try
302 {
303 archive(window_size_);
304 archive(shape_);
305 archive(parts_);
306 archive(compressed_);
307 archive(bin_path_);
308 archive(ibf_);
309 }
310 // GCOVR_EXCL_START
311 catch (std::exception const & e)
312 {
313 throw sharg::parser_error{"Cannot read index: " + std::string{e.what()}};
314 }
315 // GCOVR_EXCL_STOP
316 }
317 else
318 {
319 throw sharg::parser_error{"Unsupported index version. Use Raptor 2.0's upgrade first."}; // LCOV_EXCL_LINE
320 }
321 }
323};
324
325} // namespace raptor
Provides raptor::build_arguments.
Definition: index_upgrader.hpp:23
Definition: index.hpp:54
void load_old_parameters(archive_t &archive)
Load parameters from old index format for use with raptor upgrade.
Definition: index.hpp:260
void load_old_index(archive_t &archive)
Load old index format for use with raptor upgrade.
Definition: index.hpp:295
Definition: index.hpp:40
Definition: index.hpp:37
Provides raptor::hierarchical_interleaved_bloom_filter.
Provides raptor::window.
Definition: build_arguments.hpp:28
Strong type for passing the window size.
Definition: strong_types.hpp:22
T what(T... args)