SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
 
Loading...
Searching...
No Matches
wuss.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cmath>
16#include <limits>
17#include <vector>
18
22
23// ------------------------------------------------------------------
24// wuss
25// ------------------------------------------------------------------
26
27namespace seqan3
28{
29
59template <uint8_t SIZE = 51>
60class wuss : public alphabet_base<wuss<SIZE>, SIZE>
61{
62 static_assert(SIZE >= 15 && SIZE <= 67 && SIZE % 2 == 1,
63 "The wuss<> alphabet size must be an odd number in range 15..67.");
64
65private:
67 using base_t = alphabet_base<wuss<SIZE>, SIZE>;
68
70 friend base_t;
71
72protected:
73 using typename base_t::char_type;
74 using typename base_t::rank_type;
75
76public:
78 using base_t::to_char;
79 using base_t::to_rank;
80
84 constexpr wuss() noexcept = default;
85 constexpr wuss(wuss const &) noexcept = default;
86 constexpr wuss(wuss &&) noexcept = default;
87 constexpr wuss & operator=(wuss const &) noexcept = default;
88 constexpr wuss & operator=(wuss &&) noexcept = default;
89 ~wuss() noexcept = default;
90
92
101 constexpr bool is_pair_open() const noexcept
102 {
103 return interaction_tab[to_rank()] < 0;
104 }
105
111 constexpr bool is_pair_close() const noexcept
112 {
113 return interaction_tab[to_rank()] > 0;
114 }
115
121 constexpr bool is_unpaired() const noexcept
122 {
123 return interaction_tab[to_rank()] == 0;
124 }
125
131 // formula: (alphabet size - 7 unpaired characters) / 2, as every bracket exists as opening/closing pair
132 static constexpr uint8_t max_pseudoknot_depth{static_cast<uint8_t>((alphabet_size - 7) / 2)};
133
142 constexpr std::optional<uint8_t> pseudoknot_id() const noexcept
143 {
144 if (interaction_tab[to_rank()] != 0)
145 return std::abs(interaction_tab[to_rank()]) - 1;
146 else
147 return std::nullopt; // unpaired
148 }
150
151private:
153 static constexpr char_type rank_to_char(rank_type const rank)
154 {
155 return rank_to_char_table[rank];
156 }
157
159 static constexpr rank_type char_to_rank(char_type const chr)
160 {
161 using index_t = std::make_unsigned_t<char_type>;
162 return char_to_rank_table[static_cast<index_t>(chr)];
163 }
164
165 // clang-format off
167 static constexpr std::array<char_type, alphabet_size> rank_to_char_table
168 {
169 []() constexpr {
170 std::array<char_type, alphabet_size> chars{'.', ':', ',', '-', '_', '~', ';', '<', '(', '[', '{', '>', ')',
171 ']', '}'};
172
173 // pseudoknot letters
174 for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
175 {
176 char_type const off = static_cast<char_type>((rnk - 15u) / 2u);
177 chars[rnk] = 'A' + off;
178 chars[rnk + 1u] = 'a' + off;
179 }
180
181 return chars;
182 }()
183 };
184
186 static constexpr std::array<rank_type, 256> char_to_rank_table
187 {
188 []() constexpr {
189 std::array<rank_type, 256> rank_table{};
190
191 rank_table.fill(6u);
192
193 // set alphabet values
194 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
195 rank_table[rank_to_char_table[rnk]] = rnk;
196
197 return rank_table;
198 }()
199 };
200
204 static constexpr std::array<int8_t, SIZE> interaction_tab
205 {
206 []() constexpr {
207 static_assert(static_cast<int16_t>(std::numeric_limits<int8_t>::max()) >= SIZE);
208 static_assert(- static_cast<int16_t>(std::numeric_limits<int8_t>::min()) >= SIZE);
209
210 std::array<int8_t, alphabet_size> interaction_table{};
211 int8_t cnt_open = 0;
212 int8_t cnt_close = 0;
213
214 for (rank_type rnk = 0u; rnk <= 6u; ++rnk)
215 interaction_table[rnk] = 0;
216
217 for (rank_type rnk = 7u; rnk <= 10u; ++rnk)
218 interaction_table[rnk] = --cnt_open;
219
220 for (rank_type rnk = 11u; rnk <= 14u; ++rnk)
221 interaction_table[rnk] = ++cnt_close;
222
223 for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
224 {
225 interaction_table[rnk] = --cnt_open;
226 interaction_table[rnk + 1u] = ++cnt_close;
227 }
228
229 return interaction_table;
230 }()
231 };
232};
233// clang-format on
234
239
240inline namespace literals
241{
242
256constexpr wuss51 operator""_wuss51(char const ch) noexcept
257{
258 return wuss51{}.assign_char(ch);
259}
260
272SEQAN3_WORKAROUND_LITERAL std::vector<wuss51> operator""_wuss51(char const * str, std::size_t len)
273{
275 vec.resize(len);
276
277 for (size_t idx = 0ul; idx < len; ++idx)
278 vec[idx].assign_char(str[idx]);
279
280 return vec;
281}
283
284} // namespace literals
285
286} // namespace seqan3
Provides seqan3::rna_structure_alphabet.
Provides seqan3::alphabet_base.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:57
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:137
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
constexpr char_type to_char() const noexcept
Return the letter as a character of char_type.
Definition: alphabet_base.hpp:115
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:61
static constexpr uint8_t max_pseudoknot_depth
The ability of this alphabet to represent pseudoknots, i.e. crossing interactions,...
Definition: wuss.hpp:132
constexpr bool is_pair_close() const noexcept
Check whether the character represents a leftward interaction in an RNA structure.
Definition: wuss.hpp:111
constexpr bool is_pair_open() const noexcept
Check whether the character represents a rightward interaction in an RNA structure.
Definition: wuss.hpp:101
constexpr std::optional< uint8_t > pseudoknot_id() const noexcept
Get an identifier for a pseudoknotted interaction, where opening and closing brackets of the same typ...
Definition: wuss.hpp:142
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
constexpr wuss() noexcept=default
Defaulted.
constexpr bool is_unpaired() const noexcept
Check whether the character represents an unpaired position in an RNA structure.
Definition: wuss.hpp:121
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
T fill(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
T max(T... args)
T min(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
@ off
Automatic update notifications should be disabled.
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition: platform.hpp:282
T resize(T... args)
Provides utilities for modifying characters.