SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
 
Loading...
Searching...
No Matches
aa20.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <vector>
16
20
21namespace seqan3
22{
23
63class aa20 : public aminoacid_base<aa20, 20>
64{
65private:
68
70 friend base_t;
73 friend base_t::base_t;
75
76public:
80 constexpr aa20() noexcept = default;
81 constexpr aa20(aa20 const &) noexcept = default;
82 constexpr aa20(aa20 &&) noexcept = default;
83 constexpr aa20 & operator=(aa20 const &) noexcept = default;
84 constexpr aa20 & operator=(aa20 &&) noexcept = default;
85 ~aa20() noexcept = default;
86
87 using base_t::base_t;
89
90private:
92 static constexpr char_type rank_to_char_table[alphabet_size]{'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
93 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'};
94
96 static constexpr char_type rank_to_char(rank_type const rank)
97 {
98 return rank_to_char_table[rank];
99 }
100
102 static constexpr rank_type char_to_rank(char_type const chr)
103 {
104 using index_t = std::make_unsigned_t<char_type>;
105 return char_to_rank_table[static_cast<index_t>(chr)];
106 }
107
108 // clang-format off
110 static constexpr std::array<rank_type, 256> char_to_rank_table
111 {
112 []() constexpr {
114
115 // initialize with 'S' because that appears most frequently
116 ret.fill(15u);
117
118 // reverse mapping for characters and their lowercase
119 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
120 {
121 ret[static_cast<rank_type>(rank_to_char_table[rnk])] = rnk;
122 ret[static_cast<rank_type>(to_lower(rank_to_char_table[rnk]))] = rnk;
123 }
124
125 ret['B'] = ret['D'];
126 ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
127 ret['J'] = ret['L'];
128 ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
129 ret['O'] = ret['L'];
130 ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
131 ret['U'] = ret['C'];
132 ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
133 ret['X'] = ret['S'];
134 ret['x'] = ret['S']; // Convert unknown amino acids to serine.
135 ret['Z'] = ret['E'];
136 ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
137 ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
138 return ret;
139 }()
140 };
141};
142// clang-format on
143
144// ------------------------------------------------------------------
145// containers
146// ------------------------------------------------------------------
147
154
155// ------------------------------------------------------------------
156// literals
157// ------------------------------------------------------------------
158inline namespace literals
159{
160
174constexpr aa20 operator""_aa20(char const c) noexcept
175{
176 return aa20{}.assign_char(c);
177}
178
190SEQAN3_WORKAROUND_LITERAL aa20_vector operator""_aa20(char const * const s, size_t const n)
191{
192 aa20_vector r;
193 r.resize(n);
194
195 for (size_t i = 0; i < n; ++i)
196 r[i].assign_char(s[i]);
197
198 return r;
199}
201
202} // namespace literals
203
204} // namespace seqan3
Provides seqan3::aminoacid_alphabet.
Provides seqan3::aminoacid_base.
The canonical amino acid alphabet.
Definition: aa20.hpp:64
constexpr aa20() noexcept=default
Defaulted.
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition: aminoacid_base.hpp:32
T fill(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: transform.hpp:83
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition: platform.hpp:282
T resize(T... args)
Provides utilities for modifying characters.