SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
 
Loading...
Searching...
No Matches
io/sam_file/detail/cigar.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <concepts>
17#include <ranges>
18#include <seqan3/std/charconv>
19#include <sstream>
20
29
30namespace seqan3::detail
31{
34struct view_equality_fn
35{
37 template <std::ranges::forward_range rng1_type, std::ranges::forward_range rng2_type>
38 constexpr bool operator()(rng1_type && rng1, rng2_type && rng2) const
39 {
40 return std::ranges::equal(rng1, rng2);
41 }
42};
43
51inline void update_alignment_lengths(int32_t & ref_length,
52 int32_t & seq_length,
53 char const cigar_operation,
54 uint32_t const cigar_count)
55{
56 switch (cigar_operation)
57 {
58 case 'M':
59 case '=':
60 case 'X':
61 ref_length += cigar_count, seq_length += cigar_count;
62 break;
63 case 'D':
64 case 'N':
65 ref_length += cigar_count;
66 break;
67 case 'I':
68 seq_length += cigar_count;
69 break;
70 case 'S':
71 case 'H':
72 case 'P':
73 break; // no op (soft-clipping or padding does not increase either length)
74 default:
75 throw format_error{"Illegal cigar operation: " + std::string{cigar_operation}};
76 }
77}
78
91{
// Body of a function that turns the cigar string `cigar_str` into a vector of seqan3::cigar elements.
// NOTE(review): the signature line is missing from this listing; confirm parameter and return types there.
92 std::vector<seqan3::cigar> cigar_vector{};
93
// A string that is exactly "*" produces an empty vector.
94 if (cigar_str == "*")
95 return cigar_vector;
96
97 uint32_t cigar_count{};
98 char const * ptr = cigar_str.data();
99 char const * const end = ptr + cigar_str.size();
100
// Consume one <count><operation> pair per iteration until the whole string has been read.
101 while (ptr < end)
102 {
103 auto const res = std::from_chars(ptr, end, cigar_count); // reads number up to next character
104
// Any std::from_chars error (e.g. no leading digits, value out of range) is reported as a format_error.
105 if (res.ec != std::errc{})
106 throw format_error{"Corrupted cigar string."};
107
// NOTE(review): if the string ends in digits (e.g. "10"), std::from_chars returns res.ptr == end, so
// `*res.ptr` below reads one past the parsed range — confirm callers guarantee a trailing operation character.
108 ptr = res.ptr + 1; // skip cigar operation character
109
// assign_char_strictly_to throws if the character is not valid for seqan3::cigar::operation.
110 cigar_vector.emplace_back(cigar_count, seqan3::assign_char_strictly_to(*res.ptr, seqan3::cigar::operation{}));
111 }
112
113 return cigar_vector;
114}
115
121[[nodiscard]] inline std::string get_cigar_string(std::vector<cigar> const & cigar_vector)
122{
123 std::string result{};
124 std::ranges::for_each(cigar_vector,
125 [&result](auto & cig)
126 {
127 result.append(static_cast<std::string_view>(cig.to_string()));
128 });
129 return result;
130}
131
165template <seqan3::aligned_sequence ref_seq_type, seqan3::aligned_sequence query_seq_type>
166[[nodiscard]] inline std::string get_cigar_string(ref_seq_type && ref_seq,
167 query_seq_type && query_seq,
168 uint32_t const query_start_pos = 0,
169 uint32_t const query_end_pos = 0,
170 bool const extended_cigar = false)
171{
172 return get_cigar_string(std::tie(ref_seq, query_seq), query_start_pos, query_end_pos, extended_cigar);
173}
174
//!\brief Function object whose call operator always throws; it never returns a value.
struct access_restrictor_fn
{
    /*!\brief Unconditionally throws, whatever argument is passed.
     * \tparam chr_t The argument type; also the declared (never produced) return type.
     * \throws std::logic_error always.
     */
    template <typename chr_t>
    [[noreturn]] chr_t operator()(chr_t) const
    {
        throw std::logic_error{"Access is not allowed because there is no sequence information."};
    }
};
186
187} // namespace seqan3::detail
Provides the seqan3::cigar alphabet.
The <charconv> header from C++17's standard library.
The actual implementation of seqan3::cigar::operation for documentation purposes only....
Definition: cigar_operation.hpp:48
T data(T... args)
T end(T... args)
T equal(T... args)
T for_each(T... args)
T from_chars(T... args)
constexpr auto assign_char_strictly_to
Assign a character to an alphabet object, throw if the character is not valid.
Definition: alphabet/concept.hpp:734
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
Provides seqan3::detail::pairwise_alignment and seqan3::detail::writable_pairwise_alignment.
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition: platform.hpp:282
Provides character predicates for tokenisation.
Provides seqan3::views::single_pass_input.
T size(T... args)
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
T tie(T... args)
Auxiliary for pretty printing of exception messages.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.