Home | Libraries | People | FAQ | More |
boost::genetics::basic_dna_string — This class stores DNA strings compactly allowing 32 or more bases to be accessed in a single instruction.
// In header: <boost/genetics/dna_string.hpp> template<typename Traits> class basic_dna_string { public: // types typedef Traits::DnaArrayType array_type; typedef basic_dna_string< Traits > this_type; typedef Traits::DnaWordType word_type; // construct/copy/destruct basic_dna_string(); basic_dna_string(size_t); template<typename InIter> basic_dna_string(InIter, InIter); template<typename StrChar, typename StrTraits, typename StrAllocator> basic_dna_string(const std::basic_string< StrChar, StrTraits, StrAllocator > &, size_t = 0, size_t = ~(size_t) 0); template<typename charT> basic_dna_string(const charT *, size_t = 0, size_t = ~(size_t) 0); template<typename Mapper> basic_dna_string(Mapper &, typename Mapper::is_mapper * = 0); // public member functions void append(const char *); template<typename InIter> void append(InIter, InIter); int compare(size_t, size_t, const basic_dna_string &) const; template<typename CiTraits> int compare_inexact(size_t, size_t, const basic_dna_string< CiTraits > &, size_t = 0) const; size_t find(const this_type &, size_t = 0, size_t = ~(size_t) 0) const; template<typename String> size_t find_inexact(const String &, size_t = 0, size_t = ~(size_t) 0, size_t = 0) const; int get_code(size_t) const; word_type get_index(size_t, size_t) const; const array_type & get_values() const; operator std::string() const; bool operator!=(const basic_dna_string &) const; bool operator<(const basic_dna_string &) const; bool operator<=(const basic_dna_string &) const; bool operator==(const basic_dna_string &) const; bool operator>(const basic_dna_string &) const; bool operator>=(const basic_dna_string &) const; char operator[](size_t) const; void reserve(size_t); void resize(size_t); size_t size() const; basic_dna_string substr(size_t = 0, size_t = ~(size_t) 0, bool = false) const; void swap(basic_dna_string &); word_type window(size_t) const; void write_binary(writer &) const; // private member functions template<typename String, bool cpu_has_popcnt> 
size_t inexact_search(const String &, size_t, size_t, word_type, word_type, size_t, size_t, size_t) const; // public data members static const size_t bases_per_value; static const size_t npos; };
Like many of the container classes in this library, it can be specialised into a standard (std::vector) version for construction and a read-only mapped (mapped_vector) version for high-performance use.
basic_dna_string public construct/copy/destruct
basic_dna_string();Default constructor.
basic_dna_string(size_t size);Construct a dna_string with size elements (all 'A').
template<typename InIter> basic_dna_string(InIter b, InIter e);Construct a dna_string from a range of memory.
template<typename StrChar, typename StrTraits, typename StrAllocator> basic_dna_string(const std::basic_string< StrChar, StrTraits, StrAllocator > & str, size_t pos = 0, size_t n = ~(size_t) 0);Construct a dna_string from a C++ string.
template<typename charT> basic_dna_string(const charT * str, size_t pos = 0, size_t n = ~(size_t) 0);Construct a dna_string from a substring of a character array.
template<typename Mapper> basic_dna_string(Mapper & map, typename Mapper::is_mapper * p = 0);Construct a dna_string from a mapper object (mapped_dna_string only).
basic_dna_string public member functions
void append(const char * str);Append a C string.
template<typename InIter> void append(InIter b, InIter e);Append ascii characters (A, C, G, T) to the string.
int compare(size_t start_pos, size_t max_bases, const basic_dna_string & str) const;Compare two substrings exactly.
Parameters: |
|
template<typename CiTraits> int compare_inexact(size_t start_pos, size_t max_bases, const basic_dna_string< CiTraits > & str, size_t max_distance = 0) const;Compare two substrings with errors.
Parameters: |
|
||||||||
Template Parameters: |
|
size_t find(const this_type & str, size_t start_pos = 0, size_t max_bases = ~(size_t) 0) const;Brute force string search. For a more refined approach, use two_stage_index. This method performs a linear search on DNA data taking O(max_bases) time.
Parameters: |
|
template<typename String> size_t find_inexact(const String & search_str, size_t start_pos = 0, size_t max_bases = ~(size_t) 0, size_t max_distance = 0) const;Brute force inexact string search. For a more refined approach, use two_stage_index. This method performs a linear search on DNA data taking O(max_bases) time.
Parameters: |
|
||||||||
Template Parameters: |
|
int get_code(size_t index) const;Get a value from 0..3 for a single base at offset "index".
word_type get_index(size_t pos, size_t num_index_chars) const;Get a right-justified word of values limited by num_index_chars, e.g. get_index(pos, 3) gives AAA...AAAXXX. Used by two_stage_index to index values.
const array_type & get_values() const;Back-door access to the values.
operator std::string() const;Convert to a C++ string.
bool operator!=(const basic_dna_string & rhs) const;Comparison operator.
bool operator<(const basic_dna_string & rhs) const;Comparison operator.
bool operator<=(const basic_dna_string & rhs) const;Comparison operator.
bool operator==(const basic_dna_string & rhs) const;Comparison operator.
bool operator>(const basic_dna_string & rhs) const;Comparison operator.
bool operator>=(const basic_dna_string & rhs) const;Comparison operator.
char operator[](size_t index) const;Read a single base (in ASCII) from a dna_string.
void reserve(size_t size);Reserve extra bases in the array for appending. This makes appending a little faster.
void resize(size_t size);Resize the string, appending 'A' to the end if expanding.
size_t size() const;Return the number of bases in this string.
basic_dna_string substr(size_t offset = 0, size_t length = ~(size_t) 0, bool rev_comp = false) const;Get a substring from a dna_string. The rev_comp parameter allows this to be the reverse complement of the substring.
void swap(basic_dna_string & rhs);Swap two dna_strings.
word_type window(size_t base) const;Get an unaligned word from the centre of the string (typically 32 values).
void write_binary(writer & wr) const;Write the structure in binary for later mapping.