Boost C++ Libraries Home Libraries People FAQ More

PrevUpHomeNext

Class template basic_dna_string

boost::genetics::basic_dna_string — This class stores DNA strings compactly allowing 32 or more bases to be accessed in a single instruction.

Synopsis

// In header: <boost/genetics/dna_string.hpp>

template<typename Traits> 
class basic_dna_string {
public:
  // types
  typedef Traits::DnaArrayType       array_type;
  typedef basic_dna_string< Traits > this_type; 
  typedef Traits::DnaWordType        word_type; 

  // construct/copy/destruct
  basic_dna_string();
  basic_dna_string(size_t);
  template<typename InIter> basic_dna_string(InIter, InIter);
  template<typename StrChar, typename StrTraits, typename StrAllocator> 
    basic_dna_string(const std::basic_string< StrChar, StrTraits, StrAllocator > &, 
                     size_t = 0, size_t = ~(size_t) 0);
  template<typename charT> 
    basic_dna_string(const charT *, size_t = 0, size_t = ~(size_t) 0);
  template<typename Mapper> 
    basic_dna_string(Mapper &, typename Mapper::is_mapper * = 0);

  // public member functions
  void append(const char *);
  template<typename InIter> void append(InIter, InIter);
  int compare(size_t, size_t, const basic_dna_string &) const;
  template<typename CiTraits> 
    int compare_inexact(size_t, size_t, const basic_dna_string< CiTraits > &, 
                        size_t = 0) const;
  size_t find(const this_type &, size_t = 0, size_t = ~(size_t) 0) const;
  template<typename String> 
    size_t find_inexact(const String &, size_t = 0, size_t = ~(size_t) 0, 
                        size_t = 0) const;
  int get_code(size_t) const;
  word_type get_index(size_t, size_t) const;
  const array_type & get_values() const;
  operator std::string() const;
  bool operator!=(const basic_dna_string &) const;
  bool operator<(const basic_dna_string &) const;
  bool operator<=(const basic_dna_string &) const;
  bool operator==(const basic_dna_string &) const;
  bool operator>(const basic_dna_string &) const;
  bool operator>=(const basic_dna_string &) const;
  char operator[](size_t) const;
  void reserve(size_t);
  void resize(size_t);
  size_t size() const;
  basic_dna_string 
  substr(size_t = 0, size_t = ~(size_t) 0, bool = false) const;
  void swap(basic_dna_string &);
  word_type window(size_t) const;
  void write_binary(writer &) const;

  // private member functions
  template<typename String, bool cpu_has_popcnt> 
    size_t inexact_search(const String &, size_t, size_t, word_type, 
                          word_type, size_t, size_t, size_t) const;

  // public data members
  static const size_t bases_per_value;
  static const size_t npos;
};

Description

Like many of the container classes in this library, it can be specialised into a standard (std::vector) version for construction and a read-only mapped (mapped_vector) version for high performance use.

basic_dna_string public construct/copy/destruct

  1. basic_dna_string();
    Default constructor.
  2. basic_dna_string(size_t size);
    Construct a dna_string with size elements (all 'A').
  3. template<typename InIter> basic_dna_string(InIter b, InIter e);
    Construct a dna_string from a range of memory.
  4. template<typename StrChar, typename StrTraits, typename StrAllocator> 
      basic_dna_string(const std::basic_string< StrChar, StrTraits, StrAllocator > & str, 
                       size_t pos = 0, size_t n = ~(size_t) 0);
    Construct a dna_string from a C++ string.
  5. template<typename charT> 
      basic_dna_string(const charT * str, size_t pos = 0, size_t n = ~(size_t) 0);
    Construct a dna_string from a C string, or from a substring of it.
  6. template<typename Mapper> 
      basic_dna_string(Mapper & map, typename Mapper::is_mapper * p = 0);
    Construct a dna_string from a mapper object (mapped_dna_string only).

basic_dna_string public member functions

  1. void append(const char * str);
    Append a C string.
  2. template<typename InIter> void append(InIter b, InIter e);
    Append ascii characters (A, C, G, T) to the string.
  3. int compare(size_t start_pos, size_t max_bases, const basic_dna_string & str) const;
    Compare two substrings exactly.

    Parameters:

    max_bases

    maximum number of bases to search.

    start_pos

    Zero-based offset to start the search.

    str

    dna_string to compare with.

  4. template<typename CiTraits> 
      int compare_inexact(size_t start_pos, size_t max_bases, 
                          const basic_dna_string< CiTraits > & str, 
                          size_t max_distance = 0) const;
    Compare two substrings with errors.

    Parameters:

    max_bases

    maximum number of bases to search.

    max_distance

    number of allowable errors in the search.

    start_pos

    Zero-based offset to start the search.

    str

    dna_string to compare with.

    Template Parameters:

    CiTraits

    Traits of other string to compare with.

  5. size_t find(const this_type & str, size_t start_pos = 0, 
                size_t max_bases = ~(size_t) 0) const;
    Brute force string search. For a more refined approach, use two_stage_index. This method performs a linear search on DNA data taking O(max_bases) time.

    Parameters:

    max_bases

    maximum number of bases to search.

    start_pos

    Zero-based offset to start the search.

  6. template<typename String> 
      size_t find_inexact(const String & search_str, size_t start_pos = 0, 
                          size_t max_bases = ~(size_t) 0, 
                          size_t max_distance = 0) const;
    Brute force inexact string search, allowing up to max_distance errors. For a more refined approach, use two_stage_index. This method performs a linear search on DNA data taking O(max_bases) time.

    Parameters:

    max_bases

    maximum number of bases to search.

    max_distance

    number of allowable errors in the search.

    search_str

    DNA string to search.

    start_pos

    Zero-based offset to start the search.

    Template Parameters:

    String

    DNA sequence string type, typically default dna_string.

  7. int get_code(size_t index) const;
    Get a value from 0..3 for a single base at offset "index".
  8. word_type get_index(size_t pos, size_t num_index_chars) const;
    Get a right-justified word of values limited by num_index_chars, e.g. get_index(pos, 3) gives AAA...AAAXXX. Used by two_stage_index to index values.
  9. const array_type & get_values() const;
    Back-door access to the values.
  10. operator std::string() const;
    Convert to a C++ string.
  11. bool operator!=(const basic_dna_string & rhs) const;
    Comparison operator.
  12. bool operator<(const basic_dna_string & rhs) const;
    Comparison operator.
  13. bool operator<=(const basic_dna_string & rhs) const;
    Comparison operator.
  14. bool operator==(const basic_dna_string & rhs) const;
    Comparison operator.
  15. bool operator>(const basic_dna_string & rhs) const;
    Comparison operator.
  16. bool operator>=(const basic_dna_string & rhs) const;
    Comparison operator.
  17. char operator[](size_t index) const;
    Read a single base (in ASCII) from a dna_string.
  18. void reserve(size_t size);
    Reserve extra bases in the array for appending. This makes appending a little faster.
  19. void resize(size_t size);
    Resize the string, appending 'A' to the end if expanding.
  20. size_t size() const;
    Return the number of bases in this string.
  21. basic_dna_string 
    substr(size_t offset = 0, size_t length = ~(size_t) 0, bool rev_comp = false) const;
    Get a substring from a dna_string. The rev_comp parameter allows this to be the reverse complement of the substring.
  22. void swap(basic_dna_string & rhs);
    Swap two dna_strings.
  23. word_type window(size_t base) const;
    Get an unaligned word from the centre of the string (typically 32 values).
  24. void write_binary(writer & wr) const;
    Write the structure in binary for later mapping.

basic_dna_string private member functions

  1. template<typename String, bool cpu_has_popcnt> 
      size_t inexact_search(const String & search_str, size_t pos, size_t nv, 
                            word_type s0, word_type s0mask, size_t max_distance, 
                            size_t max_bases, size_t last) const;

PrevUpHomeNext