gatb.core-API-0.0.0
|
Class representing a De Bruijn graph. More...
#include <Graph.hpp>
Public Member Functions | |
GraphTemplate & | operator= (const GraphTemplate &graph) |
GraphIterator< Node > | iterator () const |
GraphVector< Node > | neighbors (Node &node, Direction dir=DIR_END) const |
GraphVector< Node > | neighbors (const typename Node::Value &kmer) const |
GraphVector< Node > | successors (Node &node) const |
GraphVector< Node > | predecessors (Node &node) const |
std::set< BranchingNode_t< Node > > | neighbors (typename std::set< BranchingNode_t< Node > >::iterator first, typename std::set< BranchingNode_t< Node > >::iterator last) const |
GraphVector< std::pair< Edge, Edge > > | successorsEdge (const Node &node1, const Node &node2) const |
GraphVector< std::pair< Edge, Edge > > | predecessorsEdge (const Node &node1, const Node &node2) const |
Node | neighbor (Node &source, Direction dir, kmer::Nucleotide nt) const |
Node | neighbor (Node &source, Direction dir, kmer::Nucleotide nt, bool &exists) const |
Node | successor (Node &source, kmer::Nucleotide nt) const |
Node | predecessor (Node &source, kmer::Nucleotide nt) const |
size_t | indegree (Node &node) const |
size_t | outdegree (Node &node) const |
size_t | degree (Node &node, Direction dir) const |
int | simplePathAvance (Node &node, Direction dir, Edge &output) const |
bool | contains (const Node &item) const |
std::string | toString (const Node &node) const |
bool | isBranching (Node &node) const |
Node | buildNode (const tools::misc::Data &data, size_t offset=0) const |
Node | buildNode (const char *sequence) const |
Node | reverse (const Node &node) const |
BranchingNode_t< Node > | reverse (const BranchingNode_t< Node > &node) const |
GraphVector< Node > | mutate (const Node &node, size_t idx, int mode=0) const |
kmer::Nucleotide | getNT (const Node &node, size_t idx) const |
int | queryAbundance (Node &node) const |
int | queryNodeState (Node &node) const |
std::string | toString (const Edge &edge) const |
std::string | toString (const BranchingEdge_t< Node, Edge > &edge) const |
bool | isSimple (Edge &edge) const |
std::string | getName () const |
size_t | getKmerSize () const |
tools::misc::IProperties & | getInfo () const |
void | remove () |
Edge | reverse (const Edge &edge) const |
void | precomputeAdjacency (unsigned int nbCores=1, bool verbose=true) |
GraphTemplate (size_t kmerSize) | |
GraphTemplate (bank::IBank *bank, tools::misc::IProperties *params) | |
GraphTemplate (tools::misc::IProperties *params) | |
GraphTemplate (const std::string &uri) | |
Static Public Member Functions | |
static GraphTemplate | create (size_t kmerSize) |
static GraphTemplate | create (bank::IBank *bank, const char *fmt,...) |
static GraphTemplate | create (const char *fmt,...) |
static GraphTemplate | create (tools::misc::IProperties *options) |
static GraphTemplate | load (const std::string &uri) |
static tools::misc::IOptionsParser * | getOptionsParser (bool includeMandatory=true) |
static void | executeAlgorithm (gatb::core::tools::misc::impl::Algorithm &algorithm, gatb::core::tools::storage::impl::Storage *storage, gatb::core::tools::misc::IProperties *props, gatb::core::tools::misc::IProperties &info) |
Public Attributes | |
tools::storage::impl::StorageMode_e | _storageMode |
tools::storage::impl::Storage * | _storage |
void * | _variant |
size_t | _kmerSize |
size_t | _nbSolidKmers |
tools::misc::impl::Properties | _info |
Static Public Attributes | |
static const tools::storage::impl::StorageMode_e | PRODUCT_MODE_DEFAULT = tools::storage::impl::STORAGE_HDF5 |
Friends | |
template<typename , typename , typename > | |
struct | build_visitor_solid |
Class representing a De Bruijn graph.
This class is the entry point for managing De Bruijn graphs in gatb-core.
Getting a Graph object can be done through :
Once a client has a Graph object (with create or load), it is possible to go through the graph in different ways.
The first possibility is to use a Node iterator over the whole graph. For instance, all the nodes can be iterated this way, or only the branching nodes.
The second possibility is to navigate starting from a specific node. For instance, the neighbors of the starting node can be reached.
Note: the Graph class doesn't provide a means to mark nodes (i.e. to remember which nodes have been visited); this feature is left to subclasses or other helper classes.
Some utility methods may be useful for debugging (like ascii representation of a node or an edge).
The underlying structure of the graph is taken from Minia:
Once a graph is built (from a set of reads), it is saved in a file (most likely in HDF5 format). It is thus possible to get a Graph object by loading the file instead of rebuilding it.
Note: branching nodes are computed during the graph building; they are also saved in the graph output file.
GraphTemplate | ( | size_t | kmerSize | ) |
Constructor for empty graph.
GraphTemplate | ( | bank::IBank * | bank, |
tools::misc::IProperties * | params | ||
) |
Constructor. Use for GraphTemplate creation (ie. DSK + debloom) and filesystem save.
GraphTemplate | ( | tools::misc::IProperties * | params | ) |
Constructor. Use for GraphTemplate creation (ie. DSK + debloom) and filesystem save.
GraphTemplate | ( | const std::string & | uri | ) |
Constructor. Use for reading from filesystem.
Node buildNode | ( | const tools::misc::Data & | data, |
size_t | offset = 0 |
||
) | const |
Build a fake node (ie. not necessarily in the De Bruijn graph). Mainly for test purpose.
[in] | data | : a string like structure for the sequence from which the kmer of the node is extracted |
[in] | offset | : starting offset in the data |
Node buildNode | ( | const char * | sequence | ) | const |
Build a fake node (ie. not necessarily in the De Bruijn graph). Mainly for test purpose.
[in] | sequence | : a sequence of nucleotides in ASCII format |
bool contains | ( | const Node & | item | ) | const |
Tells whether or not a node belongs to the graph.
[in] | item | : the node |
|
inlinestatic |
Build an empty graph.
[in] | kmerSize | kmer size |
|
static |
Build a graph from a given bank.
[in] | bank | : bank to get the reads from |
[in] | fmt | : printf-like format for the command line string |
|
static |
Build a graph from user options.
[in] | fmt | printf-like format |
|
inlinestatic |
Build a graph from scratch.
[in] | options | : user parameters for building the graph. |
size_t degree | ( | Node & | node, |
Direction | dir | ||
) | const |
Get the degree of the node (either incoming or outgoing).
[in] | node | : the node |
[in] | dir | : direction of the degree |
|
static |
Algorithm configuration.
|
inline |
Get information about the graph (gathered during its creation).
|
inline |
Get the size of the kmers.
|
inline |
Return the name of the graph.
Nucleotide getNT | ( | const Node & | node, |
size_t | idx | ||
) | const |
Return a nucleotide at position 'idx' of a given node
[in] | node | : the node we want to extract a nucleotide from |
[in] | idx | : the position of the nucleotide to be extracted |
|
static |
Get a parser object that knows the user options for building a graph.
size_t indegree | ( | Node & | node | ) | const |
Get the incoming degree of the node.
[in] | node | : the node |
bool isBranching | ( | Node & | node | ) | const |
Tells whether the provided node is branching or not.
[in] | node | : the node to be asked |
bool isSimple | ( | Edge & | edge | ) | const |
Tells whether the provided edge is simple: outdegree(from)==1 and indegree(to)==1
[in] | edge | : the edge to be asked |
|
inline |
Creates an iterator over the nodes of the graph. This used to be a templated method, but it is no longer templated because of issues with nested template specialization. Call iteratorBranching if you want an iterator over BranchingNode objects.
|
inlinestatic |
Load a graph from some URI.
[in] | uri | : the uri to get the graph from |
|
inline |
Return a specific neighbor from a given node. The neighbor is defined by a direction and the transition nucleotide. IMPORTANT: this method will not check that the neighbor node belongs to the graph: it merely computes the next kmer but doesn't check the Bloom filter. It is assumed that the client has already asked for the neighbors and so knows the valid transitions.
[in] | source | : the source node |
[in] | dir | : the direction of the transition |
[in] | nt | : the nucleotide of the transition |
|
inline |
Return a specific neighbor from a given node. The neighbor is defined by a direction and the transition nucleotide. IMPORTANT: this method will check that the neighbor node belongs to the graph. If the neighbor is not in the graph, the 'exists' parameter is set to false, true otherwise.
[in] | source | : the source node |
[in] | dir | : the direction of the transition |
[in] | nt | : the nucleotide of the transition |
[out] | exists | : true means that the neighbor is in the graph, false otherwise |
Returns a vector of neighbors of the provided node.
[in] | node | : the node whose neighbors are wanted |
[in] | direction | : the direction of the neighbors. If not set, out and in neighbors are computed. |
|
inline |
Returns a vector of neighbors of the provided kmer. It has to be understood as the following:
[in] | kmer | : the kmer whose neighbors are wanted. |
std::set< BranchingNode_t< Node > > neighbors | ( | typename std::set< BranchingNode_t< Node > >::iterator | first, |
typename std::set< BranchingNode_t< Node > >::iterator | last | ||
) | const |
Returns a set of neighbors for each node iterated with the provided two iterators
[in] | first | : beginning of the iteration |
[in] | last | : end of the iteration |
GraphTemplate< Node, Edge, GraphDataVariant > & operator= | ( | const GraphTemplate< Node, Edge, GraphDataVariant > & | graph | ) |
Assignment operator overload.
size_t outdegree | ( | Node & | node | ) | const |
Get the outgoing degree of the node.
[in] | node | : the node |
void precomputeAdjacency | ( | unsigned int | nbCores = 1 , |
bool | verbose = true |
||
) |
Cache adjacency information from the Bloom filter to an array, 8 bits per node, for faster traversal queries.
|
inline |
Shortcut for neighbor with dir==DIR_INCOMING.
Shortcut for 'neighbors' method with direction==DIR_INCOMING.
[in] | node | : the node whose neighbors are wanted |
|
inline |
Returns the predecessors of two nodes, ie with the same transition nucleotide from both nodes.
[in] | node1 | : first node |
[in] | node2 | : second node |
int queryAbundance | ( | Node & | node | ) | const |
Return the abundance of a node by querying the perfect hash function
[in] | node | : the node |
int queryNodeState | ( | Node & | node | ) | const |
Return the state of a node by querying the perfect hash function. A node state is either normal, marked, or deleted.
[in] | node | : the node or a node index (unsigned long) from the MPHF |
void remove | ( | ) |
Physically remove a graph.
Return the reverse complement node of the provided one.
[in] | node | : the node to be reversed |
BranchingNode_t< Node > reverse | ( | const BranchingNode_t< Node > & | node | ) | const |
Return the reverse complement node of the provided one.
[in] | node | : the node to be reversed |
Reverse an edge.
[in] | edge | : the edge to be reversed |
Simple paths traversal invariant: the input kmer has no in-branching.
|
inline |
Shortcut for neighbor with dir==DIR_OUTCOMING.
Shortcut for 'neighbors' method with direction==DIR_OUTCOMING.
[in] | node | : the node whose neighbors are wanted |
|
inline |
Returns the successors of two nodes, ie with the same transition nucleotide from both nodes.
[in] | node1 | : first node |
[in] | node2 | : second node |
std::string toString | ( | const Node & | node | ) | const |
Get the ascii string for the node, according to its strand.
[in] | node | the node to get the string from |
std::string toString | ( | const Edge & | edge | ) | const |
Get the ascii string for the edge
[in] | edge | : the edge to get the string from |
std::string toString | ( | const BranchingEdge_t< Node, Edge > & | edge | ) | const |
Get the ascii string for the branching edge
[in] | edge | : the edge to get the string from |
|
friend |
Friends.
Creation information.
size_t _kmerSize |
kmer size of the graph
size_t _nbSolidKmers |
Number of solid kmers in the graph (only populated by GraphUnitigs).
tools::storage::impl::Storage* _storage |
Storage.
tools::storage::impl::StorageMode_e _storageMode |
Kind of storage for the graph.
void* _variant |
Defined as a void* for hiding implementation in cpp file.
|
static |
Default storage kind.