File: assemblygraph.h

package info (click to toggle)
bandage 0.9.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 15,684 kB
  • sloc: cpp: 45,359; sh: 491; makefile: 12
file content (219 lines) | stat: -rw-r--r-- 10,081 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
//Copyright 2017 Ryan Wick

//This file is part of Bandage

//Bandage is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.

//Bandage is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU General Public License for more details.

//You should have received a copy of the GNU General Public License
//along with Bandage.  If not, see <http://www.gnu.org/licenses/>.


#ifndef ASSEMBLYGRAPH_H
#define ASSEMBLYGRAPH_H

#include <QObject>
#include <vector>

#include "../ogdf/basic/Graph.h"
#include "../ogdf/basic/GraphAttributes.h"
#include <QString>
#include <QMap>
#include "../program/globals.h"
#include "../ui/mygraphicsscene.h"
#include "path.h"
#include <QPair>

class DeBruijnNode;
class DeBruijnEdge;
class MyProgressDialog;

class AssemblyGraph : public QObject
{
    Q_OBJECT

public:
    AssemblyGraph();
    ~AssemblyGraph();

    //Nodes are stored in a map with a key of the node's name.
    QMap<QString, DeBruijnNode*> m_deBruijnGraphNodes;

    //Edges are stored in a map with a key of the starting and ending node
    //pointers.
    QMap<QPair<DeBruijnNode*, DeBruijnNode*>, DeBruijnEdge*> m_deBruijnGraphEdges;

    ogdf::Graph * m_ogdfGraph;
    ogdf::EdgeArray<double> * m_edgeArray;
    ogdf::GraphAttributes * m_graphAttributes;

    int m_kmer;
    int m_nodeCount;
    int m_edgeCount;
    long long m_totalLength;
    long long m_shortestContig;
    long long m_longestContig;
    double m_meanDepth;
    double m_firstQuartileDepth;
    double m_medianDepth;
    double m_thirdQuartileDepth;
    GraphFileType m_graphFileType;
    bool m_contiguitySearchDone;
    QString m_filename;
    QString m_depthTag;
    SequencesLoadedFromFasta m_sequencesLoadedFromFasta;

    void cleanUp();
    void createDeBruijnEdge(QString node1Name, QString node2Name,
                            int overlap = 0,
                            EdgeOverlapType overlapType = UNKNOWN_OVERLAP);
    void clearOgdfGraphAndResetNodes();
    static QByteArray getReverseComplement(QByteArray forwardSequence);
    void resetEdges();
    double getMeanDepth(bool drawnNodesOnly = false);
    double getMeanDepth(std::vector<DeBruijnNode *> nodes);
    double getMeanDepth(QList<DeBruijnNode *> nodes);
    void resetNodeContiguityStatus();
    void resetAllNodeColours();
    void clearAllBlastHitPointers();
    void determineGraphInfo();
    void clearGraphInfo();
    void buildDeBruijnGraphFromLastGraph(QString fullFileName);
    void buildDeBruijnGraphFromGfa(QString fullFileName, bool * unsupportedCigar, bool * customLabels,
                                   bool * customColours, QString *bandageOptionsError);
    void buildDeBruijnGraphFromFastg(QString fullFileName);
    void buildDeBruijnGraphFromTrinityFasta(QString fullFileName);
    int buildDeBruijnGraphFromAsqg(QString fullFileName);
    void buildDeBruijnGraphFromPlainFasta(QString fullFileName);
    void recalculateAllDepthsRelativeToDrawnMean();
    void recalculateAllNodeWidths();

    GraphFileType getGraphFileTypeFromFile(QString fullFileName);
    bool checkFileIsLastGraph(QString fullFileName);
    bool checkFileIsFastG(QString fullFileName);
    bool checkFileIsFasta(QString fullFileName);
    bool checkFileIsGfa(QString fullFileName);
    bool checkFileIsTrinityFasta(QString fullFileName);
    bool checkFileIsAsqg(QString fullFileName);
    bool checkFirstLineOfFile(QString fullFileName, QString regExp);

    bool loadGraphFromFile(QString filename);
    void buildOgdfGraphFromNodesAndEdges(std::vector<DeBruijnNode *> startingNodes,
                                         int nodeDistance);
    void addGraphicsItemsToScene(MyGraphicsScene * scene);

    QStringList splitCsv(QString line, QString sep=",");
    bool loadCSV(QString filename, QStringList * columns, QString * errormsg, bool * coloursLoaded);
    std::vector<DeBruijnNode *> getStartingNodes(QString * errorTitle,
                                                 QString * errorMessage,
                                                 bool doubleMode,
                                                 QString nodesList,
                                                 QString blastQueryName);

    bool checkIfStringHasNodes(QString nodesString);
    QString generateNodesNotFoundErrorMessage(std::vector<QString> nodesNotInGraph,
                                              bool exact);
    std::vector<DeBruijnNode *> getNodesFromString(QString nodeNamesString,
                                                   bool exactMatch,
                                                   std::vector<QString> * nodesNotInGraph = 0);
    void layoutGraph();

    void setAllEdgesExactOverlap(int overlap);
    void autoDetermineAllEdgesExactOverlap();

    static void readFastaOrFastqFile(QString filename, std::vector<QString> * names,
                                     std::vector<QByteArray> * sequences);
    static void readFastaFile(QString filename, std::vector<QString> * names,
                              std::vector<QByteArray> * sequences);
    static void readFastqFile(QString filename, std::vector<QString> * names,
                              std::vector<QByteArray> * sequences);

    int getDrawnNodeCount() const;
    void deleteNodes(std::vector<DeBruijnNode *> * nodes);
    void deleteEdges(std::vector<DeBruijnEdge *> * edges);
    void duplicateNodePair(DeBruijnNode * node, MyGraphicsScene * scene);
    bool mergeNodes(QList<DeBruijnNode *> nodes, MyGraphicsScene * scene,
                    bool recalulateDepth);
    void removeGraphicsItemEdges(const std::vector<DeBruijnEdge *> * edges,
                                 bool reverseComplement,
                                 MyGraphicsScene * scene);
    void removeGraphicsItemNodes(const std::vector<DeBruijnNode *> * nodes,
                                 bool reverseComplement,
                                 MyGraphicsScene * scene);
    int mergeAllPossible(MyGraphicsScene * scene = 0,
                         MyProgressDialog * progressDialog = 0);

    void saveEntireGraphToFasta(QString filename);
    void saveEntireGraphToFastaOnlyPositiveNodes(QString filename);
    bool saveEntireGraphToGfa(QString filename);
    bool saveVisibleGraphToGfa(QString filename);
    void changeNodeName(QString oldName, QString newName);
    NodeNameStatus checkNodeNameValidity(QString nodeName);
    void changeNodeDepth(std::vector<DeBruijnNode *> * nodes,
                             double newDepth);

    static QByteArray addNewlinesToSequence(QByteArray sequence, int interval = 70);
    int getDeadEndCount() const;
    void getNodeStats(int * n50, int * shortestNode, int * firstQuartile, int * median, int * thirdQuartile, int * longestNode) const;
    void getGraphComponentCountAndLargestComponentSize(int * componentCount, int * largestComponentLength) const;
    double getMedianDepthByBase() const;
    long long getEstimatedSequenceLength() const;
    long long getEstimatedSequenceLength(double medianDepthByBase) const;
    long long getTotalLengthMinusEdgeOverlaps() const;
    QPair<int, int> getOverlapRange() const;
    bool attemptToLoadSequencesFromFasta();
    long long getTotalLengthOrphanedNodes() const;
    bool useLinearLayout() const;


private:
    template<typename T> double getValueUsingFractionalIndex(std::vector<T> * v, double index) const;
    QString convertNormalNumberStringToBandageNodeName(QString number);
    void makeReverseComplementNodeIfNecessary(DeBruijnNode * node);
    void pointEachNodeToItsReverseComplement();
    QStringList removeNullStringsFromList(QStringList in);
    std::vector<DeBruijnNode *> getNodesFromListExact(QStringList nodesList, std::vector<QString> * nodesNotInGraph);
    std::vector<DeBruijnNode *> getNodesFromListPartial(QStringList nodesList, std::vector<QString> * nodesNotInGraph);
    std::vector<DeBruijnNode *> getNodesFromBlastHits(QString queryName);
    std::vector<DeBruijnNode *> getNodesInDepthRange(double min, double max);
    std::vector<int> makeOverlapCountVector();
    bool cigarContainsOnlyM(QString cigar);
    int getLengthFromSimpleCigar(QString cigar);
    int getLengthFromCigar(QString cigar);
    int getCigarCount(QString cigarCode, QString cigar);
    QString getOppositeNodeName(QString nodeName);
    void clearAllCsvData();
    QString getNodeNameFromString(QString string);
    QString getNewNodeName(QString oldNodeName);
    void duplicateGraphicsNode(DeBruijnNode * originalNode, DeBruijnNode * newNode, MyGraphicsScene * scene);
    bool canAddNodeToStartOfMergeList(QList<DeBruijnNode *> * mergeList,
                                      DeBruijnNode * potentialNode);
    bool canAddNodeToEndOfMergeList(QList<DeBruijnNode *> * mergeList,
                                    DeBruijnNode * potentialNode);
    QString getUniqueNodeName(QString baseName);
    void mergeGraphicsNodes(QList<DeBruijnNode *> * originalNodes,
                            QList<DeBruijnNode *> * revCompOriginalNodes,
                            DeBruijnNode * newNode, MyGraphicsScene * scene);
    bool mergeGraphicsNodes2(QList<DeBruijnNode *> * originalNodes,
                             DeBruijnNode * newNode, MyGraphicsScene * scene);
    void removeAllGraphicsEdgesFromNode(DeBruijnNode * node,
                                        bool reverseComplement,
                                        MyGraphicsScene * scene);
    QString cleanNodeName(QString name);
    double findDepthAtIndex(QList<DeBruijnNode *> * nodeList, long long targetIndex) const;
    bool allNodesStartWith(QString start) const;
    QString simplifyCanuNodeName(QString oldName) const;

signals:
    void setMergeTotalCount(int totalCount);
    void setMergeCompletedCount(int completedCount);
};

#endif // ASSEMBLYGRAPH_H