File: ContextGroupBy.cpp

package info (click to toggle)
bedtools 2.26.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 55,328 kB
  • sloc: cpp: 37,989; sh: 6,930; makefile: 2,225; python: 163
file content (140 lines) | stat: -rw-r--r-- 3,035 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
 * ContextGroupBy.cpp
 *
 *  Created on: Mar 26, 2014
 *      Author: nek3d
 */


#include "ContextGroupBy.h"

// Default constructor for the groupBy context.
// Starts with the -full and -ignorecase behaviours switched off (they are
// turned on later by handle_full() / handle_ignorecase()), then configures
// the inherited context state through the setters below.
ContextGroupBy::ContextGroupBy()
: _printFullCols(false),
  _ignoreCase(false)
{
	// NOTE(review): the four settings below are inherited context options;
	// their exact semantics are defined outside this file. By name: input is
	// treated as sorted, coordinate-sort enforcement is turned off, column
	// operations are enabled, and BED output is made explicit -- confirm
	// against the base class.
	setSortedInput(true);
	_noEnforceCoordSort = true;
	setColumnOpsMethods(true);
	setExplicitBedOutput(true);

	// For columnOps, groupBy has a default operation (sum) but no default
	// column, so the column list is cleared here. isValidState() later
	// rejects the run if the user never supplied any columns.
	_keyListOps->setColumns("");

}

// Destructor: intentionally empty, this class performs no cleanup of its own here.
ContextGroupBy::~ContextGroupBy() {}


bool ContextGroupBy::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
{
	for (_i=_skipFirstArgs; _i < argc; _i++) {
		if (isUsed(_i - _skipFirstArgs)) {
			continue;
		}
		else if ((strcmp(_argv[_i], "-g") == 0) || (strcmp(_argv[_i], "-grp") == 0)) {
			if (!handle_g()) return false;
		}
		else if (strcmp(_argv[_i], "-inheader") == 0) {
			if (!handle_inheader()) return false;
		}
		else if (strcmp(_argv[_i], "-outheader") == 0) {
			if (!handle_outheader()) return false;
		}
		else if (strcmp(_argv[_i], "-header") == 0) {
			if (!handle_header()) return false;
		}
		else if (strcmp(_argv[_i], "-full") == 0) {
			if (!handle_full()) return false;
		}
		else if (strcmp(_argv[_i], "-ignorecase") == 0) {
			if (!handle_ignorecase()) return false;
		}
	}
	return ContextBase::parseCmdArgs(argc, argv, _skipFirstArgs);
}

// Checks the groupBy-specific settings after parsing and fills in defaults.
//
// Fails (with _errorMsg set) when no operation columns were supplied.
// Otherwise falls back to "-" as the input file when none was given and to
// columns 1,2,3 when no grouping columns were given, then defers to
// ContextBase::isValidState() for the final verdict.
bool ContextGroupBy::isValidState()
{
	// at least one operation column is mandatory
	const bool haveOpCols = !_keyListOps->getColumns().empty();
	if (!haveOpCols) {
		_errorMsg = "***** ERROR: -opCols parameter requires a value.";
		return false;
	}

	// no input file named: fall back to "-" (stdin)
	if (getNumInputFiles() == 0) {
		addInputFile("-");
	}

	// no grouping columns named: group on columns 1,2,3
	if (_groupStr.empty()) {
		_groupStr = "1,2,3";
	}

	return ContextBase::isValidState();
}


// Handles -g / -grp: the argument right after the flag is taken as the
// grouping-column string.
//
// Marks both the flag and its value as used and leaves _i pointing at the
// value, so the caller's loop continues after it. Returns false, with
// _errorMsg set, when the flag is the last argument on the command line.
bool ContextGroupBy::handle_g()
{
	const bool hasValue = (_i + 1 < _argc);
	if (!hasValue) {
		_errorMsg = "\n***** ERROR: -g option given, but columns to group not specified. *****";
		return false;
	}

	// the flag itself
	markUsed(_i - _skipFirstArgs);

	// step onto the value and record it
	++_i;
	_groupStr = _argv[_i];
	markUsed(_i - _skipFirstArgs);

	return true;
}

// Handles -inheader: raises the _inheader flag and marks the argument as
// consumed. Always succeeds.
// NOTE(review): presumably _inheader means the input starts with a header
// line -- confirm where the flag is read.
bool ContextGroupBy::handle_inheader()
{
	const int argSlot = _i - _skipFirstArgs;

	_inheader = true;
	markUsed(argSlot);

	return true;
}

// Handles -outheader: turns on header printing via setPrintHeader() and
// marks the argument as consumed. Always succeeds.
bool ContextGroupBy::handle_outheader()
{
	const int argSlot = _i - _skipFirstArgs;

	setPrintHeader(true);
	markUsed(argSlot);

	return true;
}

// Handles -header: combines the effects of -inheader and -outheader, i.e.
// raises _inheader and turns on header printing, then marks the argument as
// consumed. Always succeeds.
bool ContextGroupBy::handle_header()
{
	const int argSlot = _i - _skipFirstArgs;

	_inheader = true;
	setPrintHeader(true);
	markUsed(argSlot);

	return true;
}

// Handles -full: raises the _printFullCols flag and marks the argument as
// consumed. Always succeeds.
bool ContextGroupBy::handle_full()
{
	const int argSlot = _i - _skipFirstArgs;

	_printFullCols = true;
	markUsed(argSlot);

	return true;
}

// Handles -ignorecase: raises the _ignoreCase flag and marks the argument as
// consumed. Always succeeds.
bool ContextGroupBy::handle_ignorecase()
{
	const int argSlot = _i - _skipFirstArgs;

	_ignoreCase = true;
	markUsed(argSlot);

	return true;
}

// Builds a synthetic header line of the form "col_1<TAB>col_2<TAB>...col_N\n",
// where N is the field count reported by the first input file, stores it in
// _defaultHeader and returns a reference to that member.
//
// If the file reports no fields, the returned header is empty. The previous
// implementation overwrote the "last" character of an empty string in that
// case, which is an out-of-range write.
//
// NOTE(review): _files[0] is read unconditionally, so at least one input
// file is assumed to be registered by the time this is called -- confirm.
const string &ContextGroupBy::getDefaultHeader() {
	//groupBy does not support multiple databases.
	FileRecordMgr *frm = _files[0];
	int numFields = frm->getNumFields();

	_defaultHeader.clear();
	if (numFields < 1) {
		// no columns to name; leave the header empty rather than index
		// into an empty string
		return _defaultHeader;
	}

	ostringstream s;
	for (int i=1; i <= numFields; i++) {
		// tab goes between names only, so no trailing separator needs
		// to be patched afterwards
		if (i > 1) {
			s << "\t";
		}
		s << "col_" << i;
	}
	s << "\n";

	_defaultHeader.append(s.str());
	return _defaultHeader;
}