File: main.c

package info (click to toggle)
minimap 0.2-8
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 628 kB
  • sloc: ansic: 2,264; makefile: 121; sh: 13
file content (145 lines) | stat: -rw-r--r-- 6,025 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "minimap.h"

#define MM_VERSION "0.2-r123"

void liftrlimit()
{
#ifdef __linux__
	struct rlimit r;
	getrlimit(RLIMIT_AS, &r);
	r.rlim_cur = r.rlim_max;
	setrlimit(RLIMIT_AS, &r);
#endif
}

int main(int argc, char *argv[])
{
	mm_mapopt_t opt;
	int i, c, k = 15, w = -1, b = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx = 0;
	int tbatch_size = 100000000;
	uint64_t ibatch_size = 4000000000ULL;
	float f = 0.001;
	bseq_file_t *fp = 0;
	char *fnw = 0;
	FILE *fpr = 0, *fpw = 0;

	liftrlimit();
	mm_realtime0 = realtime();
	mm_mapopt_init(&opt);

	while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lRPST:m:L:Dx:")) >= 0) {
		if (c == 'w') w = atoi(optarg);
		else if (c == 'k') k = atoi(optarg);
		else if (c == 'b') b = atoi(optarg);
		else if (c == 'r') opt.radius = atoi(optarg);
		else if (c == 'c') opt.min_cnt = atoi(optarg);
		else if (c == 'm') opt.merge_frac = atof(optarg);
		else if (c == 'f') f = atof(optarg);
		else if (c == 't') n_threads = atoi(optarg);
		else if (c == 'v') mm_verbose = atoi(optarg);
		else if (c == 'g') opt.max_gap = atoi(optarg);
		else if (c == 'N') keep_name = 0;
		else if (c == 'd') fnw = optarg;
		else if (c == 'l') is_idx = 1;
		else if (c == 'R') opt.flag |= MM_F_WITH_REP;
		else if (c == 'P') opt.flag &= ~MM_F_WITH_REP;
		else if (c == 'D') opt.flag |= MM_F_NO_SELF;
		else if (c == 'O') opt.flag |= MM_F_NO_ISO;
		else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
		else if (c == 'T') opt.sdust_thres = atoi(optarg);
		else if (c == 'L') opt.min_match = atoi(optarg);
		else if (c == 'V') {
			puts(MM_VERSION);
			return 0;
		} else if (c == 'B' || c == 'I') {
			double x;
			char *p;
			x = strtod(optarg, &p);
			if (*p == 'G' || *p == 'g') x *= 1e9;
			else if (*p == 'M' || *p == 'm') x *= 1e6;
			else if (*p == 'K' || *p == 'k') x *= 1e3;
			if (c == 'B') tbatch_size = (uint64_t)(x + .499);
			else ibatch_size = (uint64_t)(x + .499);
		} else if (c == 'x') {
			if (strcmp(optarg, "ava10k") == 0) {
				opt.flag |= MM_F_AVA | MM_F_NO_SELF;
				opt.min_match = 100;
				opt.merge_frac = 0.0;
				w = 5;
			}
		}
	}
	if (w < 0) w = (int)(.6666667 * k + .499);

	if (argc == optind) {
		fprintf(stderr, "Usage: minimap [options] <target.fa> [query.fa] [...]\n");
		fprintf(stderr, "Options:\n");
		fprintf(stderr, "  Indexing:\n");
		fprintf(stderr, "    -k INT     k-mer size [%d]\n", k);
		fprintf(stderr, "    -w INT     minizer window size [{-k}*2/3]\n");
		fprintf(stderr, "    -I NUM     split index for every ~NUM input bases [4G]\n");
		fprintf(stderr, "    -d FILE    dump index to FILE []\n");
		fprintf(stderr, "    -l         the 1st argument is a index file (overriding -k, -w and -I)\n");
//		fprintf(stderr, "    -b INT     bucket bits [%d]\n", b); // most users would care about this
		fprintf(stderr, "  Mapping:\n");
		fprintf(stderr, "    -f FLOAT   filter out top FLOAT fraction of repetitive minimizers [%.3f]\n", f);
		fprintf(stderr, "    -r INT     bandwidth [%d]\n", opt.radius);
		fprintf(stderr, "    -m FLOAT   merge two chains if FLOAT fraction of minimizers are shared [%.2f]\n", opt.merge_frac);
		fprintf(stderr, "    -c INT     retain a mapping if it consists of >=INT minimizers [%d]\n", opt.min_cnt);
		fprintf(stderr, "    -L INT     min matching length [%d]\n", opt.min_match);
		fprintf(stderr, "    -g INT     split a mapping if there is a gap longer than INT [%d]\n", opt.max_gap);
		fprintf(stderr, "    -T INT     SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres);
//		fprintf(stderr, "    -D         skip self mappings but keep dual mappings\n"); // too confusing to expose to end users
		fprintf(stderr, "    -S         skip self and dual mappings\n");
		fprintf(stderr, "    -O         drop isolated hits before chaining (EXPERIMENTAL)\n");
		fprintf(stderr, "    -P         filtering potential repeats after mapping (EXPERIMENTAL)\n");
//		fprintf(stderr, "    -R         skip post-mapping repeat filtering\n"); // deprecated option for backward compatibility
		fprintf(stderr, "    -x STR     preset (recommended to be applied before other options) []\n");
		fprintf(stderr, "               ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n");
		fprintf(stderr, "  Input/Output:\n");
		fprintf(stderr, "    -t INT     number of threads [%d]\n", n_threads);
//		fprintf(stderr, "    -B NUM     process ~NUM bp in each batch [100M]\n");
//		fprintf(stderr, "    -v INT     verbose level [%d]\n", mm_verbose);
//		fprintf(stderr, "    -N         use integer as target names\n");
		fprintf(stderr, "    -V         show version number\n");
		fprintf(stderr, "\nSee minimap.1 for detailed description of the command-line options.\n");
		return 1;
	}

	if (is_idx) fpr = fopen(argv[optind], "rb");
	else fp = bseq_open(argv[optind]);
	if (fnw) fpw = fopen(fnw, "wb");
	for (;;) {
		mm_idx_t *mi = 0;
		if (fpr) mi = mm_idx_load(fpr);
		else if (!bseq_eof(fp))
			mi = mm_idx_gen(fp, w, k, b, tbatch_size, n_threads, ibatch_size, keep_name);
		if (mi == 0) break;
		if (mm_verbose >= 3)
			fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
					__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n);
		mm_idx_set_max_occ(mi, f);
		if (mm_verbose >= 3)
			fprintf(stderr, "[M::%s] max occurrences of a minimizer to consider: %d\n", __func__, mi->max_occ);
		if (fpw) mm_idx_dump(fpw, mi);
		for (i = optind + 1; i < argc; ++i)
			mm_map_file(mi, argv[i], &opt, n_threads, tbatch_size);
		mm_idx_destroy(mi);
	}
	if (fpw) fclose(fpw);
	if (fpr) fclose(fpr);
	if (fp)  bseq_close(fp);

	fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
	fprintf(stderr, "[M::%s] CMD:", __func__);
	for (i = 0; i < argc; ++i)
		fprintf(stderr, " %s", argv[i]);
	fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - mm_realtime0, cputime());
	return 0;
}