File: vcf_file_index.cpp

package info (click to toggle)
vcftools 0.1.9-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,396 kB
  • sloc: perl: 10,233; cpp: 7,950; pascal: 751; makefile: 60; php: 43; sh: 12
file content (171 lines) | stat: -rw-r--r-- 3,931 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
 * vcf_file_index.cpp
 *
 *  Created on: 3 Aug 2011
 *      Author: auton
 */

#include "vcf_file.h"

bool vcf_file::read_index_file(const string &index_filename)
{
	// Check index is newer than vcf file
	struct stat stat_idx, stat_vcf;
	stat(index_filename.c_str(), &stat_idx);
	stat(filename.c_str(), &stat_vcf);
	if (stat_vcf.st_mtime > stat_idx.st_mtime)
	{
		LOG.warning("VCF index is older than VCF file. Will regenerate.");
		return false;
	}

	LOG.printLOG("Reading Index file.\n");
	big_endian_machine = is_big_endian();
	gzFile in = gzopen(index_filename.c_str(), "rb");
	if (in == NULL)
		return false;

	char magic[7];
	idx_read(in, magic, 7, sizeof(char));
	if (strncmp(magic, "VCFIDX\1", 7) != 0)
	{	// Doesn't appear to be an index file
		gzclose(in);
		LOG.warning("Index file doesn't appear to be valid. Will (try to) overwrite.\n");
		return false;
	}

	uint32_t tmp;
	uint64_t tmp64;
	idx_read(in, &tmp, 1, sizeof(uint32_t));
	N_entries = tmp;
	idx_read(in, &tmp, 1, sizeof(uint32_t));
	N_indv = tmp;
	idx_read(in, &tmp, 1, sizeof(uint32_t));
	unsigned int l_meta = tmp;
	idx_read(in, &tmp, 1, sizeof(uint32_t));
	unsigned int l_indv = tmp;

	char *meta_buffer = new char [l_meta+1];
	char *indv_buffer = new char [l_indv+1];

	idx_read(in, meta_buffer, l_meta, 1);
	idx_read(in, indv_buffer, l_indv, 1);

	// Split the strings
	meta.resize(0);
	char * pch;
	pch = strtok(meta_buffer,"\n");
	while (pch != NULL)
	{
		meta.push_back(pch);
		pch = strtok(NULL, "\n");
	}

	indv.resize(0);
	pch = strtok (indv_buffer,"\n");
	while (pch != NULL)
	{
		indv.push_back(pch);
		pch = strtok (NULL, "\n");
	}

	delete [] indv_buffer;
	delete [] meta_buffer;

	entry_file_locations.resize(N_entries);
	for (unsigned int ui=0; ui<N_entries; ui++)
	{
		idx_read(in, &tmp64, 1, sizeof(uint64_t));
		entry_file_locations[ui] = tmp64;
	}

	gzclose(in);
	return true;
}


void vcf_file::write_index_file(const string &index_filename)
{
	LOG.printLOG("Writing Index file.\n");
	big_endian_machine = is_big_endian();

	gzFile out = gzopen(index_filename.c_str(), "wb");
	if (out == NULL)
	{
		LOG.warning("Could not write index file.\n");
		return;
	}

	unsigned int l_meta = 0;
	for (unsigned int ui=0; ui<meta.size(); ui++)
		l_meta += meta[ui].size() + 1;

	unsigned int l_indv = 0;
	for (unsigned int ui=0; ui<indv.size(); ui++)
		l_indv += indv[ui].size() + 1;

	idx_write(out, (char*)"VCFIDX\1", 7, 1);

	uint32_t tmp;
	uint64_t tmp64;
	tmp = N_entries; idx_write(out, &tmp, 1, sizeof(uint32_t));
	tmp = N_indv; idx_write(out, &tmp, 1, sizeof(uint32_t));
	tmp = l_meta; idx_write(out, &tmp, 1, sizeof(uint32_t));
	tmp = l_indv; idx_write(out, &tmp, 1, sizeof(uint32_t));

	for (unsigned int ui=0; ui<meta.size(); ui++)
	{
		string str = meta[ui] + "\n";
		idx_write(out, (char*)(str.c_str()), str.size(), 1);
	}

	for (unsigned int ui=0; ui<indv.size(); ui++)
	{
		string str = indv[ui] + "\n";
		idx_write(out, (char*)(str.c_str()), str.size(), 1);
	}

	for (unsigned int ui=0; ui<entry_file_locations.size(); ui++)
	{
		tmp64 = entry_file_locations[ui];
		idx_write(out, &tmp64, 1, sizeof(uint64_t));
	}

	gzclose(out);
}


// Reverse, in place, the order of the n bytes starting at b.
// Nothing happens when n is 1 or less.
inline void vcf_file::ByteSwap(unsigned char *b, int n) const
{
   // Walk inwards from both ends, exchanging the outermost pair each step.
   for (int front = 0, back = n-1; front < back; ++front, --back)
      swap(b[front], b[back]);
}


/*
 * Read len elements of size bytes each from in into buffer.
 * On a big-endian machine every multi-byte element is byte-swapped after
 * reading. Returns the value returned by gzread.
 */
int vcf_file::idx_read(gzFile &in, void *buffer, unsigned int len, size_t size)
{
	const int gz_result = gzread(in, buffer, size*len);

	// Note: don't bother swapping character arrays - index is defined as little endian.
	if (!big_endian_machine || !(size > 1))
		return gz_result;

	unsigned char *element = (unsigned char *)buffer;
	for (unsigned int k = 0; k < len; ++k, element += size)
		ByteSwap(element, size);

	return gz_result;
}

/*
 * Write len elements of size bytes each from buffer to out.
 * On a big-endian machine every multi-byte element is byte-swapped before
 * writing. The swap is done in place, so the caller's buffer is left in
 * swapped byte order afterwards.
 */
void vcf_file::idx_write(gzFile &out, void *buffer, unsigned int len, size_t size)
{
	const bool swap_needed = big_endian_machine && (size > 1);
	if (swap_needed)
	{
		unsigned char *element = (unsigned char *)buffer;
		for (unsigned int k = 0; k < len; ++k, element += size)
			ByteSwap(element, size);
	}

	gzwrite(out, buffer, size*len);
}