File: vcftools.cpp

package info (click to toggle)
vcftools 0.1.9-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,396 kB
  • sloc: perl: 10,233; cpp: 7,950; pascal: 751; makefile: 60; php: 43; sh: 12
file content (109 lines) | stat: -rw-r--r-- 6,574 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
 * vcftools.cpp
 *
 *  Created on: Aug 19, 2009
 *      Author: Adam Auton
 *      ($Revision: 249 $)
 */
#include "vcftools.h"

output_log LOG;

int main(int argc, char *argv[])
{
	time_t start,end;
	time(&start);

	// The following turns off sync between C and C++ streams.
	// Apparently it's faster to turn sync off, and as I don't use C streams, it's okay to turn off.
	ios_base::sync_with_stdio(false);

	parameters params(argc, argv);
	params.print_help();
	params.read_parameters();

	LOG.open(params.output_prefix);

	LOG.printLOG("\nVCFtools - " + VCFTOOLS_VERSION + "\n");
	LOG.printLOG("(C) Adam Auton 2009\n\n");

	params.print_params();

	vcf_file vcf(params.vcf_filename, params.vcf_compressed, params.chr_to_keep, params.chr_to_exclude, params.force_write_index);

	// Apply various filters as required.
	vcf.apply_filters(params);

	unsigned int N_indv = vcf.N_kept_individuals();
	unsigned int N_sites = vcf.N_kept_sites();
	LOG.printLOG("After filtering, kept " + output_log::int2str(N_indv) + " out of " + output_log::int2str(vcf.N_indv) + " Individuals\n");
	LOG.printLOG("After filtering, kept " + output_log::int2str(N_sites) + " out of a possible " + output_log::int2str(vcf.N_entries) + " Sites\n");
	if (N_sites == 0)
		LOG.error("No data left for analysis!");

	if (params.diff_file != "")
	{	// Merge files - cannot be run with other output options.
		vcf_file vcf_diff(params.diff_file, params.diff_file_compressed, params.chr_to_keep, params.chr_to_exclude, params.force_write_index);
		vcf_diff.apply_filters(params);	// Apply various filters as required.
		vcf.output_indv_in_files(params.output_prefix, vcf_diff);
		vcf.output_sites_in_files(params.output_prefix, vcf_diff);

		if (params.diff_site_discordance == true) vcf.output_discordance_by_site(params.output_prefix, vcf_diff);
		if (params.diff_discordance_matrix == true) vcf.output_discordance_matrix(params.output_prefix, vcf_diff);
		if (params.diff_indv_discordance == true) vcf.output_discordance_by_indv(params.output_prefix, vcf_diff);
		if (params.diff_switch_error == true) vcf.output_switch_error(params.output_prefix, vcf_diff);
	}

	vcf.output_INFO_for_each_site(params.output_prefix, params.INFO_to_extract);
	vcf.output_FORMAT_information(params.output_prefix, params.FORMAT_id_to_extract);
	if (params.output_indv_depth == true) vcf.output_individuals_by_mean_depth(params.output_prefix);
	if (params.output_geno_depth == true) vcf.output_genotype_depth(params.output_prefix);
	if (params.output_site_depth == true) vcf.output_site_depth(params.output_prefix, false);
	if (params.output_site_mean_depth == true) vcf.output_site_depth(params.output_prefix, true);
	if (params.output_freq == true) vcf.output_frequency(params.output_prefix, false, params.suppress_allele_output);
	if (params.output_counts == true) vcf.output_frequency(params.output_prefix, true, params.suppress_allele_output);
	if (params.plink_output == true) vcf.output_as_plink(params.output_prefix);
	if (params.plink_tped_output == true) vcf.output_as_plink_tped(params.output_prefix);
	if (params.output_HWE == true) vcf.output_hwe(params.output_prefix);
	if (params.output_SNP_density_bin_size > 0) vcf.output_SNP_density(params.output_prefix, params.output_SNP_density_bin_size);
	if (params.output_missingness == true) vcf.output_missingness(params.output_prefix);
	if (params.output_geno_rsq == true) vcf.output_genotype_r2(params.output_prefix, params.ld_snp_window_size, params.ld_bp_window_size, params.min_r2);
	if (params.output_interchromosomal_rsq == true) vcf.output_interchromosomal_genotype_r2(params.output_prefix, params.min_r2);
	if (params.output_hap_rsq == true) vcf.output_haplotype_r2(params.output_prefix, params.ld_snp_window_size, params.ld_bp_window_size, params.min_r2);
	if (params.output_het == true) vcf.output_het(params.output_prefix);
	if (params.output_site_quality == true) vcf.output_site_quality(params.output_prefix);
	if (params.output_012_matrix == true) vcf.output_as_012_matrix(params.output_prefix);
	if (params.output_as_IMPUTE == true) vcf.output_as_IMPUTE(params.output_prefix);
	if (params.output_BEAGLE_genotype_likelihoods == true) vcf.output_BEAGLE_genotype_likelihoods(params.output_prefix);
	if (params.output_as_ldhat_unphased == true) vcf.output_as_LDhat_unphased(params.output_prefix, params.chr_to_keep);
	if (params.output_as_ldhat_phased == true) vcf.output_as_LDhat_phased(params.output_prefix, params.chr_to_keep);
	if (params.output_singletons == true) vcf.output_singletons(params.output_prefix);
	if (params.output_site_pi == true) vcf.output_per_site_nucleotide_diversity(params.output_prefix);
	if (params.pi_window_size > 0) vcf.output_windowed_nucleotide_diversity(params.output_prefix, params.pi_window_size);
	if (params.output_Tajima_D_bin_size > 0) vcf.output_Tajima_D(params.output_prefix, params.output_Tajima_D_bin_size);
	if (params.output_TsTv_bin_size > 0) vcf.output_TsTv(params.output_prefix, params.output_TsTv_bin_size);
	if (params.output_TsTv_by_count) vcf.output_TsTv_by_count(params.output_prefix);
	if (params.output_TsTv_by_qual) vcf.output_TsTv_by_quality(params.output_prefix);
	if (params.recode == true) vcf.print(params.output_prefix, params.recode_INFO_to_keep, params.recode_all_INFO);
	if (params.recode_to_stream == true) vcf.print(std::cout, params.recode_INFO_to_keep, params.recode_all_INFO);
	if (params.output_filter_summary == true) vcf.output_FILTER_summary(params.output_prefix);
	if (params.output_filtered_sites == true) vcf.output_kept_and_removed_sites(params.output_prefix);
	if (params.output_LROH == true) vcf.output_LROH(params.output_prefix);
	if (params.output_relatedness == true) vcf.output_indv_relatedness(params.output_prefix);
	if (params.output_PCA == true) vcf.output_PCA(params.output_prefix, !params.PCA_no_normalisation, params.output_N_PCA_SNP_loadings);
	if (params.hapmap_fst_populations.size() > 0) vcf.output_hapmap_fst(params.output_prefix, params.hapmap_fst_populations);
	if (params.weir_fst_populations.size() > 0) vcf.output_weir_and_cockerham_fst(params.output_prefix, params.weir_fst_populations);

	if (params.hapmap_fst_file != "")
	{
		vcf_file vcf_fst(params.hapmap_fst_file, params.hapmap_fst_file_compressed, params.chr_to_keep);
		vcf_fst.apply_filters(params);	// Apply various filters as required.
		vcf.output_hapmap_fst_from_two_files(params.output_prefix, vcf_fst);
	}

	time(&end);
	double running_time = difftime(end,start);
	LOG.printLOG("Run Time = " + output_log::dbl2str_fixed(running_time, 2) + " seconds\n");
	LOG.close();
	return 0;
}