File: vcfprintaltdiscrepancy.r

package info (click to toggle)
libvcflib 1.0.12%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 70,520 kB
  • sloc: cpp: 39,837; python: 532; perl: 474; ansic: 317; ruby: 295; sh: 254; lisp: 148; makefile: 123; javascript: 94
file content (38 lines) | stat: -rwxr-xr-x 1,881 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env -S Rscript --vanilla
# Show ALT discrepancies in a table
#
# Usage: vcfprintaltdiscrepancy.r TAG < table
#
# Reads a whitespace-delimited table with a header row from standard input.
# TAG is the prefix of the per-tag columns that the report is computed from.
#
# The interpreter flags are split with `env -S`: on Linux everything after the
# interpreter path reaches `env` as one single argument, so listing several
# words there without -S makes `env` look for a program with that whole name.
# Rscript already runs without echoing its input, so no extra flag is needed
# for that.

# the column prefix is the first command-line argument; check it before
# touching stdin so that a bad invocation fails immediately with a usage line
tag <- commandArgs(trailingOnly=TRUE)[1]
if (is.na(tag) || !nzchar(tag)) {
    stop('usage: vcfprintaltdiscrepancy.r TAG < table', call.=FALSE)
}

# get the input VCF tabular format; file('stdin') is the process's own
# standard input, so no helper process is needed to read it
vcf <- read.table(file('stdin'), header=TRUE)
if (!('AC' %in% names(vcf))) {
    stop('input table has no AC column', call.=FALSE)
}

# keep only sites with AC > 0 (rows whose AC is NA are dropped as well)
vcf <- subset(vcf, AC > 0)

# Names of the per-tag columns this report reads: the tag followed by a fixed
# suffix.
# NOTE(review): read.table() rewrites header names into syntactic R names by
# default, so a tag containing characters such as '-' would not match the
# loaded column names -- confirm that tags are always plain identifiers.
tag.genotypes_alternate_count <- paste0(tag, '.genotypes.alternate_count')
tag.alternate_positive_discrepancy <- paste0(tag, '.site.alternate_positive_discrepancy')
tag.alternate_negative_discrepancy <- paste0(tag, '.site.alternate_negative_discrepancy')
tag.has_variant <- paste0(tag, '.has_variant')

# Stop here, naming the absent columns, rather than letting a later column
# lookup fail with the generic "undefined columns selected" error.
tag.required_columns <- c(
    tag.genotypes_alternate_count,
    tag.alternate_positive_discrepancy,
    tag.alternate_negative_discrepancy,
    tag.has_variant
)
tag.missing_columns <- setdiff(tag.required_columns, names(vcf))
if (length(tag.missing_columns) > 0) {
    stop('input table lacks required column(s): ',
         paste(tag.missing_columns, collapse=', '), call.=FALSE)
}

# Whole-table summary over the sites held in `vcf`.
site.alt_counts <- vcf[, tag.genotypes_alternate_count]

vcf.numberOfSites <- length(site.alt_counts)
vcf.totalAltAlleles <- sum(site.alt_counts)

# both discrepancy figures are divided by the total alternate allele count
vcf.positiveDiscrepancy <- sum(vcf[, tag.alternate_positive_discrepancy]) / vcf.totalAltAlleles
vcf.negativeDiscrepancy <- sum(vcf[, tag.alternate_negative_discrepancy]) / vcf.totalAltAlleles

# sum of the has_variant column divided by the number of sites
# NOTE(review): this value is computed but never printed -- confirm whether it
# was meant to be part of the report.
vcf.sitesTruePositive <- sum(vcf[, tag.has_variant]) / nrow(vcf)

# one "label value" line per statistic
report.stat <- function(label, value) {
    cat(label, value, '\n')
}
report.stat('number of sites', vcf.numberOfSites)
report.stat('total alternate alleles', vcf.totalAltAlleles)
report.stat('positive discrepancy', vcf.positiveDiscrepancy)
report.stat('negative discrepancy', vcf.negativeDiscrepancy)

# Positive discrepancy broken down by AC: for every distinct AC value, the
# summed positive discrepancy of its sites divided by their summed alternate
# allele count.  tapply() groups by the AC values in increasing order and
# labels each result with the AC value it belongs to.
positive.by.ac <- tapply(vcf[, tag.alternate_positive_discrepancy], vcf$AC, sum)
alternate.by.ac <- tapply(vcf[, tag.genotypes_alternate_count], vcf$AC, sum)
rate.by.ac <- positive.by.ac / alternate.by.ac

# the group labels are the AC values as text; turn them back into numbers
byac <- data.frame(ac=as.numeric(names(rate.by.ac)), fdr=as.vector(rate.by.ac))

print(byac)