File: genesTxtColumnNames.sh

package info (click to toggle)
snpeff 5.2.f%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (33 lines) | stat: -rwxr-xr-x 1,024 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/sh

#-------------------------------------------------------------------------------
# Convert the column names of a snpEff genes table so they can be used in an R script
#
# Usage: cat snpEff_genes.txt | ./scripts/genesTxtColumnNames.sh > genes.txt
#
# Once in R, you can:
#	- Load this table:
#			data <- read.csv("genes.txt", sep= "\t", header=TRUE);
#
#	- Access the data:
#			data$countINTRON
#
#	- Add missing or empty columns:
#			if( is.null(data$countINTRON) ) { data$countINTRON <- 0 * (1:length(data$geneId)); }
#
#																Pablo Cingolani
#-------------------------------------------------------------------------------

# Filter: read a snpEff genes table on stdin and write it to stdout with the
# column headers rewritten into short names without spaces or parentheses.
#
#   - Lines starting with "# The following" are dropped.
#   - "Bases affected (X)", "Length (X)", "Count (X)" and "Total score (X)"
#     become basesX, lenX, countX and scoreX (every ")" on a line is removed).
#   - The first "#GeneId", "GeneName" and "BioType" on a line become
#     geneId, geneName and bioType.
#   - "_PRIME" and "SPLICE_SITE_" are deleted; "SYNONYMOUS_CODING" is
#     shortened to "SYN".
#
# The substitutions are applied to every remaining line (not only the header)
# and in the order written below: each expression works on the result of the
# previous one, which is what a chain of separate sed processes would produce.
# A single sed is used so the stream is handled by one process, and the
# pipeline no longer ends in a dangling line continuation.
genes_txt_column_names() {
    grep -v '^# The following' \
        | sed \
            -e 's/Bases affected (/bases/g' \
            -e 's/Length (/len/g' \
            -e 's/Count (/count/g' \
            -e 's/Total score (/score/g' \
            -e 's/)//g' \
            -e 's/#GeneId/geneId/' \
            -e 's/GeneName/geneName/' \
            -e 's/BioType/bioType/' \
            -e 's/_PRIME//g' \
            -e 's/SPLICE_SITE_//g' \
            -e 's/SYNONYMOUS_CODING/SYN/g'
}

# Script entry point: filter stdin to stdout.
genes_txt_column_names