File: make.doc

package info (click to toggle)
augustus 3.4.0%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 758,480 kB
  • sloc: cpp: 65,451; perl: 21,436; python: 3,927; ansic: 1,240; makefile: 1,032; sh: 189; javascript: 32
file content (20 lines) | stat: -rw-r--r-- 797 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Katharina J. Hoff
# April 5th 2018
# Example data set for demonstration of AUGUSTUS UTR training

# Downloaded the NCBI genes track for dm6 from the UCSC Genome Browser (Table Browser) and saved it as genes_with_utrs.gtf

# Step 1: convert the downloaded GTF into GFF3.
ucsc_gtf2gff3.pl --in=genes_with_utrs.gtf --out=genes_with_utrs.gff3

# Step 2: on every line, rewrite the first "five_prime_utr" to "5'-UTR" and the
# first "three_prime_utr" to "3'-UTR"; the result goes to genes_with_utrs.f.gff3.
# (The Perl expression is double-quoted because the replacement text contains
# single quotes.)
perl -pe "s/five_prime_utr/5'-UTR/; s/three_prime_utr/3'-UTR/;" \
    < genes_with_utrs.gff3 \
    > genes_with_utrs.f.gff3

# Rebuild a minimal GTF from the relabelled GFF3.
#
# For every feature line that is neither a "gene" nor an "mRNA" record, the
# first eight tab-separated columns are copied unchanged and the attribute
# column is replaced by
#     gene_id "<Parent>"; transcript_id "<Parent>";
# where <Parent> is the value of the Parent= attribute of that line.
#
# Compared with the naive one-liner this filter
#   * skips "#" directive/comment lines and blank lines instead of emitting
#     tab-padded garbage records for them,
#   * skips lines that do not have all nine columns or carry no Parent=
#     attribute, rather than silently reusing the ID captured on an earlier line,
#   * stops the captured ID at the first ";" so that a trailing separator or
#     following attributes never end up inside the quoted IDs.
#
# NOTE: the output name is genes_with_utrs.gtf, i.e. this overwrites the GTF
# that was originally downloaded and used as input for ucsc_gtf2gff3.pl.
perl -ne '
    next if /^#/ or /^\s*$/;              # GFF3 directives, comments, blank lines
    next if /\tgene\t/ or /\tmRNA\t/;     # gene and mRNA records are dropped
    chomp;
    my @t = split(/\t/);
    next if @t < 9;                       # need all 9 GFF3 columns
    # take the Parent ID only, stopping at ";" or whitespace; skip if absent
    my ($parent) = /Parent=([^;\s]+)/ or next;
    print join("\t", @t[0..7], "gene_id \"$parent\"; transcript_id \"$parent\";"), "\n";
' genes_with_utrs.f.gff3 > genes_with_utrs.gtf

# Downloaded the dm6 genome from UCSC (dm6.fa.gz)

# Decompress the downloaded genome; the following commands read dm6.fa.
gunzip dm6.fa.gz
# Build a cdbfasta index for dm6.fa and write it to dm6.fa.idx (-o).
cdbfasta dm6.fa -o dm6.fa.idx
# Query that index against dm6.fa (-d) for the key chr2R (-a) and save the
# output as genome.fa.
# NOTE(review): presumably this yields the single chr2R FASTA record - confirm
# against the cdbyank usage text.
cdbyank dm6.fa.idx -d dm6.fa -a chr2R > genome.fa