File: mouse_download.sh

package info (click to toggle)
python-loompy 3.0.7%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,316 kB
  • sloc: python: 3,152; sh: 63; makefile: 16
file content (45 lines) | stat: -rw-r--r-- 3,354 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Directory that the prepared reference files are moved into further down.
# -p makes this a no-op when the directory already exists, so the script can
# be re-run; if it cannot be created there is no point in continuing.
mkdir -p inputs || exit 1

# Make sure these manual steps have been done:
# Download "BrowseTF  TcoF-DB.xlsx" from https://tools.sschmeier.com/tcof/browse/?type=tcof&species=mouse&class=all# (a button at https://tools.sschmeier.com/tcof/home/)
#   Open the file in Excel and save tab-separated as "inputs/TcoF-Db.tsv"
#
# You need to import data from BioMart using this link:
# http://www.ensembl.org/biomart/martview/7c9b283e3eca26cb81449ec518f4fc14?VIRTUALSCHEMANAME=default&ATTRIBUTES=mmusculus_gene_ensembl.default.feature_page.ensembl_gene_id|mmusculus_gene_ensembl.default.feature_page.ensembl_gene_id_version|mmusculus_gene_ensembl.default.feature_page.ensembl_transcript_id|mmusculus_gene_ensembl.default.feature_page.ensembl_transcript_id_version|mmusculus_gene_ensembl.default.feature_page.ucsc|mmusculus_gene_ensembl.default.feature_page.vega_translation|mmusculus_gene_ensembl.default.feature_page.ccds&FILTERS=&VISIBLEPANEL=resultspanel
# by clicking the "Go" button, and saving the downloaded "mart_export.txt" file as "inputs/mart_export.txt".
# The file should contain the following columns:
# Gene stable ID	Gene stable ID version	Transcript stable ID	Transcript stable ID version	UCSC Stable ID	Vega translation ID	CCDS ID

# Fetch the GENCODE M23 mouse genome and annotation, plus three barcode lists
# from the 10XGenomics cellranger repository, into the current directory.
# Stop at the first failed download: every later step reads these files, and
# carrying on would only surface as confusing errors further down.
for url in \
  ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz \
  ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/gencode.vM23.primary_assembly.annotation.gtf.gz \
  https://github.com/10XGenomics/cellranger/raw/master/lib/python/cellranger/barcodes/3M-february-2018.txt.gz \
  https://github.com/10XGenomics/cellranger/raw/master/lib/python/cellranger/barcodes/737K-april-2014_rc.txt \
  https://github.com/10XGenomics/cellranger/raw/master/lib/python/cellranger/barcodes/737K-august-2016.txt
do
  wget "$url" || { printf 'download failed: %s\n' "$url" >&2; exit 1; }
done

# Derive gene-level BED files from the GENCODE annotation and extract the
# corresponding genomic sequences.
annot=gencode.vM23.primary_assembly.annotation
genome=GRCm38.primary_assembly.genome.fa

# One BED line per GTF "gene" record: chrom, start-1, end, $10, 0, strand.
# awk splits on whitespace, so $10 is the value following the first attribute
# key -- presumably the quoted gene_id (verify against the GTF); tr then
# deletes every double quote and semicolon from the output.
# OFS is set once in BEGIN rather than being (ab)used as an always-true pattern.
zcat "${annot}.gtf.gz" \
  | gawk 'BEGIN { OFS = "\t" } $3 == "gene" { print $1, $4 - 1, $5, $10, 0, $7 }' \
  | tr -d '";' > "${annot}.bed"

# The pipeline's exit status only reflects tr, so a failed zcat/gawk would go
# unnoticed; an empty BED is the portable symptom to test for.
[ -s "${annot}.bed" ] \
  || { printf '%s\n' "no gene records written to ${annot}.bed" >&2; exit 1; }

bedtools sort -i "${annot}.bed" > "${annot}.sorted.bed" || exit 1

# Strand-aware merge that collapses column 4 of the merged intervals.
# NOTE(review): the merged BED stays in the working directory; nothing in this
# script moves it to inputs/ or reads it again -- confirm that is intended.
bedtools merge -i "${annot}.sorted.bed" -s -c 4 -o collapse > "${annot}.merged.bed" || exit 1

# getfasta needs the uncompressed genome.
gunzip "${genome}.gz" || exit 1

# One FASTA record per interval of the sorted (not merged) BED; -name takes the
# record label from the BED name column (exact header format depends on the
# bedtools version).
bedtools getfasta -name -fo gencode.vM23.unspliced.fa -fi "${genome}" -bed "${annot}.sorted.bed" || exit 1

# Give the downloaded barcode lists their 10xvN_whitelist.txt names.
# Only the 3M list arrives compressed, so unpack it before the renames.
gunzip 3M-february-2018.txt.gz

# Each entry is "<downloaded name>:<whitelist name>".
for rename in \
  737K-april-2014_rc.txt:10xv1_whitelist.txt \
  737K-august-2016.txt:10xv2_whitelist.txt \
  3M-february-2018.txt:10xv3_whitelist.txt
do
  mv "${rename%%:*}" "${rename#*:}"
done

# Unpack the annotation, then collect the genome, the unspliced FASTA, the two
# BED files and the GTF under inputs/ in a single move.
gunzip gencode.vM23.primary_assembly.annotation.gtf.gz

# The trailing * on the genome name also picks up any sibling files that share
# the prefix (presumably an index written next to the FASTA -- verify).
mv \
  GRCm38.primary_assembly.genome.fa* \
  gencode.vM23.unspliced.fa \
  gencode.vM23.primary_assembly.annotation.bed \
  gencode.vM23.primary_assembly.annotation.sorted.bed \
  gencode.vM23.primary_assembly.annotation.gtf \
  inputs/

# Fetch the remaining reference files (GENCODE transcripts, TRRUST mouse data
# and three MGI reports) directly into inputs/.
# The work runs in a subshell so the caller's working directory is never
# changed, and the cd is checked: if inputs/ is missing, nothing is downloaded
# into the wrong place and no "cd .." walks out of the working directory.
(
  cd inputs || exit 1

  for url in \
    ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/gencode.vM23.transcripts.fa.gz \
    https://www.grnpedia.org/trrust/data/trrust_rawdata.mouse.tsv \
    http://www.informatics.jax.org/downloads/reports/MGI_Gene_Model_Coord.rpt \
    http://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt \
    http://www.informatics.jax.org/downloads/reports/MRK_Sequence.rpt
  do
    wget "$url" || { printf 'download failed: %s\n' "$url" >&2; exit 1; }
  done

  # Only the transcript FASTA is compressed.
  gunzip gencode.vM23.transcripts.fa.gz
) || exit 1