File: import_proteomdata.pl

package info (click to toggle)
arb 6.0.6-8
  • links: PTS, VCS
  • area: non-free
  • in suites: sid, trixie
  • size: 66,204 kB
  • sloc: ansic: 394,911; cpp: 250,290; makefile: 19,644; sh: 15,879; perl: 10,473; fortran: 6,019; ruby: 683; xml: 503; python: 53; awk: 32
file content (105 lines) | stat: -rwxr-xr-x 3,539 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/perl

use strict;
use warnings;

# Extend the module search path at compile time so that the 'use' statements
# below can locate the modules shipped inside the ARB installation.
# Aborts if $ARBHOME is not set.
BEGIN {
  die "Environment variable \$ARBHOME has to be defined" unless exists $ENV{'ARBHOME'};
  my $arb_root = $ENV{'ARBHOME'};
  # appended (not prepended): directories already in @INC keep precedence
  push @INC, map { "$arb_root/$_" } ('lib', 'PERL_SCRIPTS/GENOME');
  1;
}

use ARB;
use GI;

# -----------------------
# configure here (edit the values in the list on the right-hand side):

my (
  $create_ORFs, # 0 -> error if ORF not found; 1 -> auto-create gene
  $verbose,     # 0 -> be quiet; 1 -> be noisy
  $overwrite    # 0 -> if entry exists -> error; 1 -> silently overwrite entry
) = (0, 0, 0);

# -----------------------


# Command line handling: exactly one argument (the data file) is accepted.
# Any other argument count prints the usage text and exits with status 1.
if (scalar(@ARGV) != 1) {
  print "\nUsage: import_proteomdata.pl datafile\n",
        "Imports a proteom datafile in CSV format.\n";
  GI::show_csv_info();
  print "The columns in datafile should contain:\n",
        '"ORF-Name","Substrat","Ausstiche","Mittelw Score","Stabw Score","best hit","x","y"',
        "\n\n";
  exit(1);
}

my $importfilename = $ARGV[0];

# Main import: connect to the ARB database, read the data file line by line and
# store the values of every line below 'proteome/<substrate>' of the matching
# ORF gene of the current genome. Every gene that received data and was not
# marked before gets marked and counted. Failures are reported via GI::error().

GI::connectDB();
GI::define_tokenizer_columns(8); # datafile is expected to contain 8 columns

GI::message("Reading '$importfilename'..");
# 3-argument open with a lexical handle: the filename is taken literally and is
# never interpreted as an open mode or pipe.
open(my $import_fh, '<', $importfilename)
  or GI::error("Can't read '$importfilename' ($!)");

my $lineno = 1;
# Read exactly ONE line for the header. Passing <FH> directly as a function
# argument evaluates it in list context (unless the callee declares a scalar
# prototype), which would consume the whole file and leave nothing to import.
my $header_line = <$import_fh>;
if (!defined $header_line) {
  GI::error("'$importfilename' is empty (expected a header line)");
}
# The header columns are not used further; presumably tokenizing them checks the
# column count -- TODO confirm in GI::tokenize_columns.
my @head = GI::tokenize_columns($header_line,"$lineno of $importfilename");

my ($gb_genome,$genome_name) = GI::findCurrentGenome();
my $gb_gene_data = ARB::search($gb_genome, "gene_data", "CONTAINER");
if (!$gb_gene_data) {
  my $reason = ARB::await_error();
  GI::error("Couldn't find or create container 'gene_data' for organism '$genome_name' ($reason)");
}
# start with a clean slate, so that afterwards only genes touched by this import are marked
GI::unmarkGenesOfGenome($gb_genome);

GI::message("Importing data to organism '$genome_name' ..");
my $gene_count = 0;

# Read line by line instead of loading the complete file into memory first.
ORF: while (defined(my $line = <$import_fh>)) { # loop over all lines from inputfile
  $lineno++;
  my @elems = GI::tokenize_columns($line,"$lineno of $importfilename");
  my ($orf,$substrate,$spots,$mean_score,$sd_score,$best_hit,$coordx,$coordy) = @elems;

  # find (or create) the orf gene:
  my ($gb_orf, $error) = GI::findORF($gb_gene_data,$genome_name,$orf,$create_ORFs,$verbose);
  if (!$error) {
    my $substrate_field = "proteome/$substrate";

    # look for an existing container first; only create one if there is none
    my $gb_substrate = ARB::search($gb_orf, $substrate_field, "NONE");
    if (!$gb_substrate) {
      $gb_substrate = ARB::search($gb_orf, $substrate_field, "CONTAINER");
    }

    if (!$gb_substrate) {
      my $reason = ARB::await_error();
      $error = "Could not create container '$substrate_field' ($reason)";
    }
    else {
      # field name, ARB field type and value, written in this order;
      # the first failing write stops the sequence.
      # NOTE(review): 'score' and 'sd' (mean / standard deviation) are stored as
      # "INT"; if the file contains fractional values, check how
      # GI::write_entry treats them.
      my @entries = (
        [ "spots",  "STRING", $spots      ],
        [ "score",  "INT",    $mean_score ],
        [ "sd",     "INT",    $sd_score   ],
        [ "id",     "STRING", $best_hit   ],
        [ "coordx", "INT",    $coordx     ],
        [ "coordy", "INT",    $coordy     ],
      );
      foreach my $entry (@entries) {
        my ($field, $type, $value) = @$entry;
        $error = GI::write_entry($gb_substrate, $field, $type, $value, $overwrite, $verbose);
        last if $error;
      }

      if (!$error) {
        my $marked = ARB::read_flag($gb_orf);
        if ($marked == 0) {
          ARB::write_flag($gb_orf,1); # mark changed genes
          $gene_count++; # each gene is counted once, even if several lines refer to it
        }
      }
    }
  }

  if ($error) { GI::error("$error (while parsing $lineno of $importfilename)"); }
}
close $import_fh;
GI::message("$gene_count genes modified and marked.");

GI::disconnectDB();