File: merTrim-compare-logs.pl

package info (click to toggle)
canu 1.7.1+dfsg-1~bpo9+1
  • links: PTS, VCS
  • area: main
  • in suites: stretch-backports
  • size: 7,680 kB
  • sloc: cpp: 66,708; perl: 13,682; ansic: 4,020; makefile: 627; sh: 472; python: 39
file content (118 lines) | stat: -rw-r--r-- 3,072 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env perl

###############################################################################
 #
 #  This file is part of canu, a software program that assembles whole-genome
 #  sequencing reads into contigs.
 #
 #  This software is based on:
 #    'Celera Assembler' (http://wgs-assembler.sourceforge.net)
 #    the 'kmer package' (http://kmer.sourceforge.net)
 #  both originally distributed by Applera Corporation under the GNU General
 #  Public License, version 2.
 #
 #  Canu branched from Celera Assembler at its revision 4587.
 #  Canu branched from the kmer project at its revision 1994.
 #
 #  This file is derived from:
 #
 #    src/AS_MER/merTrim-compare-logs.pl
 #
 #  Modifications by:
 #
 #    Brian P. Walenz from 2014-NOV-15 to 2014-DEC-05
 #      are Copyright 2014 Battelle National Biodefense Institute, and
 #      are subject to the BSD 3-Clause License
 #
 #    Brian P. Walenz beginning on 2015-OCT-12
 #      are a 'United States Government Work', and
 #      are released in the public domain
 #
 #  File 'README.licenses' in the root directory of this distribution contains
 #  full conditions and disclaimers for each license.
 ##

use strict;
use warnings;

#  Compare the trim points reported in two merTrim logs.
#
#  usage: merTrim-compare-logs.pl run1.log run2.log
#
#  Both logs are scanned for records that begin with a line starting with
#  'FINAL'.  Records are paired up in order of appearance; the read IDs of each
#  pair must agree.  Whenever the trim begin or end differs between the two
#  logs, one line is written to STDOUT:
#
#    aID/bID aLen/bLen aBgn-aEnd bBgn-bEnd
#
#  The current read ID is written to STDERR each time it is a multiple of
#  10000, as a progress indicator.

my $log1 = shift @ARGV;
my $log2 = shift @ARGV;

if (!defined($log1) || !defined($log2)) {
    die "usage: $0 run1.log run2.log\n";
}

#  Three-argument open with lexical handles, so that a file name taken from
#  the command line can never be interpreted as an open mode or a pipe.
#  The second handle must read $log2 (it used to reopen $log1, which made the
#  script compare the first log against itself).
open(my $L1, '<', $log1) or die "Failed to open '$log1' for reading: $!\n";
open(my $L2, '<', $log2) or die "Failed to open '$log2' for reading: $!\n";

#  Advance the supplied filehandle to the next line that starts with 'FINAL'
#  and return that line.  The eight lines that follow it in the log (ORI seq,
#  COR seq, COR qlt, COVERAGE, CORRECTIONS, DISCONNECTION, ADAPTER, RESULT) are
#  consumed and discarded.  Returns undef once the file holds no further FINAL
#  line, so callers can stop cleanly instead of spinning at end of file.
sub nextFinalLine {
    my ($fh) = @_;

    my $final;

    while (defined($final = readline($fh))) {
        last  if ($final =~ m/^FINAL/);
    }

    return  if (!defined($final));

    for (1 .. 8) {
        my $discard = readline($fh);
    }

    return $final;
}

#  Extract (read ID, read length, trim begin, trim end) from a FINAL (or
#  ADAPTERSEARCH) line.  Dies, naming the log via $tag ('a1' or 'b1'), when the
#  line does not have the expected form.
sub parseFinalLine {
    my ($line, $tag) = @_;

    if ($line =~ m/^\w+\sread\s(\d+)\slen\s(\d+)\s\(trim\s(\d+)-(\d+)\)$/) {
        return ($1, $2, $3, $4);
    }

    die "Nope $tag $line";
}

while (1) {
    my $finalA = nextFinalLine($L1);
    my $finalB = nextFinalLine($L2);

    #  Stop as soon as either log runs out of FINAL records.
    last  if (!defined($finalA) || !defined($finalB));

    my ($aID, $aLen, $aBgn, $aEnd) = parseFinalLine($finalA, "a1");
    my ($bID, $bLen, $bBgn, $bEnd) = parseFinalLine($finalB, "b1");

    die "ID mismatch $aID $bID\n" if ($aID != $bID);

    if (($aBgn != $bBgn) || ($aEnd != $bEnd)) {
        print "$aID/$bID $aLen/$bLen $aBgn-$aEnd $bBgn-$bEnd\n";
    }

    if (($aID % 10000) == 0) {
        print STDERR "$aID\n";
    }
}

close($L1);
close($L2);