File: cg-sort

package info (click to toggle)
cg3 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,684 kB
  • sloc: cpp: 26,476; xml: 6,139; perl: 1,398; lisp: 1,091; ansic: 178; sh: 47; python: 26; makefile: 14
file content (127 lines) | stat: -rwxr-xr-x 2,365 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
use warnings;
use strict;
use utf8;
# Compile-time stream setup: enable autoflush on the currently selected
# output handle and push a UTF-8 encoding layer onto STDIN and STDOUT.
BEGIN {
   $| = 1;
   for my $stream (\*STDIN, \*STDOUT) {
      binmode($stream, ':encoding(UTF-8)');
   }
}
use open qw( :encoding(UTF-8) :std );

use Getopt::Long;
# Command-line parsing. Short options may be bundled (e.g. -r1) and option
# names are matched case-sensitively. Parsed values land in %opts, keyed by
# the long option name.
Getopt::Long::Configure(qw(bundling no_ignore_case));
my %opts;
GetOptions(
   \%opts,
   'weight|w:s',   # string value is optional
   'reverse|r',
   'first|1',
   'help|?',
);

# Writes the usage text to the currently selected output handle.
# Takes no arguments.
sub print_help {
   my $usage = <<'XOUT';
Usage: cg-sort [OPTIONS]

Pipe a CG stream through this to sort and unique the readings of each cohort.

Options:
 -?, --help       outputs this help
 -w, --weight     sorts by a numeric tag; defaults to W
 -r, --reverse    reverses the sort order
 -1, --first      only keep the first reading

XOUT
   print $usage;
}

# --help takes precedence over everything else: show usage and exit cleanly.
if (defined $opts{help}) {
   print_help();
   exit(0);
}

# Name of the numeric tag used as the sort key. A non-empty --weight value
# overrides the default of 'W'.
my $W = (exists($opts{weight}) && length($opts{weight})) ? $opts{weight} : 'W';

# State for the cohort currently being collected.
#   $in_cohort - true once a wordform line has been seen
#   $trail     - lines inside the cohort that are neither readings nor
#                deleted readings; printed after the sorted readings
#   %readings  - reading lines, stored as hash keys so duplicates collapse
#   %deleted   - deleted (';'-prefixed) reading lines, stored the same way
my ($in_cohort, $trail) = (0, '');
my (%readings, %deleted);

# Extracts the numeric weight carried by one reading line.
#
# The weight is the value of a tag named after $W, written either bracketed
# (<W:1.5>) or bare (W:1.5), and delimited by whitespace or the line edges.
# An optional leading sign is accepted so that signed weights such as
# <W:-1.5> are honoured instead of silently counting as 0.
#
# Argument: the reading line.
# Returns:  the captured weight string, or 0.0 when no such tag is present.
sub _reading_weight {
   my ($reading) = @_;
   if ($reading =~ m/(?:^|\s)<\Q$W\E:([-+]?[\d.]+)>(?:\s|$)/
      || $reading =~ m/(?:^|\s)\Q$W\E:([-+]?[\d.]+)(?:\s|$)/) {
      return $1;
   }
   return 0.0;
}

# Comparator for sort(): orders the readings in $a and $b by ascending
# weight, falling back to a plain string comparison of the whole lines when
# the weights are numerically equal so the order stays deterministic.
sub sort_weight {
   my $wa = _reading_weight($a);
   my $wb = _reading_weight($b);

   return ($wa <=> $wb) || ($a cmp $b);
}

# Prints a group of reading lines in sorted order.
#
# Arguments: the reading lines to print. Nothing is printed for an empty list.
#
# Ordering and selection follow the command-line options in %opts:
#   weight  - sort with sort_weight() instead of the default string sort
#   reverse - reverse the sorted order
#   first   - print only the first line of the final order
# Lines are written as-is, joined without any separator.
sub print_sorted_readings {
   return unless @_;

   my @ordered;
   if (exists $opts{weight}) {
      @ordered = sort sort_weight @_;
   }
   else {
      @ordered = sort @_;
   }

   @ordered = reverse @ordered if exists $opts{reverse};

   print(exists($opts{first}) ? $ordered[0] : join('', @ordered));
}

# Flushes the cohort collected so far, if any: prints its readings, then its
# deleted readings (each group sorted by print_sorted_readings), then the
# accumulated trailing lines, and finally clears all cohort state.
# Does nothing when no cohort is currently open.
sub print_sorted {
   return if !$in_cohort;

   for my $group (\%readings, \%deleted) {
      print_sorted_readings(keys(%{$group}));
   }
   print $trail;

   # Start the next cohort from a clean slate.
   %readings = ();
   %deleted = ();
   $trail = '';
   $in_cohort = 0;
}

# Input is read from the file named by the first remaining argument, or from
# STDIN when no argument is given.
my $fh = *STDIN;
if (defined $ARGV[0]) {
   # Name the file in the message so a failed open can be diagnosed.
   open(my $input, '<', $ARGV[0]) or die "cg-sort: cannot open '$ARGV[0]': $!";
   $fh = $input;
}

# Stream the input line by line. Lines before the first cohort are passed
# through untouched. A wordform line ("<...>") flushes the previous cohort,
# is printed immediately, and opens a new cohort. Inside a cohort, lines are
# buffered until the next flush.
#
# NOTE(review): every whitespace-indented quoted line counts as an
# independent reading regardless of indentation depth - confirm that more
# deeply indented lines never need to stay attached to a preceding reading.
while (my $line = <$fh>) {
   if ($line =~ m/^"<.+?>"/) {
      print_sorted();
      $in_cohort = 1;
   }
   elsif ($in_cohort) {
      if ($line =~ m/^\s+".+?"/) {
         # Reading: stored as a hash key, so identical lines collapse.
         $readings{$line} = 1;
      }
      elsif ($line =~ m/^;\s+".+?"/) {
         # Deleted reading: kept in a separate group.
         $deleted{$line} = 1;
      }
      else {
         # Anything else is emitted after the cohort's readings.
         $trail .= $line;
      }
      next;
   }
   print $line;
}

# Flush the final cohort, which no following wordform line will trigger.
print_sorted();