File: fetch_composer_codm.pl

package info (click to toggle)
libmp3-tag-perl 1.16-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,488 kB
  • sloc: perl: 10,139; makefile: 16; sh: 11
file content (188 lines) | stat: -rw-r--r-- 7,204 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/perl -w
use strict;

@ARGV == 2 or @ARGV == 1 or die "Usage: $0 Composer_Name [Oxford_CDict_URL]\n";
$ARGV[0] eq 'Beethoven' or die "Only Beethoven supported now...\n";
my $comp = shift;

my $url_get_txt = 'lynx -display_charset=ISO-8859-1 -width=400 -number_links- -nolist -dump';

sub get_url_txt ($) {
  my ($url, $f) = shift;
  local %ENV = %ENV;
  delete $ENV{LYNX_CFG};
  delete $ENV{LYNX_LSS};
  local $ENV{HOME} = '/';
  open $f, "$url_get_txt $url |" or die "open lynx pipe for read: $|";
  $f
}

my $f = get_url_txt 'http://en.wikipedia.org/wiki/List_of_works_by_Beethoven';
#open my $f, '/dev/null';
# XXXX Actually, we "want" writing years, not publication year; need to
#      pull it from some other place
my ($work, $op, $no, $opyears, %opus, %opnums, %op_publ_year);
while (<$f>) {
  $work++ if /^\s*Works having assigned Opus/;
  next unless $work;
  s/\.?\s*$//;
  if (s/^\s*\*\s*(\w+\s.*?):\s*//) {
    $op = $1;
    $op =~ s/\bOpus\b/Op./;
    $no = 0;
    $op_publ_year{$op} = ( s/\s*\(([-\d]+)\)\s*$// ? $1 : '' );
    my $opy = $op_publ_year{$op};
    $opy = " ($opy)" if length $opy;
    $opus{$op} = "$_; $op$opy\n";
  } elsif (s/^\s*\+\s*//) {
    $no++;
    $opus{$op} =~ s/^#*\s*/### /;
    my $opy = $op_publ_year{$op};
    $opy = " ($opy)" if length $opy;
    push @{$opnums{$op}}, "$_; $op, No. $no$opy\n";
  }
}
close $f or die "error closing lynx pipe: $!";

# Get years
my $oxford_url = shift
  || 'http://www.classicalarchives.com/bios/codm/beethoven.html';
$f = get_url_txt $oxford_url;
$work = 0;
my %bywork;

while (<$f>) {
  $work = $1, $bywork{$work} ||= '' if s/^\s*(OPERA|SYMPHONIES|CONCERTOS|ORCHESTRAL|PIANO SONATAS|OTHER PIANO WORKS|CHAMBER MUSIC|CHORAL|SOLO VOICE)\s*(\([^()]+\)\s*)?:\s*//i;
  next unless $work;
  last if /\[Home\]/i;
  $bywork{$work} .= $_;
}
1 while <$f>;
close $f or die "error closing lynx pipe: $!";

my $months_short = q(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec);
my $months_long = q(January|February|March|April|May|June|July|August|September|October|November|December);
my $months_s_rex = qr($months_short)i;
my $months_l_rex = qr($months_long)i;
my $months_days_rex = qr(\b(?:\d{1,2}\s+)?(?:$months_s_rex\b\.?|$months_l_rex\b))i;

my $years_rx1 = qr/(?:$months_days_rex\s+)?\d\d\d\d(?:-\d{1,4})?/;
my $years_rx2 = qr/$years_rx1(?:(?:,|\sand)\s$years_rx1)*/;
# Abbrev: 2consonants, or ob.
my $year_spec_rx = qr/pubd?\.|rev\.|comp\.|arr\.\s(?:by|for|of)(?:\s\w+|\sob\.|\s[b-df-hj-np-tv-z]{1,2}\.)*/;
my $years_rx = qr/(?:$year_spec_rx\s)?$years_rx2(?:,\s(?:$year_spec_rx\s)?$years_rx2)*/;
my $years_fp_rx = qr/$years_rx(?:(?:[,;]|\sand)\s(?:$years_rx|f\.\s*(?:pub\.\s*)?p\.\s[^;]*(?=;|$)))*/;

my $o = 1;
my %iso_month;
$iso_month{lc $_} = $o++ for split /\|/, $months_short;

sub date_to_ISO {		# "Our version" of ISO; use -- instead of /
  my $d = shift;		# Suppose it is matched by $years_rx1
  $d =~ /(?:(?:(\d+)\s+)?($months_short)\w*\.?\s+)?(\d{4})(?:-(\d{1,4}))?/i
    or die "Unrecognized format of date: `$d'";
  my $y = $3;
  $y .= sprintf '-%02d', $iso_month{lc $2} if $2;
  $y .= sprintf '-%02d', $1 if defined $1;
  $y .= ('--' . substr($3, 0, 4 - length $4) . $4) if $4;
  $y
}

open ERR, "> $comp.err" or die "open $comp.err for write: $!";

my %per_opnum;
for my $w (keys %bywork) {
  my $txt = $bywork{$w};
  $txt =~ s/\.?\s*$//;
  $txt =~ s/\s+/ /g;
  my $dot = 1;
  # 1st try: break into sentences ending in date (see Beethoven's Symphonies)
  my @parts = split /(?:(?<=\b\d\d\d\d)|(?<=\b\d\d\d\d-\d)|(?<=\b\d\d\d\d-\d\d)|(?<=\b\d\d\d\d-\d\d\d)|(?<=\b\d\d\d\d-\d\d\d\d))\.\s+/, $txt;
  my $match = qr/^(\?|c\.\s*)?$years_fp_rx$/;
  unless (@parts > 1) {
    # 2nd try: as above, but allow parens before dot; then break via semicolons
    # preceeded by date (see Beethoven's non Symphonies)
    my @p = split /(?<=(?:(?<=\b\d\d\d\d)|(?<=\b\d\d\d\d-\d)|(?<=\b\d\d\d\d-\d\d)|(?<=\b\d\d\d\d-\d\d\d)|(?<=\b\d\d\d\d-\d\d\d\d))\))\.\s+/, $txt;
    @parts = ();
    push @parts, split /(?<=[)\d]);\s+/ for @p;
    $dot = 0;
    $match = qr/^(\?|c\.\s*)?$years_rx$/;
  }
  my($pref, $npref) = '';	# Look for subheader
  # Is beneficial only as in "Op.47, in A major (Kreutzer)", except:
  # Pf. trios: Variations on `Ich bin der Schneider Kakadu', Op.121a (Kakadu)
  # Overtures: Die Weihe des Hauses (Consecration of the House), Op.124
  # Vc. sonatas: Op.102, Nos. 1-2, in C major and D major
  for my $p (@parts) {
    # Is beneficial only as in "Op.47, in A major (Kreutzer)", except:
    # Pf. trios: Variations on `Ich bin der Schneider Kakadu', Op.121a (Kakadu)
    # Overtures: Die Weihe des Hauses (Consecration of the House), Op.124
    # Vc. sonatas: Op.102, Nos. 1-2, in C major and D major
    # String trios (notes):, Vc. Sonatas:, Miscelaneous, str. qts:
    if ($p =~ /^((?:\w+|\w\w\w?\.)(?:\s\w+|\sob\.|\s[b-df-hj-np-tv-z]{1,3}\.)*(?:\s\([^()]+\))?):\s+/i) {
      $npref = $1; $pref = '';
    } else {
      # $p =~ s/^/$pref/ if $pref;
    }
    my $y;
    $p =~ s/,*\s*(?=$years_fp_rx\s*$)/ (/ and $p .= ')' if $dot;
    my $txt = $p;
    $txt =~ s/\s\(([^()]+)\)\s*$// and $y = $1;
    # Explanation: == can't find year; ##  Duplicate Op; plain: !unique Op+year
    print(ERR "==### $w // $pref\n$p\n"), next unless $y and $y =~ /\b\d\d\d\d\b/;
    my @opn = ($txt =~ /\b(?:Op\.\s*|(?=WoO\b))((?:WoO\.?\s*)?\d+[a-d]?)(?:[,\s]|$)/);
    print(ERR "###$w // $pref\n$p\n"), next
      unless $y =~ /$match/ and @opn == 1 and $txt !~ /\b\d\d\d\d\b/;
    if ($per_opnum{$opn[0]}) {
      print(ERR "#####$per_opnum{$opn[0]}[3] // $per_opnum{$opn[0]}[4]\n$per_opnum{$opn[0]}[2]\n")
	if @{$per_opnum{$opn[0]}};
      $per_opnum{$opn[0]} = [];
      print(ERR "#####$w // $pref\n$p\n"), next
    }
    $y =~ s/($years_rx1)/date_to_ISO $1/ge;
    $per_opnum{$opn[0]} = [$y, $txt, $p, $w, $pref];
    ($pref, $npref) = ($npref, '') if $npref;
    #print "@opn // $y // $txt\n";
  }
}
close ERR or die "close $comp.err for write: $!";

sub alignnums ($) {
  my $s = shift;
  $s =~ s/(\d+)/ sprintf '%029d', $1/ge;
  $s
}

open COMP, "> $comp.wiki" or die "open $comp.wiki for write: $!";
for (sort {alignnums($a) cmp alignnums $b} keys %opus) {
  print COMP $opus{$_};
  print COMP for @{$opnums{$_}};
}
close COMP or die "close $comp.wiki for write: $!";

open COMP, "> $comp.codm" or die "open $comp.codm for write: $!";
for (sort {alignnums($a) cmp alignnums $b} keys %per_opnum) {
  next unless @{$per_opnum{$_}};
  print COMP <<EOP
### $per_opnum{$_}[3] // $per_opnum{$_}[4]
Op. $_ // $per_opnum{$_}[0] // $per_opnum{$_}[1]
EOP
}
close COMP or die "close $comp.codm for write: $!";

open DIFF, "> $comp.diffs" or die "open $comp.diffs for write: $!";
for (sort {alignnums($a) cmp alignnums $b} keys %per_opnum) {
  (my $op = $_) =~ s/^(\d+)/Op. $1/;
  next unless @{$per_opnum{$_}}
    and (defined $op_publ_year{$op}
	 and $per_opnum{$_}[0] ne $op_publ_year{$op}
	 and -1 == index $per_opnum{$_}[0], $op_publ_year{$op});
  print DIFF "##$opus{$op}";
  print DIFF "##### $_" for @{$opnums{$op}};

  print DIFF <<EOP
### $per_opnum{$_}[3] // $per_opnum{$_}[4]
Op. $_ // $op_publ_year{$op} // $per_opnum{$_}[0] // $per_opnum{$_}[1]
EOP
}
close DIFF or die "close $comp.diffs for write: $!";