File: eval_pitch

package info (click to toggle)
aubio 0.4.9-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,720 kB
  • sloc: python: 20,447; ansic: 20,127; makefile: 348; sh: 232
file content (143 lines) | stat: -rwxr-xr-x 5,654 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#! /usr/bin/env python

"""
Script to evaluate pitch algorithms against the TONAS database.

See http://mtg.upf.edu/download/datasets/tonas/

Example run:

    $ ./eval_pitch /path/to/TONAS/*/*.wav
    OK:  94.74% vx r:  96.87% vx f:  15.83% f0:  96.02% %12:   0.50% /path/to/TONAS/Deblas/01-D_AMairena.wav
    OK:  89.89% vx r:  93.21% vx f:  13.81% f0:  90.74% %12:   1.51% /path/to/TONAS/Deblas/02-D_ChanoLobato.wav
    OK:  96.02% vx r:  96.73% vx f:  10.91% f0:  96.42% %12:   0.00% /path/to/TONAS/Deblas/03-D_Chocolate.wav
    [...]
    OK:  82.35% vx r:  95.52% vx f:  67.09% f0:  89.80% %12:   0.95% /path/to/TONAS/Martinetes2/80-M2_Rancapinos.wav
    OK:  61.97% vx r:  85.71% vx f:  22.03% f0:  55.63% %12:   8.57% /path/to/TONAS/Martinetes2/81-M2_SDonday.wav
    OK:  75.26% vx r:  91.63% vx f:  27.27% f0:  75.99% %12:   5.05% /path/to/TONAS/Martinetes2/82-M2_TiaAnicalaPiriniaca.wav
    OK:  82.77% vx r:  92.74% vx f:  38.27% f0:  87.33% %12:   1.67% 69 files, total_length: 1177.69s, total runtime: 25.91s


"""

import sys
import time
import os.path
import numpy
from .utils import array_from_text_file, array_from_yaml_file
from aubio import source, pitch, freqtomidi

# wall-clock reference, used to report the total runtime at the end of the run
start = time.time()

# Largest allowed distance between estimated and reference pitch for a frame
# to count as correct. Pitches are compared as MIDI note numbers, so this is
# half a semitone.
freq_tol = .50

methods = ["default", "yinfft", "mcomb", "yin", "fcomb", "schmitt", "specacf"]
# pitch detection method under evaluation; pick another entry of `methods`
# to evaluate a different algorithm
method = methods[0]

# Default analysis parameters, overridden per method below.
downsample = 1      # divisor applied to the sample rate and buffer sizes
tolerance =  0.35   # value handed to the detector's set_tolerance()
silence = -40.      # value handed to the detector's set_silence() (presumably dB)
skip = 1            # number of leading analysis frames dropped before comparison
if method in ["yinfft", "default"]:
    downsample = 1
    tolerance = 0.45
elif method == "mcomb":
    downsample = 4
elif method == "yin":
    downsample = 4
    tolerance = 0.2

# Floor division keeps these values integers on Python 3 as well as Python 2;
# they are used as a sample rate and as buffer sizes, which must not be floats.
samplerate = 44100 // downsample
hop_s = 512 // downsample
win_s = 2048 // downsample

def get_pitches (filename, samplerate = samplerate, win_s = win_s, hop_s = hop_s):
    """Run the configured pitch detector over `filename`, one hop at a time.

    The file is opened with `source` at the requested `samplerate` and read
    in blocks of `hop_s` frames; each block is passed to a `pitch` object
    built from the module-level `method`, `tolerance` and `silence` settings.

    Returns a numpy array with one row per block read: the first column is
    the number of frames read before that block divided by the sample rate
    reported by the source, the second column is the detector output for the
    block (requested in "freq" unit, presumably Hz).
    """
    reader = source(filename, samplerate, hop_s)
    # from here on, use the rate reported by the opened source
    samplerate = reader.samplerate

    detector = pitch(method, win_s, hop_s, samplerate)
    detector.set_unit("freq")
    detector.set_tolerance(tolerance)
    detector.set_silence(silence)

    # [timestamp, pitch] rows, one per block read
    rows = []
    # running count of frames consumed so far
    frames_seen = 0

    exhausted = False
    while not exhausted:
        block, n_read = reader()
        estimate = detector(block)[0]
        rows.append([frames_seen/float(samplerate), estimate])
        frames_seen += n_read
        # a short read marks the last block of the file
        exhausted = n_read < hop_s
    return numpy.array(rows)

def _percent(count, out_of):
    """Return `count` as a percentage of `out_of`, or NaN when `out_of` is 0."""
    if out_of == 0:
        # e.g. a recording without any unvoiced frame: report 'nan' instead of
        # aborting the whole run with a ZeroDivisionError
        return float('nan')
    return 100. * count / out_of

def _score_columns(correct_f0, correct_sil, correct_chroma, missed, fp, voiced, unvoiced, total):
    """Format the five score columns shared by per-file lines and the summary.

    OK:   frames with correct pitch or correctly detected as unvoiced, over all frames
    vx r: voiced frames that were not missed, over voiced frames
    vx f: unvoiced frames reported as voiced, over unvoiced frames
    f0:   voiced frames within `freq_tol` of the reference, over voiced frames
    %12:  voiced frames matching only modulo 12 semitones, over voiced frames
    """
    return " ".join([
        "OK: %6s%%" % ("%.2f" % _percent(correct_f0 + correct_sil, total)),
        "vx r: %6s%%" % ("%.2f" % (100. - _percent(missed, voiced))),
        "vx f: %6s%%" % ("%.2f" % _percent(fp, unvoiced)),
        "f0: %6s%%" % ("%.2f" % _percent(correct_f0, voiced)),
        "%%12: %6s%%" % ("%.2f" % _percent(correct_chroma, voiced)),
    ])

# counters accumulated over all evaluated files
total_correct_f0, total_correct_sil, total_missed, total_incorrect, total_fp, total_total = 0, 0, 0, 0, 0, 0
total_correct_chroma, total_voiced = 0, 0
for source_file in sys.argv[1:]:
    # Swap only the file extension: str.replace() would also rewrite '.wav'
    # appearing elsewhere in the path, and leave other extensions untouched.
    ground_truth_file = os.path.splitext(source_file)[0] + '.f0.Corrected'
    if not os.path.isfile(ground_truth_file):
        # Each line is printed with a single argument so that the output is
        # the same whether print is a statement (Python 2) or a function.
        print("ERR could not find ground_truth_file " + ground_truth_file)
        continue

    # keep columns 0 and 2 of the annotation file; they are compared below
    # against the [time, pitch] columns returned by get_pitches
    ground_truth = array_from_text_file(ground_truth_file)[:,[0,2]]
    experiment = get_pitches(source_file)
    # check that we have the same length, more or less one frame
    assert abs(len(ground_truth) - len(experiment)) < 2
    # align experiment by skipping first results
    experiment = experiment[skip:]
    experiment[:,0] -= experiment[0,0]
    # trim to shortest list
    maxlen = min(len(ground_truth), len(experiment))
    experiment = experiment[:maxlen]
    ground_truth = ground_truth[:maxlen]
    # get difference matrix
    diffmat = abs(experiment - ground_truth)
    # make sure we got the timing right
    assert max(diffmat[:,0]) < 10e-4, source_file
    truth_pitches = freqtomidi(ground_truth[:,1])
    exper_pitches = freqtomidi(experiment[:,1])

    total = len(truth_pitches)
    # a value of 0 marks an unvoiced frame
    unvoiced = len(truth_pitches[truth_pitches == 0])
    voiced = total - unvoiced
    correct_sil, fp, missed, correct_f0, correct_chroma, incorrect = 0, 0, 0, 0, 0, 0
    for expected, found in zip(truth_pitches, exper_pitches):
        if expected == 0 and found == 0:
            correct_sil += 1
        elif expected == 0 and found != 0:
            fp += 1
        elif expected != 0 and found == 0:
            missed += 1
        elif abs(found - expected) < freq_tol:
            correct_f0 += 1
        # NOTE(review): this accepts errors slightly above a multiple of 12
        # semitones but not slightly below (e.g. 11.8); left unchanged so that
        # reported scores stay comparable -- confirm whether this is intended.
        elif abs(found - expected) % 12. < freq_tol:
            correct_chroma += 1
        else:
            incorrect += 1
    # every frame must fall in exactly one category
    assert correct_sil + fp + missed + correct_f0 + correct_chroma + incorrect == total
    assert unvoiced == correct_sil + fp
    assert voiced == missed + correct_f0 + correct_chroma + incorrect
    print(_score_columns(correct_f0, correct_sil, correct_chroma,
                         missed, fp, voiced, unvoiced, total)
          + " " + source_file)
    total_correct_sil += correct_sil
    total_correct_f0 += correct_f0
    total_correct_chroma += correct_chroma
    total_missed += missed
    total_incorrect += incorrect
    total_fp += fp
    total_voiced += voiced
    total_total += total

# summary over all files, followed by file count, audio length and runtime
print(" ".join([
    _score_columns(total_correct_f0, total_correct_sil, total_correct_chroma,
                   total_missed, total_fp, total_voiced,
                   total_correct_sil + total_fp, total_total),
    "%d files," % len(sys.argv[1:]),
    "total_length: %.2fs," % ((total_total * hop_s) / float(samplerate)),
    "total runtime: %.2fs" % (time.time() - start),
]))