Source code for untwist.analysis.pitch

"""
Pitch detection algorithms
"""
import numpy as np
from scipy import signal, interpolate
from untwist.base.algorithms import Processor


[docs]class ZCR(Processor):
    """
    Zero-crossing rate (can be used as rough pitch estimator or as feature).
    """
    def __init__(self):
        pass
        
[docs]    def process(self, wave):
        n_crossings = np.count_nonzero(np.diff(np.sign(wave[:,0])))
        return 0.5 * n_crossings / wave.duration

[docs]class HPS(Processor):
    """
    Harmonic product spectrum pitch estimator.
    Based on implementation in:
    Lerch, A. (2012). An introduction to audio content analysis: Applications in signal processing and music informatics. John Wiley & Sons.
    """
    def __init__(self, n_harms= 8,  min_pitch = 50, max_pitch = 5000):
        self.n_harms = n_harms
        self.min_pitch = min_pitch
        self.max_pitch = max_pitch


[docs]    def process(self, X):
        min_bin = np.round((float(self.min_pitch)/X.sample_rate) * X.shape[0])
        max_bin = np.round((float(self.max_pitch)/X.sample_rate) * X.shape[0])

        HPS = X.magnitude().copy()
        for h in range(2, self.n_harms):
            H = X.magnitude()[0:HPS.shape[0]:h,:]
            HP = np.zeros_like(HPS)
            HP[0:H.shape[0],:] = H
            HPS *= HP
        pitch_bins = min_bin+np.argmax(HPS[min_bin:max_bin,:],0)
        return pitch_bins * 0.5 * X.sample_rate / X.shape[0]
        


[docs]class YINFFT(Processor):
    """
    YINFFT F0 estimation algorithm
    Brossier, P. M. (2006). Automatic annotation of musical audio for interactive applications 
    (Doctoral dissertation, Queen Mary University of London).
    Based on Essentia C++ implementation http://essentia.upf.edu
    """
    freq_mask = [0., 20., 25., 31.5, 40., 50., 63., 80., 100., 125., 160.,\
    200., 250., 315., 400., 500., 630., 800., 1000., 1250., 1600., 2000.,\
    2500.,3150., 4000., 5000., 6300., 8000., 9000., 10000.,12500., 15000.,\
     20000., 25100]

    weight_mask = [-75.8, -70.1, -60.8, -52.1, -44.2, -37.5, -31.3, -25.6,\
     -20.9, -16.5, -12.6, -9.6, -7.0, -4.7, -3.0, -1.8, -0.8, -0.2, -0.0,\
     0.5, 1.6, 3.2, 5.4, 7.8, 8.1, 5.3, -2.4, -11.1, -12.8, -12.2, -7.4,\
     -17.8, -17.8, -17.8]
    
[docs]    def compute_weights(self):
        self.weights = np.zeros((self.n_bins, 1))
        j = 1
        bin_hz = float(self.sample_rate) /(2 * (self.n_bins - 1))
        for i in range(self.n_bins):
            freq = i * bin_hz
            while(self.freq_mask[j] < freq): j+=1
            a0 = self.weight_mask[j-1]; f0 = self.freq_mask[j-1]
            a1 = self.weight_mask[j];f1 = self.freq_mask[j]
            if f0 == 0:
                self.weights[i] = (a1-a0)/f1*freq + a0
            else:
                self.weights[i] = (a1-a0)/(f1-f0)*freq +\
                (a0 - (a1 - a0)/(f1/f0 - 1.));
            self.weights[i] = np.power(10,self.weights[i] / 10.0)

    def __init__(self, n_bins, sample_rate, min_pitch = 100, max_pitch = 5000, interp = True):
        self.n_bins = n_bins
        self.sample_rate = sample_rate
        self.compute_weights()
        self.tau_max = int(min(np.ceil(float(sample_rate)/ min_pitch),n_bins))
        self.tau_min = int(min(np.floor(float(sample_rate)/ max_pitch),n_bins))
        self.interp = interp
        

[docs]    def process(self, spectrogram):
        mult = np.multiply(self.weights, np.ones((1, spectrogram.shape[1])))
        square_mag = np.power(spectrogram.magnitude(),2)
        square_mag = np.vstack((square_mag[1:,:],np.flipud(square_mag[1:,:])))
        square_mag_sum = np.sum(square_mag, 0)
        energy_threshold = np.percentile(square_mag_sum, 25)
        res = np.fft.rfft(square_mag.T, 2 * (self.n_bins - 1)).T
        resN = np.abs(res)
        resP = np.angle(res)
        yin = np.zeros((spectrogram.shape))
        yin[0,:] = 1
        tmp = np.zeros((spectrogram.shape[1],))
        for tau in range(1, self.n_bins):
            yin[tau,:] = square_mag_sum - resN[tau,:] * np.cos(resP[tau,:])
            tmp += yin[tau,:]
            yin[tau,:] *= tau/tmp

        tau = self.tau_min + np.argmin(yin[self.tau_min:self.tau_max,:],0)
        y_min = np.min(yin[self.tau_min:self.tau_max,:],0)
        tau = tau[:,np.newaxis]
        if self.interp:
            ranges = np.hstack((tau - 5, tau - 4, tau - 3, tau-2,tau-1,tau, 
                tau+1,tau + 2, tau + 3, tau + 4, tau + 5))
            new_tau = np.empty_like(tau)
            for frame in range(spectrogram.shape[1]):                    
                r = ranges[frame]
                r[0] = max(r[0], 0)
                r[1] = min(r[1], self.n_bins-1)
                val = yin[r.astype(int), frame]
                tck = interpolate.splrep(r, val, s=0, k = 2)
                xnew = np.arange(r[0],r[-1], (r[-1] - r[0]) /10)
                ynew = interpolate.splev(xnew, tck, der=0)
                new_tau[frame] = xnew[np.argmin(ynew)]
                y_min[frame] = np.min(ynew)            
            tau = new_tau
        tau = tau[:,0]
        pitch_confidence = 1- y_min
        pitch = np.zeros((spectrogram.shape[1],))
        pitch[tau!=0] = np.nan_to_num(self.sample_rate * 1.0 / (tau[tau!=0]))
        pitch[tau==0] = 0
        pitch_confidence[tau==0] = 0
    
        pitch[square_mag_sum < energy_threshold] = 0
        pitch_confidence[square_mag_sum < energy_threshold] = 0
        return (pitch, pitch_confidence)