Source code for untwist.analysis.pitch

"""
Pitch detection algorithms
"""
import numpy as np
from scipy import signal, interpolate
from untwist.base.algorithms import Processor


[docs]class ZCR(Processor): """ Zero-crossing rate (can be used as rough pitch estimator or as feature). """ def __init__(self): pass
[docs] def process(self, wave): n_crossings = np.count_nonzero(np.diff(np.sign(wave[:,0]))) return 0.5 * n_crossings / wave.duration
[docs]class HPS(Processor): """ Harmonic product spectrum pitch estimator. Based on implementation in: Lerch, A. (2012). An introduction to audio content analysis: Applications in signal processing and music informatics. John Wiley & Sons. """ def __init__(self, n_harms= 8, min_pitch = 50, max_pitch = 5000): self.n_harms = n_harms self.min_pitch = min_pitch self.max_pitch = max_pitch
[docs] def process(self, X): min_bin = np.round((float(self.min_pitch)/X.sample_rate) * X.shape[0]) max_bin = np.round((float(self.max_pitch)/X.sample_rate) * X.shape[0]) HPS = X.magnitude().copy() for h in range(2, self.n_harms): H = X.magnitude()[0:HPS.shape[0]:h,:] HP = np.zeros_like(HPS) HP[0:H.shape[0],:] = H HPS *= HP pitch_bins = min_bin+np.argmax(HPS[min_bin:max_bin,:],0) return pitch_bins * 0.5 * X.sample_rate / X.shape[0]
[docs]class YINFFT(Processor): """ YINFFT F0 estimation algorithm Brossier, P. M. (2006). Automatic annotation of musical audio for interactive applications (Doctoral dissertation, Queen Mary University of London). Based on Essentia C++ implementation http://essentia.upf.edu """ freq_mask = [0., 20., 25., 31.5, 40., 50., 63., 80., 100., 125., 160.,\ 200., 250., 315., 400., 500., 630., 800., 1000., 1250., 1600., 2000.,\ 2500.,3150., 4000., 5000., 6300., 8000., 9000., 10000.,12500., 15000.,\ 20000., 25100] weight_mask = [-75.8, -70.1, -60.8, -52.1, -44.2, -37.5, -31.3, -25.6,\ -20.9, -16.5, -12.6, -9.6, -7.0, -4.7, -3.0, -1.8, -0.8, -0.2, -0.0,\ 0.5, 1.6, 3.2, 5.4, 7.8, 8.1, 5.3, -2.4, -11.1, -12.8, -12.2, -7.4,\ -17.8, -17.8, -17.8]
[docs] def compute_weights(self): self.weights = np.zeros((self.n_bins, 1)) j = 1 bin_hz = float(self.sample_rate) /(2 * (self.n_bins - 1)) for i in range(self.n_bins): freq = i * bin_hz while(self.freq_mask[j] < freq): j+=1 a0 = self.weight_mask[j-1]; f0 = self.freq_mask[j-1] a1 = self.weight_mask[j];f1 = self.freq_mask[j] if f0 == 0: self.weights[i] = (a1-a0)/f1*freq + a0 else: self.weights[i] = (a1-a0)/(f1-f0)*freq +\ (a0 - (a1 - a0)/(f1/f0 - 1.)); self.weights[i] = np.power(10,self.weights[i] / 10.0)
def __init__(self, n_bins, sample_rate, min_pitch = 100, max_pitch = 5000, interp = True): self.n_bins = n_bins self.sample_rate = sample_rate self.compute_weights() self.tau_max = int(min(np.ceil(float(sample_rate)/ min_pitch),n_bins)) self.tau_min = int(min(np.floor(float(sample_rate)/ max_pitch),n_bins)) self.interp = interp
[docs] def process(self, spectrogram): mult = np.multiply(self.weights, np.ones((1, spectrogram.shape[1]))) square_mag = np.power(spectrogram.magnitude(),2) square_mag = np.vstack((square_mag[1:,:],np.flipud(square_mag[1:,:]))) square_mag_sum = np.sum(square_mag, 0) energy_threshold = np.percentile(square_mag_sum, 25) res = np.fft.rfft(square_mag.T, 2 * (self.n_bins - 1)).T resN = np.abs(res) resP = np.angle(res) yin = np.zeros((spectrogram.shape)) yin[0,:] = 1 tmp = np.zeros((spectrogram.shape[1],)) for tau in range(1, self.n_bins): yin[tau,:] = square_mag_sum - resN[tau,:] * np.cos(resP[tau,:]) tmp += yin[tau,:] yin[tau,:] *= tau/tmp tau = self.tau_min + np.argmin(yin[self.tau_min:self.tau_max,:],0) y_min = np.min(yin[self.tau_min:self.tau_max,:],0) tau = tau[:,np.newaxis] if self.interp: ranges = np.hstack((tau - 5, tau - 4, tau - 3, tau-2,tau-1,tau, tau+1,tau + 2, tau + 3, tau + 4, tau + 5)) new_tau = np.empty_like(tau) for frame in range(spectrogram.shape[1]): r = ranges[frame] r[0] = max(r[0], 0) r[1] = min(r[1], self.n_bins-1) val = yin[r.astype(int), frame] tck = interpolate.splrep(r, val, s=0, k = 2) xnew = np.arange(r[0],r[-1], (r[-1] - r[0]) /10) ynew = interpolate.splev(xnew, tck, der=0) new_tau[frame] = xnew[np.argmin(ynew)] y_min[frame] = np.min(ynew) tau = new_tau tau = tau[:,0] pitch_confidence = 1- y_min pitch = np.zeros((spectrogram.shape[1],)) pitch[tau!=0] = np.nan_to_num(self.sample_rate * 1.0 / (tau[tau!=0])) pitch[tau==0] = 0 pitch_confidence[tau==0] = 0 pitch[square_mag_sum < energy_threshold] = 0 pitch_confidence[square_mag_sum < energy_threshold] = 0 return (pitch, pitch_confidence)