From 7f7df0c65dc086b1a6ba1312d203241bf44f5983 Mon Sep 17 00:00:00 2001 From: Sting <loic.allegre@ensiie.fr> Date: Thu, 13 Jul 2023 12:51:16 +0200 Subject: [PATCH] Use phase deviation + energy + CNN --- cnn_madmom/segment.py | 69 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/cnn_madmom/segment.py b/cnn_madmom/segment.py index fdab858..40034d3 100644 --- a/cnn_madmom/segment.py +++ b/cnn_madmom/segment.py @@ -7,17 +7,39 @@ from scipy.ndimage.filters import maximum_filter def segment(songfile): - delay = -4 - smooth = 20 - threshold = 1 + delay = 0 cnn = madmom.features.onsets.CNNOnsetProcessor() - #proc = madmom.features.onsets.OnsetPeakPickingProcessor(threshold=threshold, smooth=smooth, delay=delay, fps=100) + spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain') - activation_function = cnn(songfile, num_channels=1) + + spec = spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1) + filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24) + log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1) + + cnn_function = cnn(songfile, num_channels=1) + spectral_function = spectral(songfile, num_channels=1) + spectral_function = spectral_function/(spectral_function.max()) + + activation_function = 0.5*cnn_function + 0.5*spectral_function + #activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function) + #activation_function = np.where(spectral_function > 0.14, cnn_function, 0) #onsets = proc(activation_function) + + activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) + cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) + onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=1, smooth=0) + onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2]) + + + # Backtrack onsets to closest earlier local minimum + backtrack_max_frames = 50 + for i in range(len(onsets)): + initial_onset = onsets[i] + while(activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]] and onsets[i] > initial_onset - backtrack_max_frames): + onsets[i] -= 1 - onsets = (madmom.features.onsets.peak_picking(activation_function, threshold, smooth=smooth) + delay)/100 + onsets = (onsets + delay)/100 print(onsets) @@ -29,22 +51,41 @@ if __name__ == "__main__": songfile = sys.argv[1] cnn = madmom.features.onsets.CNNOnsetProcessor() + spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain') - spec = spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1) - log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(spec, add=1) - activation_function = cnn(songfile, num_channels=1) + spec = spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1) + filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24) + log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1) + + cnn_function = cnn(songfile, num_channels=1) + spectral_function = spectral(songfile, num_channels=1) + spectral_function = spectral_function/(spectral_function.max()) + + activation_function = 0.5*cnn_function + 0.5*spectral_function + #activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function) + #activation_function = np.where(spectral_function > 0.14, cnn_function, 0) #onsets = proc(activation_function) - - onsets = madmom.features.onsets.peak_picking(activation_function, threshold=1, smooth=20) - #onset_smoothed = madmom.audio.signal.smooth(activation_function, 20) + + activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) + cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) + onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=1, smooth=0) + onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2]) + + # Backtrack onsets to closest earlier local minimum + backtrack_max_frames = 50 + for i in range(len(onsets)): + initial_onset = onsets[i] + while(activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]] and onsets[i] > initial_onset - backtrack_max_frames): + onsets[i] -= 1 print(onsets/100) fig, axs = plt.subplots(nrows=2, sharex=True) axs[0].imshow(log_spec.T, origin='lower', aspect='auto') - axs[1].plot(activation_function) - #axs[1].plot(onset_smoothed, color='red') + axs[1].plot(cnn_smoothed) + axs[1].plot(spectral_function, color='green') + axs[1].plot(activation_smoothed, color='pink') axs[1].vlines(onsets, 0, 1, colors='red') plt.show() -- GitLab