From 7f7df0c65dc086b1a6ba1312d203241bf44f5983 Mon Sep 17 00:00:00 2001
From: Sting <loic.allegre@ensiie.fr>
Date: Thu, 13 Jul 2023 12:51:16 +0200
Subject: [PATCH] Use phase deviation + energy + CNN

---
 cnn_madmom/segment.py | 69 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/cnn_madmom/segment.py b/cnn_madmom/segment.py
index fdab858..40034d3 100644
--- a/cnn_madmom/segment.py
+++ b/cnn_madmom/segment.py
@@ -7,17 +7,39 @@ from scipy.ndimage.filters import maximum_filter
 
 def segment(songfile):
 
-    delay = -4
-    smooth = 20
-    threshold = 1
+    delay = 0
 
     cnn = madmom.features.onsets.CNNOnsetProcessor()
-    #proc = madmom.features.onsets.OnsetPeakPickingProcessor(threshold=threshold, smooth=smooth, delay=delay, fps=100)
+    spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain')
 
-    activation_function = cnn(songfile, num_channels=1)
+
+    spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)  # num_channels=1: single-channel input
+    filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24)
+    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1)  # add=1 keeps the log argument >= 1
+
+    cnn_function = cnn(songfile, num_channels=1)
+    spectral_function = spectral(songfile, num_channels=1)
+    spectral_function = spectral_function/(spectral_function.max())
+    
+    activation_function = 0.5*cnn_function + 0.5*spectral_function
+    #activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function)
+    #activation_function = np.where(spectral_function > 0.14, cnn_function, 0)
     #onsets = proc(activation_function)
+    
+    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
+    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)
+    onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=1, smooth=0)
+    onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2])
+
+
+    # Backtrack each onset to the closest earlier local minimum, at most backtrack_max_frames back and never before frame 0
+    backtrack_max_frames = 50
+    for i in range(len(onsets)):
+        earliest_frame = max(0, onsets[i] - backtrack_max_frames)  # clamp: index -1 would wrap around to the last frame
+        while onsets[i] > earliest_frame and activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]]:
+            onsets[i] -= 1
 
-    onsets = (madmom.features.onsets.peak_picking(activation_function, threshold, smooth=smooth) + delay)/100
+    onsets = (onsets + delay)/100
 
     print(onsets)
 
@@ -29,22 +51,41 @@ if __name__ == "__main__":
     songfile = sys.argv[1]
 
     cnn = madmom.features.onsets.CNNOnsetProcessor()
+    spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain')
 
-    spec = spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)
-    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(spec, add=1)
 
-    activation_function = cnn(songfile, num_channels=1)
+    spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)  # num_channels=1: single-channel input
+    filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24)
+    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1)  # add=1 keeps the log argument >= 1
+
+    cnn_function = cnn(songfile, num_channels=1)
+    spectral_function = spectral(songfile, num_channels=1)
+    spectral_function = spectral_function/(spectral_function.max())
+    
+    activation_function = 0.5*cnn_function + 0.5*spectral_function
+    #activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function)
+    #activation_function = np.where(spectral_function > 0.14, cnn_function, 0)
     #onsets = proc(activation_function)
-
-    onsets = madmom.features.onsets.peak_picking(activation_function, threshold=1, smooth=20)
-    #onset_smoothed = madmom.audio.signal.smooth(activation_function, 20)
+    
+    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
+    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)
+    onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=1, smooth=0)
+    onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2])
+    
+    # Backtrack each onset to the closest earlier local minimum, at most backtrack_max_frames back and never before frame 0
+    backtrack_max_frames = 50
+    for i in range(len(onsets)):
+        earliest_frame = max(0, onsets[i] - backtrack_max_frames)  # clamp: index -1 would wrap around to the last frame
+        while onsets[i] > earliest_frame and activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]]:
+            onsets[i] -= 1
 
     print(onsets/100)
 
     fig, axs = plt.subplots(nrows=2, sharex=True)
     axs[0].imshow(log_spec.T, origin='lower', aspect='auto')
-    axs[1].plot(activation_function)
-    #axs[1].plot(onset_smoothed, color='red')
+    axs[1].plot(cnn_smoothed)
+    axs[1].plot(spectral_function, color='green')
+    axs[1].plot(activation_smoothed, color='pink')
     axs[1].vlines(onsets, 0, 1, colors='red')
 
     plt.show()
-- 
GitLab