diff --git a/cnn_madmom/segment.py b/cnn_madmom/segment.py
index c3acd9e16e4c6d983564309d6f293334ce898df0..1c49adcadfbf2dbcdd34a5f70f4a3f11f69302f1 100644
--- a/cnn_madmom/segment.py
+++ b/cnn_madmom/segment.py
@@ -7,14 +7,62 @@ from scipy.ndimage.filters import maximum_filter
 
+# Peak-picking settings shared by segment() and the debug plot below.
+FPS = 100  # frames per second; converts frame indices to seconds
+THRESHOLD = 0.4  # threshold handed to madmom's peak_picking
+SMOOTH = 20  # smoothing size handed to madmom's peak_picking
+DELAY = -4  # offset added to each picked peak index before conversion
+
 
 def segment(songfile):
+    """Return the onset times, in seconds, detected in ``songfile``.
+
+    The audio is run through madmom's CNN onset detector; peaks are picked
+    from the resulting activation function, shifted by ``DELAY`` frames and
+    converted from frame indices to seconds by dividing by ``FPS``.
+
+    Args:
+        songfile: Audio file to analyse; handed unchanged to madmom's
+            ``CNNOnsetProcessor``.
+
+    Returns:
+        The shifted peak positions divided by ``FPS``. The same values are
+        also printed to stdout.
+    """
     cnn = madmom.features.onsets.CNNOnsetProcessor()
 
-    proc = madmom.features.onsets.OnsetPeakPickingProcessor(threshold=0.4, smooth=5, delay=0, fps=100)
-
     activation_function = cnn(songfile, num_channels=1)
-    #onsets = proc(activation_function)
 
-    onsets = madmom.features.onsets.peak_picking(activation_function, 0.4, smooth=10)/100
+    peaks = madmom.features.onsets.peak_picking(
+        activation_function, THRESHOLD, smooth=SMOOTH)
+    onsets = (peaks + DELAY) / FPS
 
     print(onsets)
-    return onsets
\ No newline at end of file
+    return onsets
+
+
+if __name__ == "__main__":
+    # Debug view: plot the log spectrogram, the onset activation and the
+    # picked peaks of the audio file given on the command line.
+    if len(sys.argv) < 2:
+        sys.exit("usage: python segment.py SONGFILE")
+    songfile = sys.argv[1]
+
+    cnn = madmom.features.onsets.CNNOnsetProcessor()
+
+    spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)
+    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(spec, add=1)
+
+    activation_function = cnn(songfile, num_channels=1)
+
+    # Keep the peaks as frame indices, without DELAY, so the red markers sit
+    # on the activation curve they were picked from.
+    onsets = madmom.features.onsets.peak_picking(
+        activation_function, THRESHOLD, smooth=SMOOTH)
+
+    print(onsets / FPS)
+
+    fig, axs = plt.subplots(nrows=2, sharex=True)
+    axs[0].imshow(log_spec.T, origin='lower', aspect='auto')
+    axs[1].plot(activation_function)
+    axs[1].vlines(onsets, 0, 1, colors='red')
+
+    plt.show()