Skip to content
Extraits de code Groupes Projets

CNN + Spectral segmentation

Fusionnées Sting a demandé de fusionner madmom vers master
6 fichiers
+ 131
50
Comparer les modifications
  • Côte à côte
  • En ligne
Fichiers
6
+ 96
0
import madmom
import numpy as np
import sys
import matplotlib.pyplot as plt
from scipy.ndimage.filters import maximum_filter
def segment(songfile, delay=-4, backtrack=False):
    """Detect onsets in an audio file and return their positions.

    Two madmom onset activation functions are computed for ``songfile``: the
    CNN detector and the 'complex_domain' spectral detector.  The spectral
    activation is scaled by its maximum and both are combined with a harmonic
    mean.  Peaks of the smoothed combination (threshold 0.6) are kept only
    where the smoothed CNN activation also exceeds 0.2.

    Args:
        songfile: Audio input handed unchanged to the madmom processors.
        delay: Offset, in frames, added to every detected onset before the
            final conversion.
        backtrack: If true, move each onset back to the closest earlier local
            minimum of the smoothed activation, at most 50 frames.

    Returns:
        numpy array of onset positions computed as ``(frame + delay) / 100``.
        Presumably seconds at madmom's default of 100 frames per second --
        TODO confirm.  Values can be negative for onsets detected within the
        first ``-delay`` frames.
    """
    cnn = madmom.features.onsets.CNNOnsetProcessor()
    spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain')

    cnn_function = cnn(songfile, num_channels=1)
    spectral_function = spectral(songfile, num_channels=1)

    # Scale the spectral activation by its peak.  A peak of exactly 0 is
    # left untouched because dividing would turn every frame into NaN.
    spectral_peak = spectral_function.max()
    if spectral_peak != 0:
        spectral_function = spectral_function / spectral_peak

    # Harmonic mean of both activations.  Frames where the denominator is 0
    # are defined as 0 instead of producing NaN from 0/0.
    total = np.asarray(cnn_function + spectral_function)
    activation_function = np.divide(
        2 * cnn_function * spectral_function,
        total,
        out=np.zeros_like(total),
        where=total != 0,
    )

    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)

    peaks = madmom.features.onsets.peak_picking(
        activation_smoothed, threshold=0.6, smooth=0
    )
    peaks = np.asarray(peaks, dtype=int)
    # Keep only the peaks that the smoothed CNN activation also supports.
    # Boolean indexing returns a copy, so the in-place edits below are safe.
    onsets = peaks[cnn_smoothed[peaks] > 0.2]

    if backtrack:
        # Backtrack onsets to closest earlier local minimum
        backtrack_max_frames = 50
        for i, initial_onset in enumerate(onsets):
            # Never step before frame 0: index -1 would silently wrap around
            # to the last frame of the activation.
            earliest = max(initial_onset - backtrack_max_frames, 0)
            frame = initial_onset
            while (frame > earliest
                   and activation_smoothed[frame - 1] < activation_smoothed[frame]):
                frame -= 1
            onsets[i] = frame

    onsets = (onsets + delay) / 100
    print(onsets)
    return onsets
if __name__ == "__main__":
    # Script mode: compute onsets for the file named on the command line,
    # print them (frame index / 100) and plot the activations for inspection.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s SONGFILE" % sys.argv[0])
    songfile = sys.argv[1]
    backtrack = False

    cnn = madmom.features.onsets.CNNOnsetProcessor()
    spectral = madmom.features.onsets.SpectralOnsetProcessor('modified_kullback_leibler')

    # Log-magnitude, log-frequency spectrogram; only used for the top plot.
    spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)
    filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(
        spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24
    )
    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1)

    cnn_function = cnn(songfile, num_channels=1)
    spectral_function = spectral(songfile, num_channels=1)

    # Scale the spectral activation by its peak.  A peak of exactly 0 is
    # left untouched because dividing would turn every frame into NaN.
    spectral_peak = spectral_function.max()
    if spectral_peak != 0:
        spectral_function = spectral_function / spectral_peak

    # Harmonic mean of both activations.  Frames where the denominator is 0
    # are defined as 0 instead of producing NaN from 0/0.
    total = np.asarray(cnn_function + spectral_function)
    activation_function = np.divide(
        2 * cnn_function * spectral_function,
        total,
        out=np.zeros_like(total),
        where=total != 0,
    )

    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)

    peaks = madmom.features.onsets.peak_picking(
        activation_smoothed, threshold=0.6, smooth=0
    )
    peaks = np.asarray(peaks, dtype=int)
    # Keep only the peaks that the smoothed CNN activation also supports.
    # Boolean indexing returns a copy, so the in-place edits below are safe.
    onsets = peaks[cnn_smoothed[peaks] > 0.2]

    # Backtrack onsets to closest earlier local minimum
    if backtrack:
        backtrack_max_frames = 50
        for i, initial_onset in enumerate(onsets):
            # Never step before frame 0: index -1 would silently wrap around
            # to the last frame of the activation.
            earliest = max(initial_onset - backtrack_max_frames, 0)
            frame = initial_onset
            while (frame > earliest
                   and activation_smoothed[frame - 1] < activation_smoothed[frame]):
                frame -= 1
            onsets[i] = frame

    print(onsets / 100)

    # Top: spectrogram.  Bottom: the activation curves, with the detected
    # onsets (as frame indices) drawn as red vertical lines.
    fig, axs = plt.subplots(nrows=2, sharex=True)
    axs[0].imshow(log_spec.T, origin='lower', aspect='auto')
    axs[1].plot(cnn_smoothed)
    axs[1].plot(spectral_function, color='green')
    axs[1].plot(activation_smoothed, color='orange')
    axs[1].vlines(onsets, 0, 1, colors='red')
    plt.show()
Chargement en cours