diff --git a/autosyl/segment.py b/autosyl/segment.py index 2f8ac129f6f758c8084d9b402a5c387edb3dea03..5f4d785b7767ce5a92b93f048c3238717e7be823 100644 --- a/autosyl/segment.py +++ b/autosyl/segment.py @@ -33,10 +33,16 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): #activation_function = np.where(spectral_function > 0.14, cnn_function, 0) #onsets = proc(activation_function) + if reference_syls: + activation_threshold = 0.1 + else: + activation_threshold = 0.2 + + activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) - onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=0.1, smooth=0) - #onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.1]) + onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=activation_threshold, smooth=0) + #onsets = np.array([o for o in onsets if cnn_smoothed[o] > activation_threshold]) pitch = parsel.to_pitch() pitch_values = pitch.selected_array['frequency'] @@ -148,8 +154,10 @@ if __name__ == "__main__": songfile = sys.argv[1] - if(len(sys.argv) == 3): + if(len(sys.argv) >= 3): reference_syls = getSyls(sys.argv[2]) + else: + reference_syls = None #print(reference_syls) @@ -176,9 +184,15 @@ if __name__ == "__main__": #activation_function = np.where(spectral_function > 0.14, cnn_function, 0) #onsets = proc(activation_function) + + if reference_syls: + activation_threshold = 0.1 + else: + activation_threshold = 0.2 + activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) - onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=0.1, smooth=0) + onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=activation_threshold, smooth=0) #onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.1]) pitch = parsel.to_pitch() @@ -226,18 +240,20 @@ if __name__ == "__main__": print(onsets/100) - reference_onsets = [syl[0]+8 for line in reference_syls for syl in line[:-1]] + if reference_syls: + reference_onsets = [syl[0]+8 for line in reference_syls for syl in line[:-1]] fig, axs = plt.subplots(nrows=2, sharex=True) axs[0].imshow(log_spec.T, origin='lower', aspect='auto') - axs[0].vlines(reference_onsets, 0, 140, colors='red') + if reference_syls: + axs[0].vlines(reference_onsets, 0, 140, colors='red') axs[0].plot((pitch_values/np.max(pitch_values))*140, color='yellow') axs[1].plot(mask_function) #axs[1].plot(cnn_smoothed) #axs[1].plot(spectral_function, color='green') axs[1].plot(activation_smoothed, color='orange') axs[1].vlines(onsets, 0, 2, colors='red') - axs[1].hlines([max(mask_threshold, 0)], 0, onsets[-1]+100, colors='black') + axs[1].hlines([max(mask_threshold, 0), activation_threshold], 0, onsets[-1]+100, colors='black') #bins = np.arange(0, 1, 0.02) #hist, hist_axs = plt.subplots(nrows=1)