diff --git a/autosyl/segment.py b/autosyl/segment.py index adacae081bfd39c17847a526334c05587bd11ae9..6a54e36c971c2c60596bae459872a58418b63000 100644 --- a/autosyl/segment.py +++ b/autosyl/segment.py @@ -56,13 +56,26 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, v onsets = (onsets + delay)/100 #print(onsets) + sig = madmom.audio.signal.Signal(songfile, num_channels=1) + spec = madmom.audio.spectrogram.Spectrogram(sig) + filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24) + log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1) + + magnitude = np.max(log_spec[:,:100], axis=1) + + magnitude_threshold = 0.75 + magnitude_start = 2 + if reference_syls: syls = [] onset_index = 0 for line in reference_syls: #print(onset_index, " : ", line) l = [[onsets[onset_index + i], line[i][1]] for i in range(len(line)-1)] - l.append([word_align[onset_index + (len(line) - 2)][1]/100, '']) + next_drop = words_onsets[onset_index + len(line) - 2] + magnitude_start + while magnitude[next_drop] > magnitude_threshold: + next_drop += 1 + l.append([min(word_align[onset_index + (len(line) - 2)][1]/100, next_drop/100), '']) syls.append(l) onset_index += (len(line) - 1) else: