class Segment:
    """Wrap an audio file and locate syllable onsets in it.

    Attributes:
        file: Path of the audio file to analyse (passed unchanged to
            ``librosa.load``).
    """

    def __init__(self, file):
        self.file = file

    def onsets(self, *, plot=False):
        """Detect syllable start times with librosa's onset detection.

        Onsets are detected on the onset-strength envelope, then each one is
        backtracked to the preceding local minimum of the RMS energy curve.
        The backtracked frame indices are printed (as before); the matching
        times are returned so callers can use them.

        Args:
            plot: When true, also show a three-panel diagnostic figure
                (spectrogram, onset strength with raw/backtracked onsets,
                RMS with RMS-backtracked onsets). Requires matplotlib.

        Returns:
            Array of onset times in seconds; empty when no onset is detected.
        """
        # Imported lazily: these are heavy dependencies and are only needed
        # once onsets are actually computed.
        import librosa
        import numpy as np

        y, sr = librosa.load(self.file)

        o_env = librosa.onset.onset_strength(y=y, sr=sr)
        onset_raw = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

        S = np.abs(librosa.stft(y=y))
        rms = librosa.feature.rms(S=S)

        def backtrack(energy):
            # onset_backtrack raises on an empty event list, which happens
            # for silent input; there is nothing to move in that case.
            if onset_raw.size == 0:
                return onset_raw
            return librosa.onset.onset_backtrack(onset_raw, energy)

        onset_bt_rms = backtrack(rms[0])

        print(onset_bt_rms)

        if plot:
            import librosa.display
            import matplotlib.pyplot as plt

            times = librosa.times_like(o_env, sr=sr)
            onset_bt = backtrack(o_env)

            _, ax = plt.subplots(nrows=3, sharex=True)
            librosa.display.specshow(
                librosa.amplitude_to_db(S, ref=np.max),
                y_axis='log', x_axis='time', ax=ax[0],
            )
            ax[0].label_outer()
            ax[1].plot(times, o_env, label='Onset strength')
            ax[1].vlines(librosa.frames_to_time(onset_raw, sr=sr), 0,
                         o_env.max(), label='Raw onsets')
            ax[1].vlines(librosa.frames_to_time(onset_bt, sr=sr), 0,
                         o_env.max(), label='Backtracked', color='r')
            ax[1].legend()
            ax[1].label_outer()
            ax[2].plot(times, rms[0], label='RMS')
            ax[2].vlines(librosa.frames_to_time(onset_bt_rms, sr=sr), 0,
                         rms.max(), label='Backtracked (RMS)', color='r')
            ax[2].legend()

            plt.show()

        return librosa.frames_to_time(onset_bt_rms, sr=sr)


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: segment.py <audio_file>")

    seg = Segment(sys.argv[1])
    seg.onsets()