diff --git a/.gitignore b/.gitignore
index 0b531c7db05713c9016533ac057376e8a061e62d..b1db3f291983c114d523b100e9c3ae149e731c23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,17 +1,6 @@
-*
-!.gitignore
-!README.md
-!requirements.txt
-!extractWav.sh
-!extractAss.sh
-!karaUtils.py
-!autokara.py
-!assUtils.py
-!process_train_data.sh
-!cnn_prepare_data.py
-!cnn_train.py
-!*/cnn/segment.py
-!*/cnn/music_processor.py
-!*/cnn/model.py
-!rosa/*.py
-media/
\ No newline at end of file
+__pycache__/
+data/
+env/
+media/
+models/
+
diff --git a/README.md b/README.md
index d22d58f3a0514c7ab184a0aa956d5de0c9e97d5c..04b43ea3eeac6eb6cdaf19d7e733738be4f5a9a6 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ An introduction to neural networks and deep learning:
 ### Machine Learning & Deep Learning methods
 
 [Using CNNs on spectrogram images](https://www.ofai.at/~jan.schlueter/pubs/2014_icassp.pdf) (Schlüter, Böck, 2014) :
+ - [MADMOM implementation](https://madmom.readthedocs.io/en/v0.16/modules/features/onsets.html)
  - Python implementation for Taiko rythm games : https://github.com/seiichiinoue/odcnn
 
 ### Other methods
@@ -45,7 +46,9 @@ If we ever want to use an AI to identify syllables without a reference lyrics fi
 
 - MKVToolnix (at least the CLI utils)
 - Python >= 3.8
-- PyTorch : follow the instructions [here](https://pytorch.org/get-started/locally/)
+
+Optional :
+- PyTorch for custom model training : follow the instructions [here](https://pytorch.org/get-started/locally/)
 
 All other python modules can be installed directly through pip, see further.
 
@@ -62,35 +65,12 @@ $ pip install -r requirements.txt
 $ deactivate              
 ```
 
-Having a CUDA-capable GPU is optional, but can greatly reduce processing time.
+Having a CUDA-capable GPU is optional, but can greatly reduce processing time in some situations.
 
 
 # Use
 
-
-## Training
-
-To extract vocals and ASS from MKV video files:
-```bash
-$ ./process_train_data video_folder train_folder
-```
-
-To prepare the training data for the model :
-```bash
-$ python cnn_prepare_data.py train train_folder 
-```
-
-Prepared data will be stored in `./data/pickles/train_data.pickle`
-
-To train the model on the prepared data :
-```bash
-$ python cnn_train.py
-```
-
-The model will be written to `./models/model.pth`
-
-
-## Infer
+## Inference
 
 To execute AutoKara on a MKV video file :
 ```bash
diff --git a/assUtils.py b/assUtils.py
index 510e639f8c31ea0d41365f84233cf7abffabe374..3345d5b1665b0c9b21812ab3fcfbf6a9ee3a82c8 100644
--- a/assUtils.py
+++ b/assUtils.py
@@ -51,12 +51,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 '''
         self.file.write(header)
 
-    def writeSyls(self, syl_timings):
+    def writeSyls(self, syl_timings, syls_per_line=10000):
         last_syl_dur = 500
-        start_time = timeToDate(syl_timings[0][0])
+        syl_index = 0
+        while syl_index < (len(syl_timings) - syls_per_line):
+            start_time = timeToDate(syl_timings[syl_index][0])
+            end_time = timeToDate(syl_timings[syl_index + syls_per_line][0])
+            line = f'Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,'
+            for i in range(syl_index, syl_index + syls_per_line):
+                syl_dur = round((syl_timings[i+1][0] - syl_timings[i][0]) * 100)
+                line += f'{{\k{syl_dur:d}}}{syl_timings[i][1]:s}'
+            line += '\n'
+            self.file.write(line)
+            syl_index += syls_per_line
+
+        start_time = timeToDate(syl_timings[syl_index][0])
         end_time = timeToDate(syl_timings[-1][0] + last_syl_dur//100)
         line = f'Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,'
-        for i in range(len(syl_timings) - 1):
+        for i in range(syl_index, len(syl_timings) - 1):
             syl_dur = round((syl_timings[i+1][0] - syl_timings[i][0]) * 100)
             line += f'{{\k{syl_dur:d}}}{syl_timings[i][1]:s}'
         line += f'{{\k{last_syl_dur:d}}}{syl_timings[-1][1]:s}\n'
diff --git a/autokara.py b/autokara.py
index c62053a37ca55ae6803f47f29606f6df79bbd07b..e7f69b174336edf669cc884c52275ae95fdea7f7 100644
--- a/autokara.py
+++ b/autokara.py
@@ -6,7 +6,7 @@ import shlex
 from pathlib import Path
 from assUtils import AssWriter
 
-from cnn.segment import segment
+from cnn_madmom.segment import segment
 
 
 parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool')
@@ -42,12 +42,12 @@ else:
 
 print("Identifying syl starts...")
 onsets = segment(vocals_file)
-syls = [[t, ''] for t in onsets]
+syls = [[t, 'la'] for t in onsets]
 
 print("Syls found, writing ASS file...")
 writer = AssWriter()
 writer.openAss(ass_file)
 writer.writeHeader()
-writer.writeSyls(syls)
+writer.writeSyls(syls, syls_per_line=10)
 writer.closeAss()
 
diff --git a/cnn_madmom/segment.py b/cnn_madmom/segment.py
new file mode 100644
index 0000000000000000000000000000000000000000..a86921df036ac72943a271da2440cbfae1b1e0a4
--- /dev/null
+++ b/cnn_madmom/segment.py
@@ -0,0 +1,96 @@
+import madmom
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+from scipy.ndimage.filters import maximum_filter
+
+
+def segment(songfile, delay=-4, backtrack=False):
+    """Detect onsets in an audio file and return their times as a NumPy array.
+
+    The CNN and complex-domain spectral onset activations are fused with a
+    harmonic mean, smoothed and peak-picked. `delay` is a frame offset added
+    to every onset; `backtrack` moves each onset to the preceding local
+    minimum. Frames are divided by 100 (assumes 100 fps - TODO confirm).
+    """
+    cnn = madmom.features.onsets.CNNOnsetProcessor()
+    spectral = madmom.features.onsets.SpectralOnsetProcessor('complex_domain')
+
+    cnn_function = cnn(songfile, num_channels=1)
+    spectral_function = spectral(songfile, num_channels=1)
+    # Scale the peak to 1; skip for a silent track, where 0/0 would give NaN.
+    spectral_peak = spectral_function.max()
+    if spectral_peak > 0:
+        spectral_function = spectral_function / spectral_peak
+
+    # Harmonic mean of both activations, defined as 0 where both are 0.
+    total = cnn_function + spectral_function
+    product = 2 * cnn_function * spectral_function
+    activation_function = np.divide(product, total, out=np.zeros_like(product), where=total > 0)
+
+    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
+    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)
+    onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=0.6, smooth=0)
+    # Keep only the peaks where the smoothed CNN activation exceeds 0.2.
+    onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2])
+
+    if backtrack:
+        # Backtrack onsets to closest earlier local minimum
+        backtrack_max_frames = 50
+        for i in range(len(onsets)):
+            earliest = max(onsets[i] - backtrack_max_frames, 0)
+            # Stop at frame 0 so the index never wraps around to the end.
+            while onsets[i] > earliest and activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]]:
+                onsets[i] -= 1
+
+    onsets = (onsets + delay)/100
+    print(onsets)
+    return onsets
+
+
+
+if __name__ == "__main__":
+    # Debug entry point: detect onsets in the file given as first argument,
+    # print them (frame / 100) and plot the curves over a log spectrogram.
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <audio file>")
+    songfile = sys.argv[1]
+    backtrack = False
+    cnn = madmom.features.onsets.CNNOnsetProcessor()
+    spectral = madmom.features.onsets.SpectralOnsetProcessor('modified_kullback_leibler')
+    spec = madmom.audio.spectrogram.Spectrogram(songfile, num_channels=1)
+    filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24)
+    log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1)
+    cnn_function = cnn(songfile, num_channels=1)
+    spectral_function = spectral(songfile, num_channels=1)
+    # Scale the peak to 1; skip for a silent track, where 0/0 would give NaN.
+    spectral_peak = spectral_function.max()
+    if spectral_peak > 0:
+        spectral_function = spectral_function / spectral_peak
+
+    # Harmonic mean of both activations, defined as 0 where both are 0.
+    total = cnn_function + spectral_function
+    product = 2 * cnn_function * spectral_function
+    activation_function = np.divide(product, total, out=np.zeros_like(product), where=total > 0)
+
+    activation_smoothed = madmom.audio.signal.smooth(activation_function, 20)
+    cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20)
+    onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=0.6, smooth=0)
+    onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.2])
+
+    # Backtrack onsets to closest earlier local minimum (never below frame 0)
+    if backtrack:
+        backtrack_max_frames = 50
+        for i in range(len(onsets)):
+            earliest = max(onsets[i] - backtrack_max_frames, 0)
+            while onsets[i] > earliest and activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]]:
+                onsets[i] -= 1
+
+    print(onsets/100)
+    fig, axs = plt.subplots(nrows=2, sharex=True)
+    axs[0].imshow(log_spec.T, origin='lower', aspect='auto')
+    axs[1].plot(cnn_smoothed)
+    axs[1].plot(spectral_function, color='green')
+    axs[1].plot(activation_smoothed, color='orange')
+    axs[1].vlines(onsets, 0, 1, colors='red')
+    plt.show()
diff --git a/requirements.txt b/requirements.txt
index 4a1d9cbfadfab62043cc52f5798be97e66bc7b97..770a61b27edbe79b68aa14593a431c0a841062bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,8 @@ soundfile
 sklearn
 matplotlib
 numpy
-tqdm
\ No newline at end of file
+tqdm
+scipy
+cython
+mido
+git+https://github.com/CPJKU/madmom.git
\ No newline at end of file