From 61c9b4b22a8fd172cb70234ed8d66412e18d45e3 Mon Sep 17 00:00:00 2001
From: Sting <loic.allegre@ensiie.fr>
Date: Thu, 22 Jun 2023 10:46:39 +0200
Subject: [PATCH] Update README with some docs and resources

---
 README.md  | 42 ++++++++++++++++++++++++++++--------------
 segment.py |  2 +-
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index def9463..bd61b5e 100644
--- a/README.md
+++ b/README.md
@@ -7,29 +7,37 @@ Experiment in automatic karaoke timing.
 Having a clean python environment:
 
 - https://realpython.com/python-virtual-environments-a-primer/
-- https://www.pyimagesearch.com/2016/11/14/installing-keras-with-tensorflow-backend/
 
 An introduction to neural networks and deep learning:
 
 - http://neuralnetworksanddeeplearning.com
 
-Get a data buffer from an audio file:
 
-- https://www.analyticsvidhya.com/blog/2017/08/audio-voice-processing-deep-learning/
-- https://pypi.org/project/audioread/
-- https://pythonbasics.org/convert-mp3-to-wav/
-- http://zulko.github.io/blog/2013/10/04/read-and-write-audio-files-in-python-using-ffmpeg/
-- https://stackoverflow.com/questions/49279425/extract-human-vocals-from-song
-- https://www.geeksforgeeks.org/python-speech-recognition-on-large-audio-files/
-
-### Extracting vocals from music
+## Extracting vocals from music
 
 - https://github.com/facebookresearch/demucs
 
-### Syllable segmentation and recognition
+## Syllable segmentation
+
+### Symbolic methods
 
  - [Syllable segmentation](https://www.sciencedirect.com/science/article/pii/S1877050916319068/pdf?md5=abc426e84a71cd4f5c0e6bef9713643e&pid=1-s2.0-S1877050916319068-main.pdf&_valck=1)
  - [Syllable segmentation and recognition](https://cdn.intechopen.com/pdfs/15947/InTech-Syllable_based_speech_recognition.pdf)
+ - [Onset detection with librosa](https://librosa.org/doc/latest/onset.html)
+
+### Machine Learning & Deep Learning methods
+
+[Using CNNs on spectrogram images](https://www.ofai.at/~jan.schlueter/pubs/2014_icassp.pdf) (Schlüter, Böck, 2014) :
+ - Python implementation for Taiko rhythm games : https://www.ofai.at/~jan.schlueter/pubs/2014_icassp.pdf
+
+### Other methods
+
+Other stuff goes here
+
+## Syllable recognition
+
+To be explored if we ever want to use an AI to identify syllables without a reference lyrics file.
+
 
 # Installation
 
@@ -50,6 +58,7 @@ $ source env/bin/activate # use the virtual environement
 
 # Install the Demucs (vocal separation tool)
 $ pip install -U demucs
+$ pip install librosa
 
 # To exit the virtual environment
 $ deactivate              
@@ -59,16 +68,21 @@ $ deactivate
 
 To execute AutoKara on a MKV video file :
 ```bash
-$ python autokara.py video.mkv
+$ python autokara.py video.mkv output.ass
+```
+
+To execute AutoKara on a (pre-extracted) WAV vocals file :
+```bash
+$ python autokara.py vocals.wav output.ass --vocals
 ```
 
 
-To extract .wav audio from a MKV file :
+To only extract .wav audio from a MKV file :
 ```bash
 $ ./extractWav.sh source_video output_audio
 ```
 
-To separate vocals from instruments in an audio file :
+To only separate vocals from instruments in an audio file :
 ```bash
 demucs --two-stems=vocals -o output_folder audio_file.wav
 ```
diff --git a/segment.py b/segment.py
index 4657a99..f455a22 100644
--- a/segment.py
+++ b/segment.py
@@ -1,6 +1,6 @@
 import librosa
 import numpy as np
-import matplotlib.pyplot as plt
+# import matplotlib.pyplot as plt
 import sys
 
 
-- 
GitLab