diff --git a/MANIFEST.in b/MANIFEST.in
index 7c6af3d33e05eaea206a2d73856b5224bcbe2b3d..06bf2ec9ca32d78fc96496a69e3b158bacccd7e2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
 include requirements.txt
 include README.md
-include preprocess_media.sh
 recursive-include autokara *
diff --git a/README.md b/README.md
index acc798d4b426add94665173de3779ef4d828d416..22488ab17dd3e512bcb1f2165e31cf947240a26e 100644
--- a/README.md
+++ b/README.md
@@ -14,24 +14,22 @@ All other python modules can be installed directly through PIP, see next section
 
 ## Install
 
-### Linux
-
 Using a virtual environment is strongly recommended (but not mandatory if you know what you're doing) :
 ```bash
-$ python -m venv env     # create the virtual environment, do it once
-$ source env/bin/activate # use the virtual environement
+python -m venv env     # create the virtual environment, do it once
+source env/bin/activate # use the virtual environment
 
 # To exit the virtual environment
-$ deactivate              
+deactivate              
 ```
 
 The simplest way to install Autokara is through PIP.
 ```bash
 # Using HTTPS
-$ pip install git+https://git.iiens.net/bakaclub/autokara.git
+pip install git+https://git.iiens.net/bakaclub/autokara.git
 
 # Or SSH
-$ pip install git+ssh://git@git.iiens.net:bakaclub/autokara.git
+pip install git+ssh://git@git.iiens.net:bakaclub/autokara.git
 ```
 
 Or you can clone the repo and use `pip install <repo_directory>` if you prefer.
@@ -39,12 +37,9 @@ Or you can clone the repo and use `pip install <repo_directory>` if you prefer.
 
 To use the custom phonetic mappings for Japanese Romaji and other non-English languages, you need to update manually (for now) the g2p DB (within the venv):
 ```bash
-$ autokara-gen-lang
+autokara-gen-lang
 ```
 
-### Windows
-
-Still working on that...
 
 ## Configuration
 
@@ -61,32 +56,37 @@ This new file has priority over the default one, which is used only as fallback.
 
 To use Autokara, you need :
  - A media file of the song (video, or pre-extracted vocals)
- - An ASS file with the lyrics, split by syllable
+ - An ASS file with the lyrics, split by syllable (you can use the [Auto-Split](https://docs.karaokes.moe/aegisub/auto-split.lua) script in Aegisub, but splitting manually may yield better results)
 
 To execute AutoKara on a MKV video file and an ASS file containing the lyrics (ASS will be overwritten):
 ```bash
-$ autokara video.mkv lyrics.ass
+autokara video.mkv lyrics.ass
 ```
 
 To output to a different file (and keep the original) :
 ```bash
-$ autokara video.mkv lyrics.ass -o output.ass
+autokara video.mkv lyrics.ass -o output.ass
 ```
 
 To execute AutoKara on a (pre-extracted) WAV (or OGG, MP3, ...) vocals file, pass the `--vocals` flag :
 ```bash
-$ autokara vocals.wav output.ass --vocals
+autokara vocals.wav lyrics.ass --vocals
 ```
 
-To use a phonetic transcription optimized for a specific language, use `--lang` (or `-l`) :
+To use a phonetic transcription optimized for a specific language, use `--lang` (or `-l`). The default is Japanese Romaji.
+You can also set a separate language for uppercase words with `--uppercase-lang` (its default is set in your config file) :
 ```bash
-$ autokara vocals.wav output.ass --lang jp
+# Use French transcription
+autokara video.mkv lyrics.ass --lang fr
+
+# Use English transcription, but treat all uppercase words as French :
+autokara video.mkv lyrics.ass --lang en --uppercase-lang fr
 ```
 
 Available languages options are :
 ```
-jp : Japanese Romaji (default)
-en : English
+jp : Japanese Romaji (base default)
+en : English (uppercase default)
 fr : French
 fi : Finnish
 da : Danish
@@ -94,36 +94,35 @@ da : Danish
 
 Full help for all options is available with :
 ```bash
-$ autokara -h
+autokara -h
 ```
 
 ## Useful scripts
 
-To only extract .wav audio from a MKV file :
-```bash
-$ ./extractWav.sh source_video output_audio
-```
+### Manual preprocessing
 
-To only extract .ass sub file from a MKV file :
-```bash
-$ ./extractAss.sh source_video output_subs
-```
+Use `autokara-preprocess` if you want to manually preprocess video/lyrics in advance :
 
-To only separate vocals from instruments in an audio file :
 ```bash
-demucs --two-stems=vocals -o output_folder audio_file.wav
-```
+# Extract vocals from video :
+autokara-preprocess --vocals video_file output_folder/ 
 
-Batch preprocessing (vocals + ASS extraction) of all videos in a directory :
-```bash
-$ ./preprocess_media.sh video_folder output_folder
+# Extract ASS file from a MKV containing a subtitle track :
+autokara-preprocess --lyrics video_file output_file.ass
+
+# Do both at once :
+autokara-preprocess --full video_file output_folder/
 ```
 
-A visualization tool, mainly intended for debug.
-Does the same as autokara.py, but instead of writing to a file, plots a graphic with onset times, spectrogram, probability curves,... 
-Does not work on video files, only separated vocals audio files
+Then you can use Autokara on the extracted files with the `--vocals` flag.
+
+### Sound and onsets plotting
+
+A visualization tool, mainly intended for debugging or for the curious.
+It does the same as `autokara`, but instead of writing to a file, it plots a graph with syllable onset times, spectrogram, probability curves, etc.
+It does not work on video files, only on separated vocals audio files :
 ```bash
-$ autokara-plot vocals.wav lyrics.ass
+autokara-plot vocals.wav lyrics.ass
 ```
 
 
diff --git a/autokara/autokara.py b/autokara/autokara.py
index d9812112f21e09bc96a6ddb8571bd6496561840e..21f8d2189e878d924bcfee4c25906f060c75efee 100644
--- a/autokara/autokara.py
+++ b/autokara/autokara.py
@@ -3,11 +3,14 @@ import argparse
 import demucs.separate
 import subprocess
 import shlex
+import shutil
 from pathlib import Path
 from configparser import ConfigParser
 
 from .autosyl.assUtils import AssWriter, getSyls, getHeader
 from .autosyl.segment import segment
+from .preprocess.audio import *
+from .preprocess.lyrics import *
 
 
 
@@ -25,6 +28,7 @@ def main(opts=None):
     args = parser.parse_args(opts)
 
     ass_file = args.ass_file
+    source_file = args.source_file
     verbose = args.verbose
 
     here = Path(__file__).parent
@@ -54,16 +58,14 @@ def main(opts=None):
         basename = Path(args.source_file).stem
         audio_file = f"{media_dir:s}/audio/{basename:s}.wav"
 
-        subprocess.call(shlex.split(f'{str(here)}/extractWav.sh "{args.source_file:s}" "{audio_file}"'))
+        extract_audio(source_file, output_file=audio_file)
 
         Path(f"{media_dir:s}/vocals").mkdir(parents=True, exist_ok=True)
         output_folder = f"{media_dir:s}/vocals"
 
         print("Isolating vocals...")
 
-        # Not working, don't know why
-        # demucs.separate.main(shlex.split('--two-stems vocals -o "%s" "%s"' % (output_folder, audio_file)))
-        subprocess.call(shlex.split(f'demucs --two-stems vocals -o "{output_folder:s}" "{audio_file:s}"'))
+        extract_vocals(audio_file, output_folder)
 
         vocals_file = f"{media_dir:s}/vocals/htdemucs/{basename:s}/vocals.wav"
     else:
@@ -92,6 +94,11 @@ def main(opts=None):
     writer.writeSyls(syls, line_meta)
     writer.closeAss()
 
+    # clean up
+    if not args.vocals:
+        shutil.rmtree(f'{media_dir:s}/vocals/htdemucs/{basename:s}')
+        Path(audio_file).unlink(missing_ok=True)
+
 
 if __name__ == "__main__":
     main()
diff --git a/autokara/autosyl/assUtils.py b/autokara/autosyl/assUtils.py
index 0362147b20ee298fdd7289d1e0047fe2d65cf5f4..32521d7fc4dca56939ac929e6cec8f74b06823c7 100644
--- a/autokara/autosyl/assUtils.py
+++ b/autokara/autosyl/assUtils.py
@@ -32,25 +32,26 @@ def getSyls(ass_file):
         LINES_KARA = re.compile(r"(?:Comment|Dialogue):.*(\d+:\d{2}:\d{2}.\d{2}),(\d+:\d{2}:\d{2}.\d{2}),([^,]*),([^,]*),(\d+),(\d+),(\d+),(?:(?!fx|template|code)\w)*,(.*)\n")
         RGX_TAGS = re.compile(r"\{\\k(\d+)\}([^\{\n\r]*)")
         for line in LINES_KARA.findall(CONTENT):
-            syl_line = []
-            lastTime = dateToTime(line[0])
-            syl_line_index = 0
-            for couple in RGX_TAGS.findall(line[7]):
-                if couple[1] != '' and not strip_regex.sub('', couple[1]) == '':
-                    syl_line.append([lastTime, couple[1], int(couple[0])])
-                    syl_line_index += 1
-                if couple[1].isspace() and len(couple[1]) > 0 and syl_line_index > 0:
-                    syl_line[syl_line_index - 1][1] += " "
-                lastTime += int(couple[0])
-            syl_line.append([lastTime, '', 0])
-            SYLS.append(syl_line)
-            line_meta = {}
-            line_meta['stylename'] = line[2]
-            line_meta['actor'] = line[3]
-            line_meta['margin_l'] = int(line[4])
-            line_meta['margin_r'] = int(line[5])
-            line_meta['margin_v'] = int(line[6])
-            META.append(line_meta)
+            if line[7].strip() != "":
+                syl_line = []
+                lastTime = dateToTime(line[0])
+                syl_line_index = 0
+                for couple in RGX_TAGS.findall(line[7]):
+                    if couple[1] != '' and not strip_regex.sub('', couple[1]) == '':
+                        syl_line.append([lastTime, couple[1], int(couple[0])])
+                        syl_line_index += 1
+                    if couple[1].isspace() and len(couple[1]) > 0 and syl_line_index > 0:
+                        syl_line[syl_line_index - 1][1] += " "
+                    lastTime += int(couple[0])
+                syl_line.append([lastTime, '', 0])
+                SYLS.append(syl_line)
+                line_meta = {}
+                line_meta['stylename'] = line[2]
+                line_meta['actor'] = line[3]
+                line_meta['margin_l'] = int(line[4])
+                line_meta['margin_r'] = int(line[5])
+                line_meta['margin_v'] = int(line[6])
+                META.append(line_meta)
     return SYLS, META
 
 
diff --git a/autokara/extractAss.sh b/autokara/extractAss.sh
deleted file mode 100755
index cf5dcdd528f58ba17304cb71321e4c08eed09f3f..0000000000000000000000000000000000000000
--- a/autokara/extractAss.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-##########################################################################################################
-#
-# COMMAND : extractAss.sh
-#
-# AUTHOR : Kubat
-#
-# DESCRIPTION : CLI tool to extract subtitles from .mkv files
-#
-# USE : ./extractAss.sh fileInput.mkv fileOutput.ass
-#
-# REQUIREMENTS : Have FFMPEG and SoX installed (for audio/video decoding)
-#
-#
-##########################################################################################################
-
-USAGE_MESSAGE="usage : $0 fileInput.mkv fileOutput.ass"
-if [ $# != 2 ]
-then
-  echo $USAGE_MESSAGE
-  exit 1
-fi
-
-if ! [[ "$1" =~ .mkv$ ]] || ! [[ "$2" =~ .ass$ ]]
-then
-  echo $USAGE_MESSAGE
-  exit 1
-fi
-
-# get the subtitles track id
-ID=$(mkvmerge --identify "$1" | sed -n 's/Track ID \([[:digit:]]*\).*subtitles.*/\1/p')
-
-mkvextract tracks "$1" "$ID":"$2"
diff --git a/autokara/extractWav.sh b/autokara/extractWav.sh
deleted file mode 100755
index cee7e4c6a4b5719bb24a8d1c75f50977db6a22a0..0000000000000000000000000000000000000000
--- a/autokara/extractWav.sh
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/bin/bash
-
-
-##########################################################################################################
-#
-# COMMAND : extractWav.sh
-#
-# AUTHOR : Sting
-#
-# DESCRIPTION : CLI tool to extract audio from .mkv files and convert it to 1-channel WAV files
-#               Currently supported formats :
-#			- Video : .mkv files only, any codec supported by FFMPEG
-#			- Audio : AAC, FLAC, DTS, AC3, MP3 (MPEG), OPUS, VORBIS
-#
-# USE : ./extractWav.sh source_folder destination_folder
-#
-# REQUIREMENTS : Have FFMPEG and SoX installed (for audio/video decoding)
-#
-#
-##########################################################################################################
-
-
-
-USAGE_MESSAGE="usage : $0 source_file dest_file"
-if [ $# != 2 ]; then
-        echo $USAGE_MESSAGE; exit 1;
-fi
-
-
-filename=$1
-dest_file=$2
-
-echo $filename
-echo $dest_file
-
-[ -e "$filename" ] || continue
-name=${filename##*/}
-base=${name%.mkv}
-
-codecLine=$(mkvinfo "$filename" | grep " A_")
-regex=".*A_([A-Z0-9]+).*"
-
-[[ $codecLine =~ $regex ]]
-
-codec=${BASH_REMATCH[1]}
-
-case $codec in
-
-	"AAC")
-			extension="m4a"
-			;;
-
-	"FLAC")
-			extension="flac"
-			;;
-
-	"VORBIS")
-			extension="ogg"
-			;;
-
-	"MPEG")
-		extension="mp3"
-			;;
-
-	"AC3")
-		extension="ac3"
-		;;
-
-	"EAC3")
-		extension="eac3"
-		;;
-
-	"DTS")
-		extension="dts"
-		;;
-
-	"OPUS")
-		extension="opus"
-		;;
-
-	*)
-		extension=""
-		;;
-
-esac
-
-
-
-ffmpeg -i "$filename" -acodec copy -vn "$base.$extension" && \
-ffmpeg -i "$base.$extension" "$base.wav" && \
-#sox "$2/$base.stereo.wav" "$2/$base.wav" remix - && \
-#rm "$2/$base.stereo.wav" && \
-rm "$base.$extension"
-mv "$base.wav" "$2"
-
-
-
diff --git a/autokara/preprocess/audio.py b/autokara/preprocess/audio.py
new file mode 100644
index 0000000000000000000000000000000000000000..a28e493f86857e1fc8694581c13ca511de6d3953
--- /dev/null
+++ b/autokara/preprocess/audio.py
@@ -0,0 +1,50 @@
+import sys
+import argparse
+import demucs.separate
+import subprocess
+import shlex
+from pathlib import Path
+import shutil
+
+
+
+
+
+def extract_audio(source_file, output_file=None):
+    """Extract the audio of source_file with ffmpeg and return the output path as a string.
+
+    The output is output_file when given, otherwise source_file with a ".wav" suffix.
+    """
+    # str() up front: a Path cannot be formatted with the ":s" spec (TypeError).
+    out_path = str(output_file) if output_file else str(Path(source_file).with_suffix(".wav"))
+    subprocess.call(["ffmpeg", "-i", str(source_file), "-vn", out_path])  # argv list: no re-quoting of paths
+    return out_path
+
+
+
+def extract_vocals(source_file, output_folder):
+    """Run demucs in two-stems (vocals) mode on source_file, writing under output_folder."""
+    # argv list instead of a shlex-split string: paths with quotes or backslashes survive.
+    subprocess.call(["demucs", "--two-stems", "vocals", "-o", str(output_folder), str(source_file)])
+
+
+
+def preprocess_video(source_file, output_folder=None, media_dir="."):
+    """Extract the vocals of source_file into <output_folder>/vocals.ogg.
+
+    The intermediate WAV (<media_dir>/audio/<stem>.wav) and the demucs output directory
+    are deleted afterwards. output_folder defaults to <media_dir>/vocals.
+    """
+    basename = Path(source_file).stem
+    Path(media_dir, "audio").mkdir(parents=True, exist_ok=True)
+    audio_file = f"{media_dir:s}/audio/{basename:s}.wav"
+    print("Extracting audio from video file...")
+    extract_audio(source_file, audio_file)
+    if not output_folder:
+        output_folder = f"{media_dir:s}/vocals"
+        Path(output_folder).mkdir(parents=True, exist_ok=True)
+    print("Isolating vocals...")
+    extract_vocals(audio_file, output_folder)
+    subprocess.call(["ffmpeg", "-i", f"{output_folder:s}/htdemucs/{basename:s}/vocals.wav", f"{output_folder:s}/vocals.ogg"])  # argv list: paths need no quoting
+    shutil.rmtree(f'{output_folder:s}/htdemucs/{basename:s}')
+    Path(audio_file).unlink(missing_ok=True)
diff --git a/autokara/preprocess/lyrics.py b/autokara/preprocess/lyrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0191f2f3d6d94bf0c8bb4b189571db92cbb0fe6
--- /dev/null
+++ b/autokara/preprocess/lyrics.py
@@ -0,0 +1,22 @@
+import sys
+import argparse
+import subprocess
+import shlex
+from pathlib import Path
+import re
+
+
+
+
+def extract_subtitles(source_file, output_file=None):
+    """Extract the first subtitle track of source_file with mkvmerge/mkvextract.
+
+    Output defaults to source_file with an ".ass" suffix; IndexError if no subtitle track.
+    """
+    out_path = str(output_file) if output_file else str(Path(source_file).with_suffix(".ass"))
+    data = subprocess.run(["mkvmerge", "--identify", str(source_file)], capture_output=True, text=True)  # argv list, no shell
+    matches = re.findall(r"Track ID (\d+): subtitles", data.stdout)
+    if not matches:
+        raise IndexError(f"no subtitle track found in {source_file}")
+    track_id = matches[0]  # whole ID string; indexing [0][0] would keep only its first digit
+    subprocess.call(["mkvextract", str(source_file), "tracks", f"{track_id}:{out_path}"])
\ No newline at end of file
diff --git a/autokara/preprocess_media.py b/autokara/preprocess_media.py
new file mode 100644
index 0000000000000000000000000000000000000000..196c0b08d585e6c51804cd1279a99d10f43836cc
--- /dev/null
+++ b/autokara/preprocess_media.py
@@ -0,0 +1,42 @@
+from .preprocess.audio import *
+from .preprocess.lyrics import *
+import sys
+import argparse
+from configparser import ConfigParser
+
+
+def main(opts=None):
+    """Command-line entry point of the preprocessing script.
+
+    opts is forwarded to argparse's parse_args(); None makes it read sys.argv.
+    """
+    cli = argparse.ArgumentParser(description='Script to prepare media for Autokara - extract vocals and lyrics from video')
+    cli.add_argument("--vocals", action="store_true", help="Perform vocals extraction on source file")
+    cli.add_argument("--lyrics", action="store_true", help="Perform ASS extraction on source file, if it has a subtitle track")
+    cli.add_argument("--full", action="store_true", help="Extract both vocals and lyrics")
+    cli.add_argument("source_file", type=str, help="The video/audio file to preprocess")
+    cli.add_argument("output_file", type=str, help="If extracting lyrics, the ASS output file. If extracting vocals or both, the output folder for separated tracks")
+    args = cli.parse_args(opts)
+
+    # Packaged defaults are listed first, then the per-user configuration file.
+    package_dir = Path(__file__).parent
+    user_conf = Path().home() / ".config" / "autokara" / "autokara.conf"
+    settings = ConfigParser()
+    settings.read([str(package_dir / "default.conf"), str(user_conf)])
+    media_dir = settings['Media']['media_dir']
+
+    # --full, both flags together, or no flag at all select both extractions.
+    do_both = args.full or (args.vocals and args.lyrics) or not (args.vocals or args.lyrics)
+
+    if do_both or args.vocals:
+        preprocess_video(args.source_file, output_folder=args.output_file, media_dir=media_dir)
+    if do_both:
+        # In "both" mode output_file is a folder; the lyrics are written inside it.
+        extract_subtitles(args.source_file, output_file=f'{args.output_file:s}/vocals.ass')
+    elif args.lyrics:
+        extract_subtitles(args.source_file, output_file=args.output_file)
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/preprocess_media.sh b/preprocess_media.sh
deleted file mode 100755
index cc0f1cd1883d050cd825aba0d643229b12987d90..0000000000000000000000000000000000000000
--- a/preprocess_media.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-
-##########################################################################################################
-#
-# COMMAND : preprocess_media.sh
-#
-# AUTHOR : Sting
-#
-# DESCRIPTION : CLI tool to batch extract ASS lyrics and vocals from a video folder
-#
-# USE : ./preprocess_media.sh input_folder output_folder
-#
-# REQUIREMENTS : FFMPEG, Demucs, extractAss and extractWav
-#
-#
-##########################################################################################################
-
-
-USAGE_MESSAGE="usage : $0 video_folder train_folder"
-if [ $# != 2 ]; then
-        echo $USAGE_MESSAGE; exit 1;
-fi
-
-
-video_folder=$1
-train_folder=$2
-
-for filename in "$video_folder"/*.mkv; do
-    name=${filename##*/}
-    base=${name%.mkv}
-    mkdir -p "$train_folder/$base"
-
-    extractWav.sh "$filename" "$train_folder/$base/$base.wav"
-    demucs --two-stems vocals -o "$train_folder/$base" "$train_folder/$base/$base.wav"
-    rm "$train_folder/$base/$base.wav"
-    ffmpeg -i "$train_folder/$base/htdemucs/$base/vocals.wav" "$train_folder/$base/vocals.ogg"
-    rm -r "$train_folder/$base/htdemucs"
-
-    extractAss.sh "$filename" "$train_folder/$base/vocals.ass"
-done;
\ No newline at end of file
diff --git a/setup.py b/setup.py
index c71a27b851a0ddfc76ffbe0efda851c06db10851..a72de99cf6aa7012ae4e063b094a33b62d3b49b5 100644
--- a/setup.py
+++ b/setup.py
@@ -50,14 +50,11 @@ setup(
     entry_points={
         'console_scripts': ['autokara=autokara.autokara:main',
                             'autokara-plot=autokara.plot_syls:main',
-                            'autokara-gen-lang=autokara.update_lang_db:main'
+                            'autokara-gen-lang=autokara.update_lang_db:main',
+                            'autokara-preprocess=autokara.preprocess_media:main'
                             ],
     },
-    scripts=[
-        'autokara/extractAss.sh',
-        'autokara/extractWav.sh',
-        'preprocess_media.sh'    
-    ],
+    scripts=[],
     license='MIT License',
     classifiers=[
         # Trove classifiers