diff --git a/.gitignore b/.gitignore index b1db3f291983c114d523b100e9c3ae149e731c23..08ab719c7ab57a2c8438443652cd11f1572c5f0e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ data/ env/ media/ models/ - +build/ +*.egg-info \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..7c6af3d33e05eaea206a2d73856b5224bcbe2b3d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include requirements.txt +include README.md +include preprocess_media.sh +recursive-include autokara * diff --git a/README.md b/README.md index 5239f2f3ddba3a304e9d93170c8130eeeb04b4f6..793b0e5c15056a43ba456f54c0197e92b010e443 100644 --- a/README.md +++ b/README.md @@ -46,21 +46,36 @@ If we ever want to use an AI to identify syllables without a reference lyrics fi ## Requirements - MKVToolnix (at least the CLI utils) +- FFmpeg - Python >= 3.8 -Optional : -- PyTorch for custom model training : follow the instructions [here](https://pytorch.org/get-started/locally/) - All other python modules can be installed directly through pip, see further. -This project requires at least Python 3.8, and using a virtual environment is strongly recommended. -To install the dependencies, execute in the project directory : +## Install + +The simplest way to install Autokara is through PIP : +```bash +# Using HTTPS +$ pip install git+https://git.iiens.net/bakaclub/autokara.git + +# Or SSH +$ pip install git+ssh://git@git.iiens.net:bakaclub/autokara.git +``` + +Or you can clone the repo and use `pip install <repo_directory>` if you prefer. + + +To use the custom phonetic mappings for Japanese Romaji and other non-English languages, you need to update manually (for now) the g2p DB (within the venv): +```bash +$ autokara-gen-lang +``` + + +If you plan on contributing to development, the use of a virtual environment is recommended : ```bash $ python -m venv env # create the virtual environment, do it once $ source env/bin/activate # use the virtual environement - -# Install the required python modules -$ pip install -r requirements.txt +$ pip install git+ssh://git@git.iiens.net:bakaclub/autokara.git # install autokara # To exit the virtual environment $ deactivate @@ -68,15 +83,11 @@ $ deactivate Having a CUDA-capable GPU is optional, but can greatly reduce processing time in some situations. +## Configuration -To use the custom phonetic mapping for Japanese Romaji, you need to update manually (for now) the g2p DB (within the venv): -```bash -$ cp g2p/mappings/langs/rji/* env/lib/python3.11/site-packages/g2p/mappings/langs/rji/ - -#Then update : -$ g2p update -``` - +Autokara comes with a default config file in `autokara/default.conf`. +If you want to tweak some values (enable CUDA, for example), you should add them to a new config file in your personal config directory : `~/.config/autokara/autokara.conf`. +This new file has priority over the default one, which is used only as fallback for unspecified values. # Use @@ -89,22 +100,22 @@ To use Autokara, you need : To execute AutoKara on a MKV video file and an ASS file containing the lyrics (ASS will be overwritten): ```bash -$ python autokara.py video.mkv lyrics.ass +$ autokara video.mkv lyrics.ass ``` To output to a different file (and keep the original) : ```bash -$ python autokara.py video.mkv lyrics.ass -o output.ass +$ autokara video.mkv lyrics.ass -o output.ass ``` To execute AutoKara on a (pre-extracted) WAV (or OGG, MP3, ...) 
vocals file, pass the `--vocals` flag : ```bash -$ python autokara.py vocals.wav output.ass --vocals +$ autokara vocals.wav output.ass --vocals ``` To use a phonetic transcription optimized for a specific language, use `--lang` (or `-l`) : ```bash -$ python autokara.py vocals.wav output.ass --lang jp +$ autokara vocals.wav output.ass --lang jp ``` Available languages are : ``` @@ -114,7 +125,7 @@ en : English Full help for all options is available with : ```bash -$ python autokara.py -h +$ autokara -h ``` ## Useful scripts @@ -143,7 +154,7 @@ A visualization tool, mainly intended for debug. Does the same as autokara.py, but instead of writing to a file, plots a graphic with onset times, spectrogram, probability curves,... Does not work on video files, only separated vocals audio files ```bash -$ python plot_syls.py vocals.wav lyrics.ass +$ autokara-plot vocals.wav lyrics.ass ``` diff --git a/autokara.py b/autokara.py deleted file mode 100644 index 8fe6f2e6eafc35e54f1f1c6dab83e3f1bab72dce..0000000000000000000000000000000000000000 --- a/autokara.py +++ /dev/null @@ -1,68 +0,0 @@ -import sys -import argparse -import demucs.separate -import subprocess -import shlex -from pathlib import Path - -from autosyl.assUtils import AssWriter, getSyls, getHeader -from autosyl.segment import segment - - -parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool') -parser.add_argument("source_file", type=str, help="The video/audio file to time") -parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") -parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction") -parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file") -parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity") -parser.add_argument("-l","--lang", help="Select language to use (default is Japanese Romaji)") - -args = parser.parse_args() - -ass_file = args.ass_file -verbose = args.verbose - -if not args.vocals : - print("Extracting audio from video file...") - Path("./media/audio").mkdir(parents=True, exist_ok=True) - basename = Path(args.source_file).stem - audio_file = "media/audio/%s.wav" % basename - - subprocess.call(shlex.split('./extractWav.sh "%s" "%s"' % (args.source_file, audio_file))) - - Path("./media/vocals").mkdir(parents=True, exist_ok=True) - output_folder = "./media/vocals" - - print("Isolating vocals...") - - # Not working, don't know why - # demucs.separate.main(shlex.split('--two-stems vocals -o "%s" "%s"' % (output_folder, audio_file))) - subprocess.call(shlex.split('demucs --two-stems vocals -o "%s" "%s"' % (output_folder, audio_file))) - - vocals_file = "./media/vocals/htdemucs/%s/vocals.wav" % basename -else: - vocals_file = args.source_file - - - -print("Identifying syl starts...") - - -if verbose: - print("Retrieving syls from lyrics...") -reference_syls, line_meta = getSyls(ass_file) - -if verbose: - print("Starting syl detection...") -syls = segment(vocals_file, reference_syls=reference_syls, verbose=verbose, language=args.lang) -print(syls) -print(line_meta) - -print("Syls found, writing ASS file...") -header = getHeader(ass_file) -writer = AssWriter() -writer.openAss(args.output if args.output else ass_file) -writer.writeHeader(header=header) -writer.writeSyls(syls, line_meta) -writer.closeAss() - diff --git a/autokara/__init__.py b/autokara/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..b31ecc406afe6e17bb27fe80f20ed10e8516b16f --- /dev/null +++ b/autokara/__init__.py @@ -0,0 +1,2 @@ + +__version__ = "0.1.0" \ No newline at end of file diff --git a/autokara/autokara.py b/autokara/autokara.py new file mode 100644 index 0000000000000000000000000000000000000000..ffb7edace7fad76076009591ed2a71cc06cc01b6 --- /dev/null +++ b/autokara/autokara.py @@ -0,0 +1,93 @@ +import sys +import argparse +import demucs.separate +import subprocess +import shlex +from pathlib import Path +from configparser import ConfigParser + +from .autosyl.assUtils import AssWriter, getSyls, getHeader +from .autosyl.segment import segment + + + +def main(opts=None): + + parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool') + parser.add_argument("source_file", type=str, help="The video/audio file to time") + parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") + parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction") + parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file") + parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity") + parser.add_argument("-l","--lang", help="Select language to use (default is Japanese Romaji)") + + args = parser.parse_args(opts) + + ass_file = args.ass_file + verbose = args.verbose + + here = Path(__file__).parent + + config = ConfigParser() + config.read([ + str(here / "default.conf"), # Default config file + str(Path().home()/ ".config" / "autokara"/ "autokara.conf") # User config file + ]) + + media_dir = config['Media']['media_dir'] + segment_config = { + 'model': config['Segment']['model'], + 'bdr': config['Segment'].getboolean('bdr'), + 'cuda': config['Segment'].getboolean('cuda'), + 'syl_delay': config['Segment'].getint('syl_delay') + } + + + if not args.vocals : + print("Extracting audio from video file...") + Path(media_dir + "/audio").mkdir(parents=True, exist_ok=True) + basename = Path(args.source_file).stem + audio_file = f"{media_dir:s}/audio/{basename:s}.wav" + + subprocess.call(shlex.split(f'{str(here)}/extractWav.sh "{args.source_file:s}" "{audio_file}"')) + + Path(f"{media_dir:s}/vocals").mkdir(parents=True, exist_ok=True) + output_folder = f"{media_dir:s}/vocals" + + print("Isolating vocals...") + + # Not working, don't know why + # demucs.separate.main(shlex.split('--two-stems vocals -o "%s" "%s"' % (output_folder, audio_file))) + subprocess.call(shlex.split(f'demucs --two-stems vocals -o "{output_folder:s}" "{audio_file:s}"')) + + vocals_file = f"{media_dir:s}/vocals/htdemucs/{basename:s}/vocals.wav" + else: + vocals_file = args.source_file + + + + print("Identifying syl starts...") + + + if verbose: + print("Retrieving syls from lyrics...") + reference_syls, line_meta = getSyls(ass_file) + + if verbose: + print("Starting syl detection...") + syls = segment(vocals_file, segment_config, reference_syls=reference_syls, verbose=verbose, language=args.lang) + print(syls) + print(line_meta) + + print("Syls found, writing ASS file...") + header = getHeader(ass_file) + writer = AssWriter() + writer.openAss(args.output if args.output else ass_file) + writer.writeHeader(header=header) + writer.writeSyls(syls, line_meta) + writer.closeAss() + + +if __name__ == "__main__": + main() + diff --git a/autosyl/LyricsAlignment/LICENSE b/autokara/autosyl/LyricsAlignment/LICENSE similarity index 100% rename from 
autosyl/LyricsAlignment/LICENSE rename to autokara/autosyl/LyricsAlignment/LICENSE diff --git a/autosyl/LyricsAlignment/checkpoints/checkpoint_BDR b/autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_BDR similarity index 100% rename from autosyl/LyricsAlignment/checkpoints/checkpoint_BDR rename to autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_BDR diff --git a/autosyl/LyricsAlignment/checkpoints/checkpoint_Baseline b/autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_Baseline similarity index 100% rename from autosyl/LyricsAlignment/checkpoints/checkpoint_Baseline rename to autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_Baseline diff --git a/autosyl/LyricsAlignment/checkpoints/checkpoint_MTL b/autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_MTL similarity index 100% rename from autosyl/LyricsAlignment/checkpoints/checkpoint_MTL rename to autokara/autosyl/LyricsAlignment/checkpoints/checkpoint_MTL diff --git a/autosyl/LyricsAlignment/model.py b/autokara/autosyl/LyricsAlignment/model.py similarity index 99% rename from autosyl/LyricsAlignment/model.py rename to autokara/autosyl/LyricsAlignment/model.py index f6fd66b10cb934ee1911b50d35b6bbcd13a9320f..50120c275151ad17e553ee01eb53a9d76c420962 100644 --- a/autosyl/LyricsAlignment/model.py +++ b/autokara/autosyl/LyricsAlignment/model.py @@ -4,7 +4,7 @@ import torch.nn.functional as F import torchaudio import warnings -from autosyl.LyricsAlignment.utils import notes_to_pc +from .utils import notes_to_pc # following FFT parameters are designed for a 22.5k sampling rate sr = 22050 diff --git a/autosyl/LyricsAlignment/utils.py b/autokara/autosyl/LyricsAlignment/utils.py similarity index 100% rename from autosyl/LyricsAlignment/utils.py rename to autokara/autosyl/LyricsAlignment/utils.py diff --git a/autosyl/LyricsAlignment/wrapper.py b/autokara/autosyl/LyricsAlignment/wrapper.py similarity index 97% rename from autosyl/LyricsAlignment/wrapper.py rename to autokara/autosyl/LyricsAlignment/wrapper.py index 9e43b4bbe9c59074141669cb7aa11f9e357f91a6..f308ef9872955f4fdf4d45ce384c814fbee8664d 100644 --- a/autosyl/LyricsAlignment/wrapper.py +++ b/autokara/autosyl/LyricsAlignment/wrapper.py @@ -5,8 +5,8 @@ import torch import torch.nn as nn import torch.nn.functional as F -import autosyl.LyricsAlignment.utils as utils -from autosyl.LyricsAlignment.model import train_audio_transforms, AcousticModel, BoundaryDetection +from . 
import utils +from .model import train_audio_transforms, AcousticModel, BoundaryDetection np.random.seed(7) diff --git a/autosyl/assUtils.py b/autokara/autosyl/assUtils.py similarity index 100% rename from autosyl/assUtils.py rename to autokara/autosyl/assUtils.py diff --git a/autosyl/segment.py b/autokara/autosyl/segment.py similarity index 75% rename from autosyl/segment.py rename to autokara/autosyl/segment.py index 6a54e36c971c2c60596bae459872a58418b63000..46155073b8c10738a55de11981295909f38c91e9 100644 --- a/autosyl/segment.py +++ b/autokara/autosyl/segment.py @@ -5,24 +5,25 @@ import re import matplotlib.pyplot as plt import scipy.signal as sg import parselmouth +from pathlib import Path -from autosyl.assUtils import getSyls, timeToDate, dateToTime -from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file +from .assUtils import getSyls, timeToDate, dateToTime +from .LyricsAlignment.wrapper import align, preprocess_from_file -def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, verbose=False, language="jp"): +def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur=500, verbose=False, language="jp"): - delay = -4 + delay = config['syl_delay'] backtrack = False print(reference_syls) - audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input. - word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it - method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" - cuda=False # set True if you have access to a GPU - checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints" + audio_file = songfile # pre-computed source-separated vocals; + word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it + method = config['model'] + ("_BDR" if config['bdr'] else "") # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" + cuda = config['cuda'] # set True if you have access to a GPU + checkpoint_folder = f"{str(Path(__file__).parent):s}/LyricsAlignment/checkpoints" language = language diff --git a/autokara/default.conf b/autokara/default.conf new file mode 100644 index 0000000000000000000000000000000000000000..82d914ce4a69e2792135de3cb620ee67f36e604e --- /dev/null +++ b/autokara/default.conf @@ -0,0 +1,20 @@ + +[General] + + + +[Media] + +# Where to store temporary media files (such as extracted vocals or ASS tracks) +media_dir = /tmp/autokara + +[Segment] + +# Which model to use. Options are "Baseline" (base) and "MTL" (better) +model = MTL +# Whether to use BDR model for boundary recognition. 
Better, but resource-intensive +bdr = true +# Whether to use CUDA +cuda = false +# Default delay applied to detected syls, in centiseconds +syl_delay = -4 diff --git a/extractAss.sh b/autokara/extractAss.sh similarity index 100% rename from extractAss.sh rename to autokara/extractAss.sh diff --git a/extractWav.sh b/autokara/extractWav.sh similarity index 100% rename from extractWav.sh rename to autokara/extractWav.sh diff --git a/g2p/mappings/langs/rji/config.yaml b/autokara/g2p/mappings/langs/rji/config.yaml similarity index 100% rename from g2p/mappings/langs/rji/config.yaml rename to autokara/g2p/mappings/langs/rji/config.yaml diff --git a/g2p/mappings/langs/rji/rji_abbs.csv b/autokara/g2p/mappings/langs/rji/rji_abbs.csv similarity index 100% rename from g2p/mappings/langs/rji/rji_abbs.csv rename to autokara/g2p/mappings/langs/rji/rji_abbs.csv diff --git a/g2p/mappings/langs/rji/romaji_to_eng-arpa.csv b/autokara/g2p/mappings/langs/rji/romaji_to_eng-arpa.csv similarity index 100% rename from g2p/mappings/langs/rji/romaji_to_eng-arpa.csv rename to autokara/g2p/mappings/langs/rji/romaji_to_eng-arpa.csv diff --git a/autokara/plot_syls.py b/autokara/plot_syls.py new file mode 100644 index 0000000000000000000000000000000000000000..9383639fc6cf3ab2a285eeaf9f8518ae8c53f9b5 --- /dev/null +++ b/autokara/plot_syls.py @@ -0,0 +1,195 @@ +import madmom +import numpy as np +import sys +import re +import matplotlib.pyplot as plt +import scipy.signal as sg +import parselmouth +import argparse + +from .autosyl.assUtils import getSyls, timeToDate, dateToTime +from .autosyl.LyricsAlignment.wrapper import align, preprocess_from_file + + +############################################################################## +# +# This is a test script to visualize extracted onsets and other audio features +# It is mainly intended for development/debug +# +# If you just want to detect the syllables, use the autokara command instead +# +############################################################################## +def main(opts=None): + parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool') + parser.add_argument("vocals_file", type=str, help="The audio file to time") + parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") + + args = parser.parse_args(opts) + + + songfile = args.vocals_file + reference_syls, line_meta = getSyls(args.ass_file) + + + print(reference_syls) + + backtrack = False + + + + + audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input.
+ word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it + method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" + cuda=True # set True if you have access to a GPU + checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints" + + pred_file = "./MTL.csv" # saved alignment results, "(float) start_time, (float) end_time, (string) word" + + + lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls] + #print(lyrics_lines) + + + # load audio and lyrics + # words: a list of words + # lyrics_p: phoneme sequence of the target lyrics + # idx_word_p: indices of word start in lyrics_p + # idx_line_p: indices of line start in lyrics_p + audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file) + + # compute alignment + # word_align: a list of frame indices aligned to each word + # words: a list of words + word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=False, checkpoint_folder=checkpoint_folder) + + + print([[word_align[i][0], word_align[i][1], words[i]] for i in range(len(word_align))]) + words_onsets = np.array([word_align[i][0] for i in range(len(word_align))]) + + + cnn = madmom.features.onsets.CNNOnsetProcessor() + spectral = madmom.features.onsets.SpectralOnsetProcessor('modified_kullback_leibler') + + sig = madmom.audio.signal.Signal(songfile, num_channels=1) + parsel = parselmouth.Sound(sig) + + spec = madmom.audio.spectrogram.Spectrogram(sig) + filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24) + log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1) + + magnitude = np.max(log_spec[:,:100], axis=1) + + cnn_function = cnn(sig) + spectral_function = spectral(sig) + spectral_function = spectral_function/(spectral_function.max()) + + #activation_function = 0.5*cnn_function + 0.5*spectral_function + activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function) + #activation_function = np.where(spectral_function > 0.14, cnn_function, 0) + #onsets = proc(activation_function) + + + if reference_syls: + activation_threshold = 0.1 + else: + activation_threshold = 0.2 + + activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) + cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) + onsets = madmom.features.onsets.peak_picking(activation_smoothed, threshold=activation_threshold, smooth=0) + #onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.1]) + + pitch = parsel.to_pitch() + pitch_values = pitch.selected_array['frequency'] + + pad_before = round(pitch.xs()[0]*100) + pad_after = len(magnitude) - len(pitch_values) - pad_before + + pitch_values = np.pad(pitch_values, (pad_before, pad_after), 'constant', constant_values=(0,0)) + + mask_function = magnitude * pitch_values + mask_function = mask_function/np.max(mask_function) + mask_threshold = 0.15 + mask_window = [1,6] + invalid_onsets_idx = [] + + for i in range(len(onsets)): + if np.max(mask_function[onsets[i]+mask_window[0]:onsets[i]+mask_window[1]]) < mask_threshold: + invalid_onsets_idx.append(i) + + onsets = np.delete(onsets, invalid_onsets_idx) + + + + if reference_syls: + filtered_onsets = [] + line_index = 0 + for line in reference_syls: + line_index += 1 + syl_number = len(line) - 1 + line_onsets = [o for o in onsets if (o >= line[0][0] and o <= line[-1][0])] + line_onsets.sort(reverse=True, key=(lambda x: 
activation_smoothed[x])) + if syl_number > len(line_onsets): + print("WARNING : failed to detect enough onsets in line %d (%d, %d)" % (line_index, line[0][0], line[-1][0])) + filtered_onsets += line_onsets[0:syl_number] + + onsets = np.array(sorted(filtered_onsets)) + + + """ + if word_index > 0: + word_start = max(word_align[word_index][0] - 5, line[0][0], previous_onset+1) + else: + word_start = line[0][0] + if word_index < len(words) - 1 and syl_index < len(line) - 2: + word_end = min(line[-1][0], word_align[word_index + 1][0] - 5) + else: + word_end = line[-1][0] + + word_onsets = [o for o in onsets if (o >= word_start and o <= word_end)] + word_onsets.sort(reverse=True, key=(lambda x: activation_smoothed[x])) + if word_syl_count > len(word_onsets): + print("WARNING : failed to detect enough onsets in word %s (%d, %d)" % (word_tmp, word_start, word_end)) + filtered_onsets += word_onsets[0:word_syl_count] + print(word_onsets[0:word_syl_count]) + previous_onset = max(word_onsets[0:word_syl_count] + [0]) + """ + + # Backtrack onsets to closest earlier local minimum + if backtrack: + backtrack_max_frames = 50 + for i in range(len(onsets)): + initial_onset = onsets[i] + while(activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]] and onsets[i] > initial_onset - backtrack_max_frames): + onsets[i] -= 1 + + #print(onsets/100) + print(words_onsets/100) + + if reference_syls: + reference_onsets = [syl[0]+8 for line in reference_syls for syl in line[:-1]] + + fig, axs = plt.subplots(nrows=2, sharex=True) + axs[0].imshow(log_spec.T, origin='lower', aspect='auto') + if reference_syls: + axs[0].vlines(reference_onsets, 0, 140, colors='red') + axs[0].plot((pitch_values/np.max(pitch_values))*140, color='yellow') + axs[1].plot(mask_function) + #axs[1].plot(cnn_smoothed) + #axs[1].plot(spectral_function, color='green') + axs[1].plot(activation_smoothed, color='orange') + axs[1].vlines(onsets, 0, 2, colors='red') + axs[1].vlines(words_onsets, 0, 3, colors='m') + axs[1].hlines([max(mask_threshold, 0), activation_threshold], 0, onsets[-1]+100, colors='black') + + #bins = np.arange(0, 1, 0.02) + #hist, hist_axs = plt.subplots(nrows=1) + #hist_axs.hist(mask_function, bins=bins) + + plt.show() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/autokara/update_lang_db.py b/autokara/update_lang_db.py new file mode 100644 index 0000000000000000000000000000000000000000..6e0c63e926ebef29287967a04d7fbed6c53cb9f8 --- /dev/null +++ b/autokara/update_lang_db.py @@ -0,0 +1,28 @@ +import subprocess +import shlex +from pathlib import Path +import glob + + +def main(): + from g2p import __path__ as g2p_path + + HERE = Path(__file__).parent + g2p_base = Path(g2p_path[0]) + + print("Regenerating custom language mappings...") + mapping_dir = HERE / "g2p/mappings/langs/" + mappings = glob.glob(f"{str(mapping_dir):s}/*") + for map in mappings: + subprocess.check_call(shlex.split(f'cp -r {map:s} {str(g2p_base):s}/mappings/langs/')) + subprocess.check_call(shlex.split(f'g2p update')) + + if not Path.exists(g2p_base / "mappings/langs/rji"): + print("ERROR : Failed to find language mapping") + else: + print("Setup successful") + + + +if __name__ == "__main__": + main() diff --git a/plot_syls.py b/plot_syls.py deleted file mode 100644 index e86960430761fc37dcc54ebfc60dbcc1a1361da1..0000000000000000000000000000000000000000 --- a/plot_syls.py +++ /dev/null @@ -1,191 +0,0 @@ -import madmom -import numpy as np -import sys -import re -import matplotlib.pyplot as plt -import scipy.signal as sg 
-import parselmouth -import argparse - -from autosyl.assUtils import getSyls, timeToDate, dateToTime -from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file - - -############################################################################## -# -# This is a test script to visualize extracted onsets and other audio features -# It is mainly intended for development/debug -# -# If you just want to detect the syllables, use autokara.py instead -# -############################################################################## - - -parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool') -parser.add_argument("vocals_file", type=str, help="The audio file to time") -parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") - -args = parser.parse_args() - - -songfile = args.vocals_file -reference_syls, line_meta = getSyls(sys.argv[2]) - - -print(reference_syls) - -backtrack = False - - - - -audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input. -word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it -method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" -cuda=True # set True if you have access to a GPU -checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints" - -pred_file = "./MTL.csv" # saved alignment results, "(float) start_time, (float) end_time, (string) word" - - -lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls] -#print(lyrics_lines) - - -# load audio and lyrics -# words: a list of words -# lyrics_p: phoneme sequence of the target lyrics -# idx_word_p: indices of word start in lyrics_p -# idx_line_p: indices of line start in lyrics_p -audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file) - -# compute alignment -# word_align: a list of frame indices aligned to each word -# words: a list of words -word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=False, checkpoint_folder=checkpoint_folder) - - -print([[word_align[i][0], word_align[i][1], words[i]] for i in range(len(word_align))]) -words_onsets = np.array([word_align[i][0] for i in range(len(word_align))]) - - -cnn = madmom.features.onsets.CNNOnsetProcessor() -spectral = madmom.features.onsets.SpectralOnsetProcessor('modified_kullback_leibler') - -sig = madmom.audio.signal.Signal(songfile, num_channels=1) -parsel = parselmouth.Sound(sig) - -spec = madmom.audio.spectrogram.Spectrogram(sig) -filt_spec = madmom.audio.spectrogram.FilteredSpectrogram(spec, filterbank=madmom.audio.filters.LogFilterbank, num_bands=24) -log_spec = madmom.audio.spectrogram.LogarithmicSpectrogram(filt_spec, add=1) - -magnitude = np.max(log_spec[:,:100], axis=1) - -cnn_function = cnn(sig) -spectral_function = spectral(sig) -spectral_function = spectral_function/(spectral_function.max()) - -#activation_function = 0.5*cnn_function + 0.5*spectral_function -activation_function = (2 * cnn_function * spectral_function)/(cnn_function + spectral_function) -#activation_function = np.where(spectral_function > 0.14, cnn_function, 0) -#onsets = proc(activation_function) - - -if reference_syls: - activation_threshold = 0.1 -else: - activation_threshold = 0.2 - -activation_smoothed = madmom.audio.signal.smooth(activation_function, 20) -cnn_smoothed = madmom.audio.signal.smooth(cnn_function, 20) -onsets = madmom.features.onsets.peak_picking(activation_smoothed, 
threshold=activation_threshold, smooth=0) -#onsets = np.array([o for o in onsets if cnn_smoothed[o] > 0.1]) - -pitch = parsel.to_pitch() -pitch_values = pitch.selected_array['frequency'] - -pad_before = round(pitch.xs()[0]*100) -pad_after = len(magnitude) - len(pitch_values) - pad_before - -pitch_values = np.pad(pitch_values, (pad_before, pad_after), 'constant', constant_values=(0,0)) - -mask_function = magnitude * pitch_values -mask_function = mask_function/np.max(mask_function) -mask_threshold = 0.15 -mask_window = [1,6] -invalid_onsets_idx = [] - -for i in range(len(onsets)): - if np.max(mask_function[onsets[i]+mask_window[0]:onsets[i]+mask_window[1]]) < mask_threshold: - invalid_onsets_idx.append(i) - -onsets = np.delete(onsets, invalid_onsets_idx) - - - -if reference_syls: - filtered_onsets = [] - line_index = 0 - for line in reference_syls: - line_index += 1 - syl_number = len(line) - 1 - line_onsets = [o for o in onsets if (o >= line[0][0] and o <= line[-1][0])] - line_onsets.sort(reverse=True, key=(lambda x: activation_smoothed[x])) - if syl_number > len(line_onsets): - print("WARNING : failed to detect enough onsets in line %d (%d, %d)" % (line_index, line[0][0], line[-1][0])) - filtered_onsets += line_onsets[0:syl_number] - - onsets = np.array(sorted(filtered_onsets)) - - -""" - if word_index > 0: - word_start = max(word_align[word_index][0] - 5, line[0][0], previous_onset+1) - else: - word_start = line[0][0] - if word_index < len(words) - 1 and syl_index < len(line) - 2: - word_end = min(line[-1][0], word_align[word_index + 1][0] - 5) - else: - word_end = line[-1][0] - - word_onsets = [o for o in onsets if (o >= word_start and o <= word_end)] - word_onsets.sort(reverse=True, key=(lambda x: activation_smoothed[x])) - if word_syl_count > len(word_onsets): - print("WARNING : failed to detect enough onsets in word %s (%d, %d)" % (word_tmp, word_start, word_end)) - filtered_onsets += word_onsets[0:word_syl_count] - print(word_onsets[0:word_syl_count]) - previous_onset = max(word_onsets[0:word_syl_count] + [0]) -""" - -# Backtrack onsets to closest earlier local minimum -if backtrack: - backtrack_max_frames = 50 - for i in range(len(onsets)): - initial_onset = onsets[i] - while(activation_smoothed[onsets[i] - 1] < activation_smoothed[onsets[i]] and onsets[i] > initial_onset - backtrack_max_frames): - onsets[i] -= 1 - -#print(onsets/100) -print(words_onsets/100) - -if reference_syls: - reference_onsets = [syl[0]+8 for line in reference_syls for syl in line[:-1]] - -fig, axs = plt.subplots(nrows=2, sharex=True) -axs[0].imshow(log_spec.T, origin='lower', aspect='auto') -if reference_syls: - axs[0].vlines(reference_onsets, 0, 140, colors='red') -axs[0].plot((pitch_values/np.max(pitch_values))*140, color='yellow') -axs[1].plot(mask_function) -#axs[1].plot(cnn_smoothed) -#axs[1].plot(spectral_function, color='green') -axs[1].plot(activation_smoothed, color='orange') -axs[1].vlines(onsets, 0, 2, colors='red') -axs[1].vlines(words_onsets, 0, 3, colors='m') -axs[1].hlines([max(mask_threshold, 0), activation_threshold], 0, onsets[-1]+100, colors='black') - -#bins = np.arange(0, 1, 0.02) -#hist, hist_axs = plt.subplots(nrows=1) -#hist_axs.hist(mask_function, bins=bins) - -plt.show() \ No newline at end of file diff --git a/preprocess_media.sh b/preprocess_media.sh index a88ae744d830cadc1fdf2122e5c545180eaa4fb4..ec05203bec0a6563ab7adc7298b08c0fac3d2c4a 100755 --- a/preprocess_media.sh +++ b/preprocess_media.sh @@ -1,6 +1,20 @@ - - - +#!/bin/bash + + 
+########################################################################################################## +# +# COMMAND : preprocess_media.sh +# +# AUTHOR : Sting +# +# DESCRIPTION : CLI tool to batch extract ASS lyrics and vocals from a video folder +# +# USE : ./preprocess_media.sh input_folder output_folder +# +# REQUIREMENTS : FFMPEG, Demucs, extractAss and extractWav +# +# +########################################################################################################## USAGE_MESSAGE="usage : $0 video_folder train_folder" diff --git a/requirements.txt b/requirements.txt index 04299146d72785e9cf25e747f7bcea65dd11d05c..1574a1b402f57d64880c618eb05472c3fd8879fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,9 @@ librosa demucs -chainer soundfile -sklearn matplotlib numpy -tqdm -scipy -cython -mido -git+https://github.com/CPJKU/madmom.git +madmom@git+https://github.com/CPJKU/madmom.git praat-parselmouth future musdb diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..c71a27b851a0ddfc76ffbe0efda851c06db10851 --- /dev/null +++ b/setup.py @@ -0,0 +1,69 @@ + +from pathlib import Path +from setuptools import setup, find_packages +import atexit +from setuptools.command.install import install +import subprocess +import shlex +import glob + + +NAME = 'autokara' +DESCRIPTION = 'Automatic karaoke timing' + +URL = 'https://git.iiens.net/bakaclub/autokara' +AUTHOR = 'Loïc "Sting" Allègre' +REQUIRES_PYTHON = '>=3.8.0' + +HERE = Path(__file__).parent + +# Get version without explicitly loading the module. +for line in open('autokara/__init__.py'): + line = line.strip() + if '__version__' in line: + context = {} + exec(line, context) + VERSION = context['__version__'] + + +def load_requirements(name): + required = [i.strip() for i in open(HERE / name)] + required = [i for i in required if not i.startswith('#')] + print(required) + return required + + +REQUIRED = load_requirements('requirements.txt') +ALL_REQUIRED = load_requirements('requirements.txt') + + +setup( + name=NAME, + version=VERSION, + description=DESCRIPTION, + author=AUTHOR, + python_requires=REQUIRES_PYTHON, + url=URL, + packages=find_packages(), + install_requires=REQUIRED, + include_package_data=True, + entry_points={ + 'console_scripts': ['autokara=autokara.autokara:main', + 'autokara-plot=autokara.plot_syls:main', + 'autokara-gen-lang=autokara.update_lang_db:main' + ], + }, + scripts=[ + 'autokara/extractAss.sh', + 'autokara/extractWav.sh', + 'preprocess_media.sh' + ], + license='MIT License', + classifiers=[ + # Trove classifiers + # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers + 'License :: OSI Approved :: MIT License', + 'Topic :: Multimedia :: Sound/Audio', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + ], +)
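For reference, the configuration precedence this patch describes in the README (the packaged `autokara/default.conf` is read first, then `~/.config/autokara/autokara.conf` overrides it) is implemented in `autokara/autokara.py` as two layered `configparser` reads. A minimal sketch of that behaviour, using in-memory strings instead of the two files and an invented user override value purely for illustration:

```python
from configparser import ConfigParser

# Abridged copy of the packaged default.conf
default_conf = """
[Segment]
model = MTL
bdr = true
cuda = false
syl_delay = -4
"""

# Hypothetical ~/.config/autokara/autokara.conf overriding a single key
user_conf = """
[Segment]
cuda = true
"""

config = ConfigParser()
config.read_string(default_conf)  # defaults are loaded first...
config.read_string(user_conf)     # ...then user keys override them

print(config['Segment'].getboolean('cuda'))  # True  (taken from the user file)
print(config['Segment']['model'])            # MTL   (falls back to the default)
```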
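Because the `autokara` console script declared in `setup.py` maps to `autokara.autokara.main(opts=None)`, which forwards `opts` to `argparse`, the same command can also be driven from Python by passing the CLI arguments as a list. A small usage sketch (file names are placeholders, and the call needs the same runtime dependencies as the CLI: MKVToolnix/FFmpeg, demucs, the models, ...):

```python
from autokara.autokara import main

# Equivalent to running: autokara video.mkv lyrics.ass -o output.ass
main(["video.mkv", "lyrics.ass", "-o", "output.ass"])
```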