From b844a882bf0c70ef90fc87ea43cf6071db1224f4 Mon Sep 17 00:00:00 2001 From: Sting <loic.allegre@ensiie.fr> Date: Tue, 25 Jul 2023 18:41:16 +0200 Subject: [PATCH] Add config files, include shell scripts in dist --- README.md | 6 +++++ autokara/autokara.py | 53 ++++++++++++++++++------------------- autokara/autosyl/segment.py | 12 ++++----- autokara/default.conf | 20 ++++++++++++++ preprocess_media.sh | 20 +++++++++++--- setup.py | 5 ++++ 6 files changed, 80 insertions(+), 36 deletions(-) create mode 100644 autokara/default.conf diff --git a/README.md b/README.md index 291cd15..793b0e5 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,12 @@ $ deactivate Having a CUDA-capable GPU is optional, but can greatly reduce processing time in some situations. +## Configuration + +Autokara comes with a default config file in `autokara/default.conf`. +If you want to tweak some values (enable CUDA, for example), you should add them to a new config file in your personal config directory: `~/.config/autokara/autokara.conf`. +This new file has priority over the default one, which is used only as a fallback for unspecified values. 
+ # Use diff --git a/autokara/autokara.py b/autokara/autokara.py index e5d9e21..ffb7eda 100644 --- a/autokara/autokara.py +++ b/autokara/autokara.py @@ -4,6 +4,7 @@ import demucs.separate import subprocess import shlex from pathlib import Path +from configparser import ConfigParser from .autosyl.assUtils import AssWriter, getSyls, getHeader from .autosyl.segment import segment @@ -12,25 +13,6 @@ from .autosyl.segment import segment def main(opts=None): - from g2p import __path__ as g2p_path - - HERE = Path(__file__).parent - g2p_base = Path(g2p_path[0]) - if not Path.exists(g2p_base / "mappings/langs/rji"): - print("No Romaji language mapping found, attempting first-time setup") - mapping_dir = HERE / "g2p/mappings/langs/" - mappings = glob.glob(f"{str(mapping_dir):s}/*") - for map in mappings: - subprocess.check_call(shlex.split(f'cp -r {map:s} {str(g2p_base):s}/mappings/langs/')) - subprocess.check_call(shlex.split(f'g2p update')) - - if not Path.exists(g2p_base / "mappings/langs/rji"): - print("ERROR : Failed to find language mapping") - else: - print("Setup successful") - - - parser = argparse.ArgumentParser(description='AutoKara - Automatic karaoke timing tool') parser.add_argument("source_file", type=str, help="The video/audio file to time") parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") @@ -44,24 +26,41 @@ def main(opts=None): ass_file = args.ass_file verbose = args.verbose + here = Path(__file__).parent + + config = ConfigParser() + config.read([ + str(here / "default.conf"), # Default config file + str(Path().home()/ ".config" / "autokara"/ "autokara.conf") # User config file + ]) + + media_dir = config['Media']['media_dir'] + segment_config = { + 'model': config['Segment']['model'], + 'bdr': config['Segment'].getboolean('bdr'), + 'cuda': config['Segment'].getboolean('cuda'), + 'syl_delay': config['Segment'].getint('syl_delay') + } + + if not args.vocals : print("Extracting audio from video file...") - 
Path("./media/audio").mkdir(parents=True, exist_ok=True) + Path(media_dir + "/audio").mkdir(parents=True, exist_ok=True) basename = Path(args.source_file).stem - audio_file = "media/audio/%s.wav" % basename + audio_file = f"{media_dir:s}/audio/{basename:s}.wav" - subprocess.call(shlex.split('./extractWav.sh "%s" "%s"' % (args.source_file, audio_file))) + subprocess.call(shlex.split(f'{str(here)}/extractWav.sh "{args.source_file:s}" "{audio_file}"')) - Path("./media/vocals").mkdir(parents=True, exist_ok=True) - output_folder = "./media/vocals" + Path(f"{media_dir:s}/vocals").mkdir(parents=True, exist_ok=True) + output_folder = f"{media_dir:s}/vocals" print("Isolating vocals...") # Not working, don't know why # demucs.separate.main(shlex.split('--two-stems vocals -o "%s" "%s"' % (output_folder, audio_file))) - subprocess.call(shlex.split('demucs --two-stems vocals -o "%s" "%s"' % (output_folder, audio_file))) + subprocess.call(shlex.split(f'demucs --two-stems vocals -o "{output_folder:s}" "{audio_file:s}"')) - vocals_file = "./media/vocals/htdemucs/%s/vocals.wav" % basename + vocals_file = f"{media_dir:s}/vocals/htdemucs/{basename:s}/vocals.wav" else: vocals_file = args.source_file @@ -76,7 +75,7 @@ def main(opts=None): if verbose: print("Starting syl detection...") - syls = segment(vocals_file, reference_syls=reference_syls, verbose=verbose, language=args.lang) + syls = segment(vocals_file, segment_config, reference_syls=reference_syls, verbose=verbose, language=args.lang) print(syls) print(line_meta) diff --git a/autokara/autosyl/segment.py b/autokara/autosyl/segment.py index 84a5ac9..4615507 100644 --- a/autokara/autosyl/segment.py +++ b/autokara/autosyl/segment.py @@ -12,17 +12,17 @@ from .LyricsAlignment.wrapper import align, preprocess_from_file -def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, verbose=False, language="jp"): +def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur=500, verbose=False, 
language="jp"): - delay = -4 + delay = config['syl_delay'] backtrack = False print(reference_syls) - audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input. - word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it - method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" - cuda=False # set True if you have access to a GPU + audio_file = songfile # pre-computed source-separated vocals; + word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it + method = config['model'] + ("_BDR" if config['bdr'] else "") # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" + cuda = config['cuda'] # set True if you have access to a GPU checkpoint_folder = f"{str(Path(__file__).parent):s}/LyricsAlignment/checkpoints" language = language diff --git a/autokara/default.conf b/autokara/default.conf new file mode 100644 index 0000000..82d914c --- /dev/null +++ b/autokara/default.conf @@ -0,0 +1,20 @@ + +[General] + + + +[Media] + +# Where to store temporary media files (such as extracted vocals or ASS tracks) +media_dir = /tmp/autokara + +[Segment] + +# Which model to use. Options are "Baseline" (base) and "MTL" (better) +model = MTL +# Whether to use BDR model for boundary recognition. 
Better, but resource-intensive +bdr = true +# Whether to use CUDA +cuda = false +# Default delay applied to detected syls, in centiseconds +syl_delay = -4 diff --git a/preprocess_media.sh b/preprocess_media.sh index a88ae74..ec05203 100755 --- a/preprocess_media.sh +++ b/preprocess_media.sh @@ -1,6 +1,20 @@ - - - +#!/bin/bash + + +########################################################################################################## +# +# COMMAND : preprocess_media.sh +# +# AUTHOR : Sting +# +# DESCRIPTION : CLI tool to batch extract ASS lyrics and vocals from a video folder +# +# USE : ./preprocess_media.sh input_folder output_folder +# +# REQUIREMENTS : FFMPEG, Demucs, extractAss and extractWav +# +# +########################################################################################################## USAGE_MESSAGE="usage : $0 video_folder train_folder" diff --git a/setup.py b/setup.py index 6f9b339..c71a27b 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,11 @@ setup( 'autokara-gen-lang=autokara.update_lang_db:main' ], }, + scripts=[ + 'autokara/extractAss.sh', + 'autokara/extractWav.sh', + 'preprocess_media.sh' + ], license='MIT License', classifiers=[ # Trove classifiers -- GitLab