From a204811e5a7c7485534cb819785f4ca70596a713 Mon Sep 17 00:00:00 2001 From: Sting <lallegre26@gmail.com> Date: Sat, 25 Nov 2023 22:35:31 +0100 Subject: [PATCH] Allow choice of language for uppercase words --- README.md | 2 ++ autokara/autokara.py | 6 +++++- autokara/autosyl/LyricsAlignment/utils.py | 9 +++------ autokara/autosyl/LyricsAlignment/wrapper.py | 8 ++++---- autokara/autosyl/segment.py | 4 ++-- autokara/default.conf | 4 ++-- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4bc86e1..acc798d 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,9 @@ To use the custom phonetic mappings for Japanese Romaji and other non-English la $ autokara-gen-lang ``` +### Windows +Still working on that... ## Configuration diff --git a/autokara/autokara.py b/autokara/autokara.py index 6077478..d981211 100644 --- a/autokara/autokara.py +++ b/autokara/autokara.py @@ -20,6 +20,7 @@ def main(opts=None): parser.add_argument("-o", "--output", help="Write output to specified file. 
If absent, overwrite source file") parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity") parser.add_argument("-l","--lang", help="Select language to use (default is Japanese Romaji)") + parser.add_argument("--uppercase-lang", help="Language to use when interpreting uppercase words (default is English)") args = parser.parse_args(opts) @@ -40,9 +41,12 @@ def main(opts=None): 'bdr': config['Segment'].getboolean('bdr'), 'cuda': config['Segment'].getboolean('cuda'), 'syl_delay': config['Segment'].getint('syl_delay'), - 'uppercase_english': config['Segment'].getboolean('uppercase_as_english') + 'uppercase_lang': config['Segment']['uppercase_lang'] } + if args.uppercase_lang : + segment_config['uppercase_lang'] = args.uppercase_lang + if not args.vocals : print("Extracting audio from video file...") diff --git a/autokara/autosyl/LyricsAlignment/utils.py b/autokara/autosyl/LyricsAlignment/utils.py index d5cb782..748a84a 100644 --- a/autokara/autosyl/LyricsAlignment/utils.py +++ b/autokara/autosyl/LyricsAlignment/utils.py @@ -117,14 +117,11 @@ def load_lyrics(lyrics_file): def write_wav(path, audio, sr): soundfile.write(path, audio.T, sr, "PCM_16") -def gen_phone_gt(words, raw_lines, language="jp", uppercase_as_english=True): +def gen_phone_gt(words, raw_lines, language="jp", uppercase_lang="en"): - print(f"Translating lyrics to phonemes, language chosen : {language:s}, uppercase as English : {str(uppercase_as_english):s}") + print(f"Translating lyrics to phonemes, base language : {language:s}, uppercase language : {uppercase_lang:s}") g2p = G2p_Wrapper(language=language) - if uppercase_as_english: - g2p_uppercase = G2p_Wrapper(language="en") - else: - g2p_uppercase = g2p + g2p_uppercase = G2p_Wrapper(language=uppercase_lang) regex_uppercase = re.compile('[^a-zà -ÿ]') diff --git a/autokara/autosyl/LyricsAlignment/wrapper.py b/autokara/autosyl/LyricsAlignment/wrapper.py index d936e22..d752ac6 100644 --- 
a/autokara/autosyl/LyricsAlignment/wrapper.py +++ b/autokara/autosyl/LyricsAlignment/wrapper.py @@ -11,10 +11,10 @@ from .model import train_audio_transforms, AcousticModel, BoundaryDetection np.random.seed(7) -def preprocess_from_file(audio_file, lyrics_file, word_file=None, language="jp", uppercase_as_english=True): +def preprocess_from_file(audio_file, lyrics_file, word_file=None, language="jp", uppercase_lang="en"): y, sr = preprocess_audio(audio_file) - words, lyrics_p, idx_word_p, idx_line_p = preprocess_lyrics(lyrics_file, word_file, language=language, uppercase_as_english=uppercase_as_english) + words, lyrics_p, idx_word_p, idx_line_p = preprocess_lyrics(lyrics_file, word_file, language=language, uppercase_lang=uppercase_lang) return y, words, lyrics_p, idx_word_p, idx_line_p @@ -144,7 +144,7 @@ def preprocess_audio(audio_file, sr=22050): return y, curr_sr -def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_english=True): +def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_lang="en"): #from string import ascii_lowercase #d = {ascii_lowercase[i]: i for i in range(26)} #d["'"] = 26 @@ -169,7 +169,7 @@ def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_ else: words_lines = full_lyrics.split() - lyrics_p, words_p, idx_word_p, idx_line_p = utils.gen_phone_gt(words_lines, raw_lines, language=(language if language else "jp"), uppercase_as_english=uppercase_as_english) + lyrics_p, words_p, idx_word_p, idx_line_p = utils.gen_phone_gt(words_lines, raw_lines, language=(language if language else "jp"), uppercase_lang=uppercase_lang) return words_lines, lyrics_p, idx_word_p, idx_line_p diff --git a/autokara/autosyl/segment.py b/autokara/autosyl/segment.py index 0376de6..029b69f 100644 --- a/autokara/autosyl/segment.py +++ b/autokara/autosyl/segment.py @@ -26,7 +26,7 @@ def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur checkpoint_folder = 
f"{str(Path(__file__).parent):s}/LyricsAlignment/checkpoints" language = language - uppercase_as_english = config['uppercase_english'] + uppercase_lang = config['uppercase_lang'] lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls] #print(lyrics_lines) @@ -39,7 +39,7 @@ def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur # lyrics_p: phoneme sequence of the target lyrics # idx_word_p: indices of word start in lyrics_p # idx_line_p: indices of line start in lyrics_p - audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file, language, uppercase_as_english=uppercase_as_english) + audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file, language, uppercase_lang=uppercase_lang) if verbose: print(lyrics_p) diff --git a/autokara/default.conf b/autokara/default.conf index 4d48d0e..e654554 100644 --- a/autokara/default.conf +++ b/autokara/default.conf @@ -18,5 +18,5 @@ bdr = true cuda = false # Default delay applied to detected syls, in centiseconds syl_delay = -4 -# Whether to treat uppercase words as English. If false, use song language everywhere -uppercase_as_english = true \ No newline at end of file +# Two-letter code for the language to use for uppercase words. Default is "en" for English. +uppercase_lang = en \ No newline at end of file -- GitLab