From a204811e5a7c7485534cb819785f4ca70596a713 Mon Sep 17 00:00:00 2001
From: Sting <lallegre26@gmail.com>
Date: Sat, 25 Nov 2023 22:35:31 +0100
Subject: [PATCH] Allow choice of language for uppercase words

---
 README.md                                   | 2 ++
 autokara/autokara.py                        | 6 +++++-
 autokara/autosyl/LyricsAlignment/utils.py   | 9 +++------
 autokara/autosyl/LyricsAlignment/wrapper.py | 8 ++++----
 autokara/autosyl/segment.py                 | 4 ++--
 autokara/default.conf                       | 4 ++--
 6 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 4bc86e1..acc798d 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,9 @@ To use the custom phonetic mappings for Japanese Romaji and other non-English la
 $ autokara-gen-lang
 ```
 
+### Windows
 
+Still working on that...
 
 ## Configuration
 
diff --git a/autokara/autokara.py b/autokara/autokara.py
index 6077478..d981211 100644
--- a/autokara/autokara.py
+++ b/autokara/autokara.py
@@ -20,6 +20,7 @@ def main(opts=None):
     parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file")
     parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity")
     parser.add_argument("-l","--lang", help="Select language to use (default is Japanese Romaji)")
+    parser.add_argument("--uppercase-lang", help="Language to use when interpreting uppercase words (default is English)")
 
     args = parser.parse_args(opts)
 
@@ -40,9 +41,12 @@ def main(opts=None):
         'bdr': config['Segment'].getboolean('bdr'),
         'cuda': config['Segment'].getboolean('cuda'),
         'syl_delay': config['Segment'].getint('syl_delay'),
-        'uppercase_english': config['Segment'].getboolean('uppercase_as_english')
+        'uppercase_lang': config['Segment']['uppercase_lang']
     }
 
+    if args.uppercase_lang :
+        segment_config['uppercase_lang'] = args.uppercase_lang
+
 
     if not args.vocals :
         print("Extracting audio from video file...")
diff --git a/autokara/autosyl/LyricsAlignment/utils.py b/autokara/autosyl/LyricsAlignment/utils.py
index d5cb782..748a84a 100644
--- a/autokara/autosyl/LyricsAlignment/utils.py
+++ b/autokara/autosyl/LyricsAlignment/utils.py
@@ -117,14 +117,11 @@ def load_lyrics(lyrics_file):
 def write_wav(path, audio, sr):
     soundfile.write(path, audio.T, sr, "PCM_16")
 
-def gen_phone_gt(words, raw_lines, language="jp", uppercase_as_english=True):
+def gen_phone_gt(words, raw_lines, language="jp", uppercase_lang="en"):
 
-    print(f"Translating lyrics to phonemes, language chosen : {language:s}, uppercase as English : {str(uppercase_as_english):s}")
+    print(f"Translating lyrics to phonemes, base language : {language:s}, uppercase language : {uppercase_lang:s}")
     g2p = G2p_Wrapper(language=language)
-    if uppercase_as_english:
-        g2p_uppercase = G2p_Wrapper(language="en")
-    else:
-        g2p_uppercase = g2p
+    g2p_uppercase = G2p_Wrapper(language=uppercase_lang)
 
     regex_uppercase = re.compile('[^a-zà-ÿ]')
 
diff --git a/autokara/autosyl/LyricsAlignment/wrapper.py b/autokara/autosyl/LyricsAlignment/wrapper.py
index d936e22..d752ac6 100644
--- a/autokara/autosyl/LyricsAlignment/wrapper.py
+++ b/autokara/autosyl/LyricsAlignment/wrapper.py
@@ -11,10 +11,10 @@ from .model import train_audio_transforms, AcousticModel, BoundaryDetection
 
 np.random.seed(7)
 
-def preprocess_from_file(audio_file, lyrics_file, word_file=None, language="jp", uppercase_as_english=True):
+def preprocess_from_file(audio_file, lyrics_file, word_file=None, language="jp", uppercase_lang="en"):
     y, sr = preprocess_audio(audio_file)
 
-    words, lyrics_p, idx_word_p, idx_line_p = preprocess_lyrics(lyrics_file, word_file, language=language, uppercase_as_english=uppercase_as_english)
+    words, lyrics_p, idx_word_p, idx_line_p = preprocess_lyrics(lyrics_file, word_file, language=language, uppercase_lang=uppercase_lang)
 
     return y, words, lyrics_p, idx_word_p, idx_line_p
 
@@ -144,7 +144,7 @@ def preprocess_audio(audio_file, sr=22050):
 
     return y, curr_sr
 
-def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_english=True):
+def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_lang="en"):
     #from string import ascii_lowercase
     #d = {ascii_lowercase[i]: i for i in range(26)}
     #d["'"] = 26
@@ -169,7 +169,7 @@ def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_
     else:
         words_lines = full_lyrics.split()
 
-    lyrics_p, words_p, idx_word_p, idx_line_p = utils.gen_phone_gt(words_lines, raw_lines, language=(language if language else "jp"), uppercase_as_english=uppercase_as_english)
+    lyrics_p, words_p, idx_word_p, idx_line_p = utils.gen_phone_gt(words_lines, raw_lines, language=(language if language else "jp"), uppercase_lang=uppercase_lang)
 
     return words_lines, lyrics_p, idx_word_p, idx_line_p
 
diff --git a/autokara/autosyl/segment.py b/autokara/autosyl/segment.py
index 0376de6..029b69f 100644
--- a/autokara/autosyl/segment.py
+++ b/autokara/autosyl/segment.py
@@ -26,7 +26,7 @@ def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur
     checkpoint_folder = f"{str(Path(__file__).parent):s}/LyricsAlignment/checkpoints"
     language = language
 
-    uppercase_as_english = config['uppercase_english']
+    uppercase_lang = config['uppercase_lang']
 
     lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls]
     #print(lyrics_lines)
@@ -39,7 +39,7 @@ def segment(songfile, config, reference_syls=None,syls_per_line=10, last_syl_dur
     # lyrics_p:     phoneme sequence of the target lyrics
     # idx_word_p:   indices of word start in lyrics_p
     # idx_line_p:   indices of line start in lyrics_p
-    audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file, language, uppercase_as_english=uppercase_as_english)
+    audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file, language, uppercase_lang=uppercase_lang)
     if verbose:
         print(lyrics_p)
 
diff --git a/autokara/default.conf b/autokara/default.conf
index 4d48d0e..e654554 100644
--- a/autokara/default.conf
+++ b/autokara/default.conf
@@ -18,5 +18,5 @@ bdr = true
 cuda = false
 # Default delay applied to detected syls, in centiseconds
 syl_delay = -4
-# Whether to treat uppercase words as English. If false, use song language everywhere
-uppercase_as_english = true
\ No newline at end of file
+# Two-letter code for the language to use for uppercase words. Default is "en" for English.
+uppercase_lang = en
\ No newline at end of file
-- 
GitLab