From 98cfa51a5c9091aa7e722e605c93ed9613da5af1 Mon Sep 17 00:00:00 2001 From: Sting <loic.allegre@ensiie.fr> Date: Fri, 29 Sep 2023 14:36:50 +0200 Subject: [PATCH] Add French support --- autokara/__init__.py | 2 +- autokara/autosyl/LyricsAlignment/utils.py | 4 +++- autokara/autosyl/LyricsAlignment/wrapper.py | 15 +++++++++------ autokara/autosyl/assUtils.py | 2 +- autokara/update_lang_db.py | 9 ++++++++- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/autokara/__init__.py b/autokara/__init__.py index b31ecc4..90276fd 100644 --- a/autokara/__init__.py +++ b/autokara/__init__.py @@ -1,2 +1,2 @@ -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.2.0" \ No newline at end of file diff --git a/autokara/autosyl/LyricsAlignment/utils.py b/autokara/autosyl/LyricsAlignment/utils.py index d6f996c..09f4703 100644 --- a/autokara/autosyl/LyricsAlignment/utils.py +++ b/autokara/autosyl/LyricsAlignment/utils.py @@ -22,6 +22,8 @@ class G2p_Wrapper(): def __init__(self, language="jp"): if language == "en": self.transducer = g2p_en.G2p() + elif language == "fr": + self.transducer = make_g2p('fra', 'eng-arpabet') else: # Only Japanese Romaji for now... self.transducer = make_g2p('rji', 'rji-eng-arpa') @@ -120,7 +122,7 @@ def gen_phone_gt(words, raw_lines, language="jp", uppercase_as_english=True): else: g2p_uppercase = g2p - regex_uppercase = re.compile('[^a-z]') + regex_uppercase = re.compile('[^a-zà-ÿ]') # helper function def getsubidx(x, y): # find y in x diff --git a/autokara/autosyl/LyricsAlignment/wrapper.py b/autokara/autosyl/LyricsAlignment/wrapper.py index 51cf853..d936e22 100644 --- a/autokara/autosyl/LyricsAlignment/wrapper.py +++ b/autokara/autosyl/LyricsAlignment/wrapper.py @@ -4,6 +4,7 @@ from time import time import torch import torch.nn as nn import torch.nn.functional as F +import re from . 
import utils from .model import train_audio_transforms, AcousticModel, BoundaryDetection @@ -144,18 +145,20 @@ def preprocess_audio(audio_file, sr=22050): return y, curr_sr def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_english=True): - from string import ascii_lowercase - d = {ascii_lowercase[i]: i for i in range(26)} - d["'"] = 26 - d[" "] = 27 - d["~"] = 28 + #from string import ascii_lowercase + #d = {ascii_lowercase[i]: i for i in range(26)} + #d["'"] = 26 + #d[" "] = 27 + #d["~"] = 28 + + relevant_chars = re.compile("[a-zA-ZÀ-ÿ' ~]") # process raw #with open(lyrics_file, 'r') as f: # raw_lines = f.read().splitlines() raw_lines = lyrics_lines - raw_lines = ["".join([c for c in line if c.lower() in d.keys()]).strip() for line in raw_lines] + raw_lines = ["".join([c for c in line if (re.match(relevant_chars, c) != None)]).strip() for line in raw_lines] raw_lines = [" ".join(line.split()) for line in raw_lines if len(line) > 0] # concat full_lyrics = " ".join(raw_lines) diff --git a/autokara/autosyl/assUtils.py b/autokara/autosyl/assUtils.py index 4b819a1..0362147 100644 --- a/autokara/autosyl/assUtils.py +++ b/autokara/autosyl/assUtils.py @@ -28,7 +28,7 @@ def getSyls(ass_file): META = [] with open(ass_file, 'r') as f: CONTENT = f.read() - strip_regex = re.compile('[^a-zA-Z]') + strip_regex = re.compile('[^a-zA-ZÀ-ÿ]') LINES_KARA = re.compile(r"(?:Comment|Dialogue):.*(\d+:\d{2}:\d{2}.\d{2}),(\d+:\d{2}:\d{2}.\d{2}),([^,]*),([^,]*),(\d+),(\d+),(\d+),(?:(?!fx|template|code)\w)*,(.*)\n") RGX_TAGS = re.compile(r"\{\\k(\d+)\}([^\{\n\r]*)") for line in LINES_KARA.findall(CONTENT): diff --git a/autokara/update_lang_db.py b/autokara/update_lang_db.py index 6e0c63e..88d0970 100644 --- a/autokara/update_lang_db.py +++ b/autokara/update_lang_db.py @@ -16,7 +16,14 @@ def main(): for map in mappings: subprocess.check_call(shlex.split(f'cp -r {map:s} {str(g2p_base):s}/mappings/langs/')) subprocess.check_call(shlex.split(f'g2p update')) - + + 
ipa_langs = ["fra"] + for lang in ipa_langs: + name = Path(lang).name + subprocess.check_call(shlex.split(f'g2p generate-mapping {name:s} --ipa')) + subprocess.check_call(shlex.split(f'g2p update')) + + if not Path.exists(g2p_base / "mappings/langs/rji"): print("ERROR : Failed to find language mapping") else: -- GitLab