diff --git a/autokara/__init__.py b/autokara/__init__.py index b31ecc406afe6e17bb27fe80f20ed10e8516b16f..90276fd22093df5e55cac1c56b8ea0767c7f451c 100644 --- a/autokara/__init__.py +++ b/autokara/__init__.py @@ -1,2 +1,2 @@ -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.2.0" \ No newline at end of file diff --git a/autokara/autosyl/LyricsAlignment/utils.py b/autokara/autosyl/LyricsAlignment/utils.py index d6f996cc86a18f583585ce9655d4387943a4bbb7..09f470313891efd49ec0e441c1acfdebf6d1e330 100644 --- a/autokara/autosyl/LyricsAlignment/utils.py +++ b/autokara/autosyl/LyricsAlignment/utils.py @@ -22,6 +22,8 @@ class G2p_Wrapper(): def __init__(self, language="jp"): if language == "en": self.transducer = g2p_en.G2p() + elif language == "fr": + self.transducer = make_g2p('fra', 'eng-arpabet') else: # Only Japanese Romaji for now... self.transducer = make_g2p('rji', 'rji-eng-arpa') @@ -120,7 +122,7 @@ def gen_phone_gt(words, raw_lines, language="jp", uppercase_as_english=True): else: g2p_uppercase = g2p - regex_uppercase = re.compile('[^a-z]') + regex_uppercase = re.compile('[^a-zà -ÿ]') # helper function def getsubidx(x, y): # find y in x diff --git a/autokara/autosyl/LyricsAlignment/wrapper.py b/autokara/autosyl/LyricsAlignment/wrapper.py index 51cf853c1886c5def0ed49ca2d1341f67d05b3a1..d936e2251189ed5c9e414826f971e7f36dbef5c1 100644 --- a/autokara/autosyl/LyricsAlignment/wrapper.py +++ b/autokara/autosyl/LyricsAlignment/wrapper.py @@ -4,6 +4,7 @@ from time import time import torch import torch.nn as nn import torch.nn.functional as F +import re from . import utils from .model import train_audio_transforms, AcousticModel, BoundaryDetection @@ -144,18 +145,20 @@ def preprocess_audio(audio_file, sr=22050): return y, curr_sr def preprocess_lyrics(lyrics_lines, word_file=None, language="jp", uppercase_as_english=True): - from string import ascii_lowercase - d = {ascii_lowercase[i]: i for i in range(26)} - d["'"] = 26 - d[" "] = 27 - d["~"] = 28 + #from string import ascii_lowercase + #d = {ascii_lowercase[i]: i for i in range(26)} + #d["'"] = 26 + #d[" "] = 27 + #d["~"] = 28 + + relevant_chars = re.compile("[a-zA-ZÀ-ÿ' ~]") # process raw #with open(lyrics_file, 'r') as f: # raw_lines = f.read().splitlines() raw_lines = lyrics_lines - raw_lines = ["".join([c for c in line if c.lower() in d.keys()]).strip() for line in raw_lines] + raw_lines = ["".join([c for c in line if (re.match(relevant_chars, c) != None)]).strip() for line in raw_lines] raw_lines = [" ".join(line.split()) for line in raw_lines if len(line) > 0] # concat full_lyrics = " ".join(raw_lines) diff --git a/autokara/autosyl/assUtils.py b/autokara/autosyl/assUtils.py index 4b819a1188d348cda707eb4976b59fb629b123d8..0362147b20ee298fdd7289d1e0047fe2d65cf5f4 100644 --- a/autokara/autosyl/assUtils.py +++ b/autokara/autosyl/assUtils.py @@ -28,7 +28,7 @@ def getSyls(ass_file): META = [] with open(ass_file, 'r') as f: CONTENT = f.read() - strip_regex = re.compile('[^a-zA-Z]') + strip_regex = re.compile('[^a-zA-ZÀ-ÿ]') LINES_KARA = re.compile(r"(?:Comment|Dialogue):.*(\d+:\d{2}:\d{2}.\d{2}),(\d+:\d{2}:\d{2}.\d{2}),([^,]*),([^,]*),(\d+),(\d+),(\d+),(?:(?!fx|template|code)\w)*,(.*)\n") RGX_TAGS = re.compile(r"\{\\k(\d+)\}([^\{\n\r]*)") for line in LINES_KARA.findall(CONTENT): diff --git a/autokara/update_lang_db.py b/autokara/update_lang_db.py index 6e0c63e926ebef29287967a04d7fbed6c53cb9f8..88d09704696ab542d0367985843ce8c6d818d43e 100644 --- a/autokara/update_lang_db.py +++ b/autokara/update_lang_db.py @@ -16,7 +16,14 @@ def main(): for map in mappings: subprocess.check_call(shlex.split(f'cp -r {map:s} {str(g2p_base):s}/mappings/langs/')) subprocess.check_call(shlex.split(f'g2p update')) - + + ipa_langs = ["fra"] + for lang in ipa_langs: + name = Path(lang).name + subprocess.check_call(shlex.split(f'g2p generate-mapping {name:s} --ipa')) + subprocess.check_call(shlex.split(f'g2p update')) + + if not Path.exists(g2p_base / "mappings/langs/rji"): print("ERROR : Failed to find language mapping") else: