Sting · Sting
--- a/autosyl/LyricsAlignment/utils.py

+ 24

− 2
+++ b/autosyl/LyricsAlignment/utils.py

+ 24

− 2
 @@ -5,15 +5,37 @@ import numpy as np
 import librosa
 import string
 import warnings
-from g2p_en import G2p
+import g2p_en
+from g2p import make_g2p
+

-g2p = G2p()

 phone_dict = ['AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER', 'EY', 'F', 'G', 'HH', 'IH', 'IY',
             'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UW', 'V', 'W', 'Y',
             'Z', 'ZH', ' ']
 phone2int = {phone_dict[i]: i for i in range(len(phone_dict))}

+
+class G2p_Wrapper():
+    # Callable grapheme-to-phoneme front end: picks a backend from `language` and is invoked as g2p(word).
+    def __init__(self, language="jp"):
+        if language == "en":
+            self.transducer = g2p_en.G2p()
+        else:                                                   # Any value other than "en" lands here; only Japanese Romaji is supported for now.
+            self.transducer = make_g2p('rji', 'rji-eng-arpa')
+        # Remembered so __call__ can apply the output handling that matches the chosen backend.
+        self.language = language
+
+    def __call__(self, word):
+        if self.language == "en":
+            return self.transducer(word)                        # g2p_en result is passed through unchanged
+        else:
+            return self.transducer(word).output_string.split()  # whitespace-split the output string into a list (presumably ARPAbet symbols, per the 'rji-eng-arpa' mapping name -- TODO confirm)
+
+g2p = G2p_Wrapper(language="jp")  # Shared module-level instance, built at import time; uses the romaji path, whereas the removed line built g2p_en's G2p() directly.
+
+
+
 def my_collate(batch):
    audio, targets, seqs = zip(*batch)
    audio = np.array(audio)