Skip to content
Extraits de code Groupes Projets
Valider 7700034c rédigé par Sting's avatar Sting
Parcourir les fichiers

Add a proper phonetic transcription for Japanese

parent eaaa83d0
Aucune branche associée trouvée
Aucune étiquette associée trouvée
1 requête de fusion!4Lyrics Alignment
Ce commit fait partie de la requête de fusion !4. Les commentaires créés ici seront créés dans le contexte de cette requête de fusion.
......@@ -5,15 +5,37 @@ import numpy as np
import librosa
import string
import warnings
from g2p_en import G2p
import g2p_en
from g2p import make_g2p
g2p = G2p()
# Inventory of phoneme symbols known to this module; the final entry is a
# single space character.
phone_dict = [
    'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH',
    'EH', 'ER', 'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K',
    'L', 'M', 'N', 'NG', 'OW', 'OY', 'P', 'R', 'S', 'SH',
    'T', 'TH', 'UH', 'UW', 'V', 'W', 'Y', 'Z', 'ZH', ' ',
]
# Reverse lookup: phoneme symbol -> its position in phone_dict.
phone2int = {symbol: position for position, symbol in enumerate(phone_dict)}
class G2p_Wrapper:
    """Callable grapheme-to-phoneme converter with a language switch.

    For ``language == "en"`` the work is delegated to ``g2p_en.G2p``.
    Any other value selects the Romaji transducer built with
    ``make_g2p('rji', 'rji-eng-arpa')`` (only Japanese Romaji is
    supported besides English for now).
    """

    def __init__(self, language="jp"):
        """Build the transducer matching *language* and remember the choice."""
        self.language = language
        self.transducer = (
            g2p_en.G2p()
            if language == "en"
            else make_g2p('rji', 'rji-eng-arpa')
        )

    def __call__(self, word):
        """Convert *word* with the configured transducer.

        In English mode the transducer's result is returned unchanged.
        Otherwise the result's ``output_string`` is split on whitespace
        and the resulting list of tokens is returned.
        """
        converted = self.transducer(word)
        if self.language != "en":
            return converted.output_string.split()
        return converted
# Module-level grapheme-to-phoneme converter, built at import time and
# configured here for Japanese (Romaji) input.
g2p = G2p_Wrapper(language="jp")
def my_collate(batch):
audio, targets, seqs = zip(*batch)
audio = np.array(audio)
......
......@@ -37,6 +37,8 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, v
# idx_word_p: indices of word start in lyrics_p
# idx_line_p: indices of line start in lyrics_p
audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file)
if verbose:
print(lyrics_p)
if verbose:
print("Retrieving syls from lyrics...")
......
# Mapping configuration for the Romaji -> English ARPAbet transducer.
<<: &shared
- language_name: Romaji
mappings:
# in_lang / out_lang are the same codes passed to make_g2p('rji', 'rji-eng-arpa')
# in the Python wrapper touched by this commit.
- display_name: Romaji (Hepburn) to English Arpabet
in_lang: rji
out_lang: rji-eng-arpa
authors:
- Loïc Allègre
type: mapping
# Rule table and abbreviation table, both CSV files.
mapping: romaji_to_eng-arpa.csv
abbreviations: rji_abbs.csv
# NOTE(review): presumably rules are applied in the order they appear in the
# CSV rather than being re-sorted — confirm against the g2p documentation.
rule_ordering: as-written
case_sensitive: false
norm_form: 'NFC'
# NOTE(review): presumably stops one rule's output from being re-matched as
# input by later rules — confirm against the g2p documentation.
prevent_feeding: true
<<: *shared
\ No newline at end of file
VOWEL,a,e,i,o,u
EI_VOW,e,i
AOU_VOW,a,o,u
\ No newline at end of file
ch,CH ,,,true
ou,OW ,,,true
sh,SH ,,,true
dj,JH ,,,true
dz,Z ,,,true
a,AA ,,,false
e,EH ,,,false
i,IY ,,,false
o,OW ,,,false
u,UW ,,,false
k,K ,,y,false
k,K ,,VOWEL,false
k,KUW ,,CONS,false
k,K ,,,false
s,S ,,VOWEL,false
s,SUW ,,CONS,false
s,S ,,,false
t,T ,,,false
n,N ,,,false
h,HH ,,,false
m,M ,,,false
y,Y ,,,false
r,L ,,,false
w,W ,,,false
g,G ,,,false
z,Z ,,,false
d,D ,,,false
b,B ,,,false
p,P ,,,false
f,F ,,,false
v,V ,,,false
j,JH ,,,false
q,K ,,,false
l,L ,,,false
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter