Skip to content
Extraits de code Groupes Projets
Valider eaaa83d0 rédigé par Sting
Parcourir les fichiers

Add verbose option

parent 698ac7e7
Aucune branche associée trouvée
Aucune étiquette associée trouvée
1 requête de fusion : !4 Lyrics Alignment
...@@ -14,10 +14,12 @@ parser.add_argument("source_file", type=str, help="The video/audio file to time" ...@@ -14,10 +14,12 @@ parser.add_argument("source_file", type=str, help="The video/audio file to time"
parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time")
parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction") parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction")
parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file") parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file")
parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity")
args = parser.parse_args() args = parser.parse_args()
ass_file = args.ass_file ass_file = args.ass_file
verbose = args.verbose
if not args.vocals : if not args.vocals :
print("Extracting audio from video file...") print("Extracting audio from video file...")
...@@ -45,8 +47,13 @@ else: ...@@ -45,8 +47,13 @@ else:
print("Identifying syl starts...") print("Identifying syl starts...")
if verbose:
print("Retrieving syls from lyrics...")
reference_syls, line_meta = getSyls(ass_file) reference_syls, line_meta = getSyls(ass_file)
syls = segment(vocals_file, reference_syls=reference_syls)
if verbose:
print("Starting syl detection...")
syls = segment(vocals_file, reference_syls=reference_syls, verbose=verbose)
print(syls) print(syls)
print(line_meta) print(line_meta)
......
...@@ -11,7 +11,7 @@ from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file ...@@ -11,7 +11,7 @@ from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file
def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, verbose=False):
delay = -4 delay = -4
backtrack = False backtrack = False
...@@ -21,16 +21,16 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): ...@@ -21,16 +21,16 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500):
audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input. audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input.
word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it
method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR"
cuda=True # set True if you have access to a GPU cuda=False # set True if you have access to a GPU
checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints" checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints"
pred_file = "./MTL.csv" # saved alignment results, "(float) start_time, (float) end_time, (string) word"
lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls] lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls]
#print(lyrics_lines) #print(lyrics_lines)
if verbose:
print("Preprocessing audio and lyrics...")
# load audio and lyrics # load audio and lyrics
# words: a list of words # words: a list of words
# lyrics_p: phoneme sequence of the target lyrics # lyrics_p: phoneme sequence of the target lyrics
...@@ -38,10 +38,12 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): ...@@ -38,10 +38,12 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500):
# idx_line_p: indices of line start in lyrics_p # idx_line_p: indices of line start in lyrics_p
audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file) audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file)
if verbose:
print("Retrieving syls from lyrics...")
# compute alignment # compute alignment
# word_align: a list of frame indices aligned to each word # word_align: a list of frame indices aligned to each word
# words: a list of words # words: a list of words
word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=False, checkpoint_folder=checkpoint_folder) word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=cuda, checkpoint_folder=checkpoint_folder)
words_onsets = np.array([word_align[i][0] for i in range(len(word_align))]) words_onsets = np.array([word_align[i][0] for i in range(len(word_align))])
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous connecter pour commenter