Skip to content
Extraits de code Groupes Projets
Valider eaaa83d0 rédigé par Sting
Parcourir les fichiers

Add verbose option

parent 698ac7e7
Aucune branche associée trouvée
Aucune étiquette associée trouvée
1 requête de fusion : !4 Lyrics Alignment
......@@ -14,10 +14,12 @@ parser.add_argument("source_file", type=str, help="The video/audio file to time"
parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time")
parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction")
parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file")
parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity")
args = parser.parse_args()
ass_file = args.ass_file
verbose = args.verbose
if not args.vocals :
print("Extracting audio from video file...")
......@@ -45,8 +47,13 @@ else:
print("Identifying syl starts...")
if verbose:
print("Retrieving syls from lyrics...")
reference_syls, line_meta = getSyls(ass_file)
syls = segment(vocals_file, reference_syls=reference_syls)
if verbose:
print("Starting syl detection...")
syls = segment(vocals_file, reference_syls=reference_syls, verbose=verbose)
print(syls)
print(line_meta)
......
......@@ -11,7 +11,7 @@ from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file
def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500):
def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, verbose=False):
delay = -4
backtrack = False
......@@ -21,16 +21,16 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500):
audio_file = songfile # pre-computed source-separated vocals; These models do not work with mixture input.
word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it
method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR"
cuda=True # set True if you have access to a GPU
cuda=False # set True if you have access to a GPU
checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints"
pred_file = "./MTL.csv" # saved alignment results, "(float) start_time, (float) end_time, (string) word"
lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls]
#print(lyrics_lines)
if verbose:
print("Preprocessing audio and lyrics...")
# load audio and lyrics
# words: a list of words
# lyrics_p: phoneme sequence of the target lyrics
......@@ -38,10 +38,12 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500):
# idx_line_p: indices of line start in lyrics_p
audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file)
if verbose:
print("Retrieving syls from lyrics...")
# compute alignment
# word_align: a list of frame indices aligned to each word
# words: a list of words
word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=False, checkpoint_folder=checkpoint_folder)
word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=cuda, checkpoint_folder=checkpoint_folder)
words_onsets = np.array([word_align[i][0] for i in range(len(word_align))])
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter