From eaaa83d011b6a881854d13a3e2bb9bcd68d481c9 Mon Sep 17 00:00:00 2001 From: Sting <loic.allegre@ensiie.fr> Date: Mon, 24 Jul 2023 12:33:15 +0200 Subject: [PATCH] Add verbose option --- autokara.py | 9 ++++++++- autosyl/segment.py | 12 +++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/autokara.py b/autokara.py index c77abb4..a84a051 100644 --- a/autokara.py +++ b/autokara.py @@ -14,10 +14,12 @@ parser.add_argument("source_file", type=str, help="The video/audio file to time" parser.add_argument("ass_file", type=str, help="The ASS file with lyrics to time") parser.add_argument("--vocals", action="store_true", help="Treat the input as vocals file, i.e. do not perform vocals extraction") parser.add_argument("-o", "--output", help="Write output to specified file. If absent, overwrite source file") +parser.add_argument("-v","--verbose", action="store_true", help="Increased verbosity") args = parser.parse_args() ass_file = args.ass_file +verbose = args.verbose if not args.vocals : print("Extracting audio from video file...") @@ -45,8 +47,13 @@ else: print("Identifying syl starts...") +if verbose: + print("Retrieving syls from lyrics...") reference_syls, line_meta = getSyls(ass_file) -syls = segment(vocals_file, reference_syls=reference_syls) + +if verbose: + print("Starting syl detection...") +syls = segment(vocals_file, reference_syls=reference_syls, verbose=verbose) print(syls) print(line_meta) diff --git a/autosyl/segment.py b/autosyl/segment.py index cc8e3fe..cc1ae1e 100644 --- a/autosyl/segment.py +++ b/autosyl/segment.py @@ -11,7 +11,7 @@ from autosyl.LyricsAlignment.wrapper import align, preprocess_from_file -def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): +def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500, verbose=False): delay = -4 backtrack = False @@ -21,16 +21,16 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): audio_file = songfile # 
pre-computed source-separated vocals; These models do not work with mixture input. word_file = None # example: jamendolyrics/lyrics/*.words.txt"; Set to None if you don't have it method = "MTL_BDR" # "Baseline", "MTL", "Baseline_BDR", "MTL_BDR" - cuda=True # set True if you have access to a GPU + cuda=False # set True if you have access to a GPU checkpoint_folder = "./autosyl/LyricsAlignment/checkpoints" - pred_file = "./MTL.csv" # saved alignment results, "(float) start_time, (float) end_time, (string) word" - lyrics_lines = [" ".join([syl[1] for syl in line]) for line in reference_syls] #print(lyrics_lines) + if verbose: + print("Preprocessing audio and lyrics...") # load audio and lyrics # words: a list of words # lyrics_p: phoneme sequence of the target lyrics @@ -38,10 +38,12 @@ def segment(songfile, reference_syls=None, syls_per_line=10, last_syl_dur=500): # idx_line_p: indices of line start in lyrics_p audio, words, lyrics_p, idx_word_p, idx_line_p = preprocess_from_file(audio_file, lyrics_lines, word_file) + if verbose: + print("Computing word alignment...") # compute alignment # word_align: a list of frame indices aligned to each word # words: a list of words - word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=False, checkpoint_folder=checkpoint_folder) + word_align, words = align(audio, words, lyrics_p, idx_word_p, idx_line_p, method=method, cuda=cuda, checkpoint_folder=checkpoint_folder) words_onsets = np.array([word_align[i][0] for i in range(len(word_align))]) -- GitLab