diff --git a/.gitignore b/.gitignore index 3aae0831a6395a910f840b4fec621d64d760f45c..36798834c989bdb309d32e7cfb821346299e99df 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ * !.gitignore !README.md +!requirements.txt !extractWav.sh !extractAss.sh !karaUtils.py diff --git a/README.md b/README.md index 3ff4855041d17a59e6d8ad3838e1740dcc281a4b..f9362906e6f0e4cc1b3ed20965285b9093ff881b 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,9 @@ If we ever want to use an AI to identify syllables without a reference lyrics fi - MKVToolnix (at least the CLI utils) - Python >= 3.8 +- PyTorch: follow the [official installation instructions](https://pytorch.org/get-started/locally/) -Having a CUDA-capable GPU is optional, but can greatly reduce processing time. - -## Setup +All other Python modules can be installed directly through pip; see below. This project requires at least Python 3.8, and using a virtual environment is strongly recommended. To install the dependencies, execute in the project directory : @@ -56,16 +55,33 @@ To install the dependencies, execute in the project directory : $ python -m venv env # create the virtual environment, do it once $ source env/bin/activate # use the virtual environement -# Install the Demucs (vocal separation tool) -$ pip install -U demucs -$ pip install librosa +# Install the required Python modules +$ pip install -r requirements.txt # To exit the virtual environment $ deactivate ``` +Having a CUDA-capable GPU is optional, but can greatly reduce processing time. 
+ + # Use +## Training + +To extract vocals and ASS from MKV video files: +```bash +$ ./process_train_data video_folder train_folder +``` + +To prepare the training data for the model: +```bash +$ python music_processor.py train train_folder +``` + +## Inference + To execute AutoKara on a MKV video file : ```bash $ python autokara.py video.mkv output.ass diff --git a/music_processor.py b/music_processor.py index 81d1f2572ef5ef8fc92bd9d2858c0aa5f5ae5198..78890e8d17b25ba8ef5ede6e6271cfd6619d11e1 100644 --- a/music_processor.py +++ b/music_processor.py @@ -295,13 +295,13 @@ if __name__ == "__main__": if sys.argv[1] == 'train': print("preparing all train data processing...") - serv = "./media/train/*" + serv = f'./{sys.argv[2]:s}/*' music_for_train(serv, verbose=True) print("all train data processing done!") if sys.argv[1] == 'test': print("test data proccesing...") - serv = "./media/test/" + serv = f'./{sys.argv[2]:s}/*' music_for_test(serv) print("test data processing done!") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a1d9cbfadfab62043cc52f5798be97e66bc7b97 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +librosa +demucs +chainer +soundfile +scikit-learn +matplotlib +numpy +tqdm \ No newline at end of file