# A clean, minimal VITS2 implementation focused on multilingual synthesis and fine-grained speech control.
# --- 1. Create and activate a conda environment with Montreal Forced Aligner ---
# NOTE(review): when run from a non-interactive script (rather than typed into
# a terminal), `conda activate` usually needs the conda shell hook loaded
# first — confirm how this file is meant to be executed.
conda create -n aligner -c conda-forge montreal-forced-aligner
conda activate aligner

# --- 2. Download the english_mfa acoustic model and dictionary ---
mfa model download acoustic english_mfa
mfa model download dictionary english_mfa

# --- 3. Align the LJSpeech recordings ---
# Positional arguments, in order: the wavs directory, the dictionary name,
# the acoustic model name, and the alignment output directory.
# Additional settings are read from mfa.yaml via --config_path.
mfa align -j 8 \
  --use_mp \
  --clean \
  --single_speaker \
  --no_textgrid_cleanup \
  data/LJSpeech-1.1/wavs/ \
  english_mfa \
  english_mfa \
  data/LJSpeech-1.1/alignment \
  --config_path mfa.yaml

# --- 4. Build the monotonic_align extension in place ---
# Chain with && so the build never runs in the wrong directory when the cd
# fails (e.g. the command was not started from the repository root).
cd vits2/monotonic_align &&
  python setup.py build_ext --inplace