初めに
CosyVoiceの教師あり音声トークナイザーに関するコードが出ていなかったため issue、以下のRepositoryにて再現実装が行われました。こちらを動かしていきます
github.com
以下でライブラリのverを固定したRepositoryを公開しています
github.com
開発環境
セットアップ
uvの環境を作ります
uv venv -p 3.11
.venv\Scripts\activate
必要なライブラリをインストールします
uv pip install -r requirements.txt
uv pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121 --reinstall
uv pip install onnxruntime soundfile
インストールが終わったら以下のようなライブラリが入っています
cffi==1.17.1
colorama==0.4.6
coloredlogs==15.0.1
einops==0.8.0
filelock==3.13.1
flatbuffers==24.3.25
fsspec==2024.2.0
humanfriendly==10.0
jinja2==3.1.3
markupsafe==2.1.5
mpmath==1.3.0
networkx==3.2.1
numpy==2.2.0
onnx==1.17.0
onnxruntime==1.20.1
packaging==24.2
protobuf==5.29.1
pycparser==2.22
pyreadline3==3.5.4
s3tokenizer==0.0.8
soundfile==0.12.1
sympy==1.13.1
torch==2.5.1+cu121
tqdm==4.67.1
typing-extensions==4.9.0
実行
サンプルの音声が入っているため、こちらを使って動かしていきます
python .\test\test_onnx.py
結果は以下のようになります
=========torch=============
mels.size: torch.Size([2, 128, 420]), mels_lens: tensor([420, 411], dtype=torch.int32)
codes.size: torch.Size([2, 210]), codes_lens: tensor([210, 206], dtype=torch.int32)
wav[0]
tensor([ 143, 602, 702, 473, 2530, 2530, 646, 646, 646, 279, 1145, 440,
279, 279, 2530, 2530, 2530, 2530, 1019, 501, 501, 631, 287, 465,
465, 478, 3766, 183, 391, 621, 11, 3480, 3480, 16, 402, 1037,
253, 55, 720, 227, 227, 159, 110, 103, 103, 103, 110, 110,
2386, 28, 704, 143, 371, 371, 224, 389, 295, 295, 323, 557,
681, 455, 4, 386, 403, 629, 2989, 2782, 507, 3290, 57, 215,
626, 55, 342, 375, 596, 502, 12, 39, 90, 344, 1593, 1593,
170, 471, 170, 2299, 2299, 112, 94, 629, 33, 33, 33, 348,
348, 199, 137, 715, 750, 119, 490, 357, 4, 648, 1700, 468,
468, 422, 47, 732, 224, 224, 224, 2691, 389, 174, 174, 1381,
1381, 74, 100, 6, 584, 363, 225, 214, 197, 621, 209, 59,
284, 17, 538, 460, 87, 720, 227, 618, 594, 151, 173, 540,
1923, 361, 361, 96, 355, 223, 341, 297, 1, 137, 123, 662,
4, 386, 386, 225, 214, 427, 215, 477, 1405, 3108, 122, 122,
310, 149, 1479, 314, 486, 946, 3945, 202, 3539, 3539, 3539, 568,
312, 312, 121, 139, 530, 395, 607, 443, 443, 367, 367, 81,
367, 367, 367, 367, 727, 367, 221, 508, 508, 508, 1145, 691,
367, 367, 573, 221, 221, 2530])
wav[1]
tensor([ 644, 404, 329, 329, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 221,
508, 221, 221, 8, 650, 36, 501, 573, 443, 443, 1849, 435,
19, 293, 27, 468, 468, 27, 27, 468, 247, 179, 184, 580,
1593, 1593, 3539, 2299, 266, 344, 606, 2330, 401, 90, 95, 716,
4, 1935, 4, 4, 160, 495, 2989, 2782, 2782, 2782, 7, 7,
84, 342, 375, 584, 227, 452, 141, 488, 4013, 568, 312, 312,
314, 312, 17, 595, 52, 465, 465, 483, 483, 173, 173, 628,
628, 96, 455, 688, 153, 153, 527, 628, 1529, 612, 514, 304,
2579, 2579, 173, 20, 2579, 20, 562, 15, 2828, 2828, 620, 261,
3480, 188, 88, 33, 33, 27, 39, 39, 2031, 734, 158, 50,
50, 661, 309, 704, 143, 371, 371, 224, 563, 563, 563, 277,
300, 159, 445, 133, 343, 343, 2514, 318, 377, 584, 386, 448,
366, 427, 562, 2828, 1006, 1006, 2185, 2299, 732, 100, 211, 504,
100, 457, 570, 349, 349, 523, 523, 84, 342, 482, 704, 2386,
498, 277, 92, 2185, 579, 579, 579, 41, 41, 41, 406, 1104,
236, 607, 395, 395, 642, 607, 642, 607, 607, 368, 368, 3158,
573, 1145, 367, 367, 1145, 367, 636, 367, 367, 367, 607, 1145,
636, 2189])
=========onnx===============
wav[0]
tensor([ 143, 602, 702, 473, 2530, 2530, 646, 646, 646, 279, 1145, 440,
279, 279, 2530, 2530, 2530, 2530, 1019, 501, 501, 631, 287, 465,
465, 478, 3766, 183, 391, 621, 11, 3480, 3480, 16, 402, 1037,
253, 55, 720, 227, 227, 159, 110, 103, 103, 103, 110, 110,
2386, 28, 704, 143, 371, 371, 224, 389, 295, 295, 323, 557,
681, 455, 4, 386, 403, 629, 2989, 2782, 507, 3290, 57, 215,
626, 55, 342, 375, 596, 502, 12, 39, 90, 344, 1593, 1593,
170, 471, 170, 2299, 2299, 112, 94, 629, 33, 33, 33, 348,
348, 199, 137, 715, 750, 119, 490, 357, 4, 648, 1700, 468,
468, 422, 47, 732, 224, 224, 224, 2691, 389, 174, 174, 1381,
1381, 74, 100, 6, 584, 363, 225, 214, 197, 621, 209, 59,
284, 17, 538, 460, 87, 720, 227, 618, 594, 151, 173, 540,
1923, 361, 361, 96, 355, 223, 341, 297, 1, 137, 123, 662,
4, 386, 386, 225, 214, 427, 215, 477, 1405, 3108, 122, 122,
310, 149, 1479, 314, 486, 946, 3945, 202, 3539, 3539, 3539, 568,
312, 312, 121, 139, 530, 395, 607, 443, 443, 367, 367, 81,
367, 367, 367, 367, 727, 367, 221, 508, 508, 508, 1145, 691,
367, 367, 573, 221, 221, 2530])
all equal: True
miss rate: 0.0%
wav[1]
tensor([ 644, 404, 329, 329, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 221,
508, 221, 221, 8, 650, 36, 501, 573, 443, 443, 1849, 435,
19, 293, 27, 468, 468, 27, 27, 468, 247, 179, 184, 580,
1593, 1593, 3539, 2299, 266, 344, 606, 2330, 401, 90, 95, 716,
4, 1935, 4, 4, 160, 495, 2989, 2782, 2782, 2782, 7, 7,
84, 342, 375, 584, 227, 452, 141, 488, 4013, 568, 312, 312,
314, 312, 17, 595, 52, 465, 465, 483, 483, 173, 173, 628,
628, 96, 455, 688, 153, 153, 527, 628, 1529, 612, 514, 304,
2579, 2579, 173, 20, 2579, 20, 562, 15, 2828, 2828, 620, 261,
3480, 188, 88, 33, 33, 27, 39, 39, 2031, 734, 158, 50,
50, 661, 309, 704, 143, 371, 371, 224, 563, 563, 563, 277,
300, 159, 445, 133, 343, 343, 2514, 318, 377, 584, 386, 448,
366, 427, 562, 2828, 1006, 1006, 2185, 2299, 732, 100, 211, 504,
100, 457, 570, 349, 349, 523, 523, 84, 342, 482, 704, 2386,
498, 277, 92, 2185, 579, 579, 579, 41, 41, 41, 406, 1104,
236, 607, 395, 395, 642, 607, 642, 607, 607, 368, 368, 3158,
573, 1145, 367, 367, 1145, 367, 1145, 367, 367, 367, 367, 367,
692, 404])
all equal: False
miss rate: 2.427184581756592%