模型:
voidful/mhubert-base
asrp==0.0.35 # extracted from fairseq repo
# Encode a waveform into a sequence of discrete unit codes with asrp.
# Download these two files into the working directory first:
#   checkpoint:   https://huggingface.co/voidful/mhubert-base/resolve/main/mhubert_base_vp_en_es_fr_it3_L11_km1000.bin
#   sample audio: https://keithito.com/LJ-Speech-Dataset/LJ037-0171.wav
import asrp

# Arguments: the Hugging Face model id, the local path of the downloaded
# checkpoint, and 11 (presumably the feature layer index, matching "L11" in
# the checkpoint file name -- TODO confirm against the asrp documentation).
hc = asrp.HubertCode("voidful/mhubert-base", './mhubert_base_vp_en_es_fr_it3_L11_km1000.bin', 11)

# Calling the encoder on a wav path returns a mapping; its 'code' entry holds
# the unit sequence shown in the result below.
code = hc('./LJ037-0171.wav')['code']
result:
array([991, 393, 946, 215, 215, 327, 487, 487, 219, 219, 522, 522, 975, 975, 975, 975, 668, 576, 576, 384, 761, 907, 430, 748, 12, 12, 977, 877, 179, 961, 428, 428, 822, 89, 194, 194, 664, 817, 817, 146, 146, 146, 283, 283, 352, 352, 428, 428, 812, 523, 143, 105, 105, 244, 244, 583, 583, 576, 384, 879, 32, 170, 683, 731, 600, 600, 702, 15, 59, 754, 872, 324, 789, 789, 402, 908, 380, 211, 179, 961, 207, 950, 321, 113, 327, 327, 932, 148, 148, 202, 393, 946, 215, 215, 406, 406, 423, 423, 6, 384, 879, 879, 219, 219, 522, 522, 589, 589, 337, 126, 126, 126, 323, 740, 663, 663, 969, 969, 969, 506, 506, 506, 545, 545, 85, 85, 297, 297, 265, 675, 237, 237, 307, 407, 407, 499, 407, 334, 334, 334, 111, 666, 666, 277, 128, 665, 644, 644, 389, 771, 46, 46, 179, 961, 931, 428, 822, 822, 89, 194, 194, 664, 765, 765, 302, 302, 205, 205, 521, 521, 29, 29, 537, 393, 393, 946, 734, 263, 45, 914, 445, 469, 469, 469, 482, 972, 972, 972, 972, 333, 333, 817, 817, 817, 146, 146, 146, 283, 88, 352, 352, 915, 143, 79, 79, 868, 868, 220, 220, 870, 45, 272, 313, 313, 367, 367, 729, 729, 409, 409, 409, 45, 468, 468, 468, 468, 468, 468, 468, 468, 340, 340, 340, 340, 340, 340, 340, 340, 380, 660, 555, 555, 208, 417, 942, 605, 193, 121, 407, 704, 704, 704, 704, 334, 499, 226, 226, 621, 128, 665, 665, 991, 991, 459, 459, 459, 173, 945, 945, 945, 233, 233, 479, 479, 479, 479, 330, 776, 776, 655, 655, 655, 837, 837, 81, 81, 664, 429, 148, 431, 431, 531, 531, 531, 531, 531, 668, 167, 104, 104, 104, 70, 70, 185, 686, 85, 85, 85, 297, 243, 243, 172, 172, 871, 877, 89, 194, 664, 470, 470, 152, 152, 152, 429, 429, 429, 429, 290, 943, 943, 943, 484, 488, 620, 352, 915, 143, 38, 479, 479, 479, 479, 330, 330, 776, 167, 655, 655, 655, 837, 837, 81, 81, 81, 284, 284, 377, 377, 663, 969, 969, 969, 555, 555, 208, 433, 755, 942, 942, 605, 193, 121, 121, 121, 704, 704, 334])
# Turn the unit codes back into audio with the code-HiFi-GAN vocoder.
# Download the vocoder checkpoint into the working directory first:
#   https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000
import asrp

# Code2Speech is accessed through the asrp namespace because only
# `import asrp` is in scope; a bare `Code2Speech` would raise NameError.
# Note that this rebinds `hc`, which the encoding snippet used for HubertCode.
hc = asrp.Code2Speech('./g_00500000', vocoder='hifigan', end_tok=999, code_begin_pad=0)

# play on notebook
import IPython.display as ipd

# `code` is the unit sequence produced by the encoding snippet above.
ipd.Audio(data=hc(code), autoplay=False, rate=16000)

# NOTE(review): the source page had the following instruction text fused onto
# the end of the statement above; it is not part of the example code:
#   "Translate the content above into Chinese; do not translate upper-case
#   English; keep <a> tags and all their attributes; return the translated
#   Chinese according to these constraints."
asrp==0.0.35 # extracted from fairseq repo
# Encode a waveform into a sequence of discrete unit codes with asrp.
# Download these two files into the working directory first:
#   checkpoint:   https://huggingface.co/voidful/mhubert-base/resolve/main/mhubert_base_vp_en_es_fr_it3_L11_km1000.bin
#   sample audio: https://keithito.com/LJ-Speech-Dataset/LJ037-0171.wav
import asrp

# Arguments: the Hugging Face model id, the local path of the downloaded
# checkpoint, and 11 (presumably the feature layer index, matching "L11" in
# the checkpoint file name -- TODO confirm against the asrp documentation).
hc = asrp.HubertCode("voidful/mhubert-base", './mhubert_base_vp_en_es_fr_it3_L11_km1000.bin', 11)

# Calling the encoder on a wav path returns a mapping; its 'code' entry holds
# the unit sequence shown in the result below.
code = hc('./LJ037-0171.wav')['code']
结果:
array([991, 393, 946, 215, 215, 327, 487, 487, 219, 219, 522, 522, 975, 975, 975, 975, 668, 576, 576, 384, 761, 907, 430, 748, 12, 12, 977, 877, 179, 961, 428, 428, 822, 89, 194, 194, 664, 817, 817, 146, 146, 146, 283, 283, 352, 352, 428, 428, 812, 523, 143, 105, 105, 244, 244, 583, 583, 576, 384, 879, 32, 170, 683, 731, 600, 600, 702, 15, 59, 754, 872, 324, 789, 789, 402, 908, 380, 211, 179, 961, 207, 950, 321, 113, 327, 327, 932, 148, 148, 202, 393, 946, 215, 215, 406, 406, 423, 423, 6, 384, 879, 879, 219, 219, 522, 522, 589, 589, 337, 126, 126, 126, 323, 740, 663, 663, 969, 969, 969, 506, 506, 506, 545, 545, 85, 85, 297, 297, 265, 675, 237, 237, 307, 407, 407, 499, 407, 334, 334, 334, 111, 666, 666, 277, 128, 665, 644, 644, 389, 771, 46, 46, 179, 961, 931, 428, 822, 822, 89, 194, 194, 664, 765, 765, 302, 302, 205, 205, 521, 521, 29, 29, 537, 393, 393, 946, 734, 263, 45, 914, 445, 469, 469, 469, 482, 972, 972, 972, 972, 333, 333, 817, 817, 817, 146, 146, 146, 283, 88, 352, 352, 915, 143, 79, 79, 868, 868, 220, 220, 870, 45, 272, 313, 313, 367, 367, 729, 729, 409, 409, 409, 45, 468, 468, 468, 468, 468, 468, 468, 468, 340, 340, 340, 340, 340, 340, 340, 340, 380, 660, 555, 555, 208, 417, 942, 605, 193, 121, 407, 704, 704, 704, 704, 334, 499, 226, 226, 621, 128, 665, 665, 991, 991, 459, 459, 459, 173, 945, 945, 945, 233, 233, 479, 479, 479, 479, 330, 776, 776, 655, 655, 655, 837, 837, 81, 81, 664, 429, 148, 431, 431, 531, 531, 531, 531, 531, 668, 167, 104, 104, 104, 70, 70, 185, 686, 85, 85, 85, 297, 243, 243, 172, 172, 871, 877, 89, 194, 664, 470, 470, 152, 152, 152, 429, 429, 429, 429, 290, 943, 943, 943, 484, 488, 620, 352, 915, 143, 38, 479, 479, 479, 479, 330, 330, 776, 167, 655, 655, 655, 837, 837, 81, 81, 81, 284, 284, 377, 377, 663, 969, 969, 969, 555, 555, 208, 433, 755, 942, 942, 605, 193, 121, 121, 121, 704, 704, 334])
# Turn the unit codes back into audio with the code-HiFi-GAN vocoder.
# Download the vocoder checkpoint into the working directory first:
#   https://dl.fbaipublicfiles.com/fairseq/speech_to_speech/vocoder/code_hifigan/mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj/g_00500000
import asrp

# Code2Speech is accessed through the asrp namespace because only
# `import asrp` is in scope; a bare `Code2Speech` would raise NameError.
# Note that this rebinds `hc`, which the encoding snippet used for HubertCode.
hc = asrp.Code2Speech('./g_00500000', vocoder='hifigan', end_tok=999, code_begin_pad=0)

# play on notebook
import IPython.display as ipd

# `code` is the unit sequence produced by the encoding snippet above.
ipd.Audio(data=hc(code), autoplay=False, rate=16000)