# Install the pinned dependency versions used by this example.
# One command per line: when fused onto a single line, pip would treat the
# words "pip" and "install" as additional package requirements.
pip install datasets==2.10.1
pip install soundfile==0.12.1
pip install librosa==0.10.0.post2
from datasets import load_dataset

# Fetch the "Gae8J/gaepago_s" dataset; no split is requested, so the result
# holds every available split keyed by name.
dataset = load_dataset("Gae8J/gaepago_s")
输出
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
    validation: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
    test: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
})
# Index the 'train' split and display its first example.
dataset['train'][0]
输出
{'file': 'bark/1_Q80fDGLRM.wav',
 'audio': {'path': 'bark/1_Q80fDGLRM.wav',
           'array': array([-9.15838356e-08, 6.80501699e-08, 1.97052145e-07, ..., 0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
           'sampling_rate': 16000},
 'label': 0,
 'is_unknown': False,
 'youtube_id': '1_Q80fDGLRM'}