Skip to content

Commit 6802e8a

Browse files
committed
Added Utilities
1 parent 0c2f68b commit 6802e8a

2 files changed

Lines changed: 85 additions & 0 deletions

File tree

data/datasets.txt

Whitespace-only changes.

utils.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import soundfile
2+
import numpy as np
3+
import librosa
4+
import glob
5+
import os
6+
from sklearn.model_selection import train_test_split
7+
8+
# Emotion codes embedded in RAVDESS file names, mapped to readable labels.
int2emotion = dict(
    zip(
        ("01", "02", "03", "04", "05", "06", "07", "08"),
        ("neutral", "calm", "happy", "sad", "angry", "fearful", "disgust", "surprised"),
    )
)

# The subset of emotions the classifier is restricted to.
AVAILABLE_EMOTIONS = {"angry", "sad", "neutral", "happy"}
27+
28+
def extract_feature(file_name, **kwargs):
    """
    Extract features from the audio file `file_name` and return them
    concatenated into a single 1-D numpy array.

    Features supported (enable via boolean keyword flags):
        - mfcc:     Mel-frequency cepstral coefficients (40 coefficients)
        - chroma:   chromagram computed from the STFT
        - mel:      Mel-scaled spectrogram
        - contrast: spectral contrast
        - tonnetz:  tonal centroid features of the harmonic component

    Each enabled feature is averaged across time frames, so the result has a
    fixed length regardless of the clip's duration. If no flags are given,
    an empty array is returned.

    e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        # The STFT is only needed by the chroma and contrast features,
        # so compute it lazily.
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            # BUGFIX: librosa >= 0.10 made these parameters keyword-only;
            # passing the signal positionally raises a TypeError.
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result
67+
68+
69+
def load_data(test_size=0.2):
    """
    Build the dataset from RAVDESS audio files under data/Actor_*/.

    For every .wav file whose emotion label is in AVAILABLE_EMOTIONS,
    extract MFCC + chroma + mel features and collect the label, then
    return a train/test split.

    Parameters:
        test_size: fraction of samples held out for the test split.

    Returns:
        X_train, X_test, y_train, y_test as produced by train_test_split
        (random_state is fixed at 7 for reproducibility).
    """
    feature_rows, labels = [], []
    for path in glob.glob("data/Actor_*/*.wav"):
        # RAVDESS file names encode metadata as dash-separated fields;
        # the third field is the two-digit emotion code.
        code = os.path.basename(path).split("-")[2]
        label = int2emotion[code]
        # skip emotions outside the set we classify
        if label not in AVAILABLE_EMOTIONS:
            continue
        feature_rows.append(extract_feature(path, mfcc=True, chroma=True, mel=True))
        labels.append(label)
    # fixed random_state keeps the split reproducible across runs
    return train_test_split(np.array(feature_rows), labels, test_size=test_size, random_state=7)

0 commit comments

Comments
 (0)