Added Test functionality

isaac-rnd · isaac-rnd · commit 42f6b5588c62 · 2022-09-25T13:26:14.000+05:30
diff --git a/ser.py b/ser.py
@@ -0,0 +1,50 @@
+from sklearn.neural_network import MLPClassifier
+
+from sklearn.metrics import accuracy_score
+from utils import load_data
+
+import os
+import pickle
+
+# load RAVDESS dataset, holding out 25% of it for testing
+X_train, X_test, y_train, y_test = load_data(test_size=0.25)
+# print some details
+# number of samples in training data
+print("[+] Number of training samples:", X_train.shape[0])
+# number of samples in testing data
+print("[+] Number of testing samples:", X_test.shape[0])
+# number of features used
+# this is the length of each feature vector
+# (the feature vectors come from utils.load_data)
+print("[+] Number of features:", X_train.shape[1])
+# best model, determined by a grid search
+model_params = {
+    'alpha': 0.01,
+    'batch_size': 256,
+    'epsilon': 1e-08,
+    'hidden_layer_sizes': (300,),
+    'learning_rate': 'adaptive',
+    'max_iter': 500,
+}
+# initialize Multi Layer Perceptron classifier
+# with best parameters ( so far )
+model = MLPClassifier(**model_params)
+
+# train the model
+print("[*] Training the model...")
+model.fit(X_train, y_train)
+
+# predict the held-out test samples to measure how good we are
+y_pred = model.predict(X_test)
+
+# calculate the accuracy
+accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
+
+print("Accuracy: {:.2f}%".format(accuracy*100))
+
+# now we save the model
+# make the result directory if it doesn't exist yet (no error if it already does)
+os.makedirs("result", exist_ok=True)
+# use a context manager so the file is flushed and closed even if pickling fails
+with open("result/mlp_classifier.model", "wb") as model_file:
+    pickle.dump(model, model_file)
diff --git a/test.py b/test.py
@@ -0,0 +1,135 @@
+import pyaudio
+import os
+import wave
+import pickle
+from sys import byteorder
+from array import array
+from struct import pack
+from sklearn.neural_network import MLPClassifier
+
+from utils import extract_feature
+
+THRESHOLD = 500  # amplitude that samples are compared against to tell sound from silence
+CHUNK_SIZE = 1024  # frames requested per stream.read() call and per PyAudio buffer
+FORMAT = pyaudio.paInt16  # sample format passed to PyAudio when opening the stream
+RATE = 16000  # samples per second, used for recording, padding and the WAV frame rate
+
+SILENCE = 30  # record() stops once more than this many silent chunks follow the first sound
+
+def is_silent(snd_data):
+    "Returns 'True' if the peak absolute amplitude is below the 'silent' threshold (empty data is silent)"
+    return max((abs(i) for i in snd_data), default=0) < THRESHOLD
+
+def normalize(snd_data):
+    """Scale the samples so the loudest has absolute value 16384; return a new array('h').
+
+    Empty or all-zero input has no peak to scale by, so it is copied unchanged (avoids a zero division).
+    """
+    MAXIMUM = 16384
+    peak = max((abs(i) for i in snd_data), default=0)
+    times = float(MAXIMUM) / peak if peak else 1.0
+    return array('h', (int(i * times) for i in snd_data))
+
+def trim(snd_data):
+    """Strip the quiet samples from both ends of 'snd_data'.
+
+    A sample counts as sound when its absolute value exceeds THRESHOLD. The
+    result is a new array('h') running from the first such sample to the last
+    one, inclusive; it is empty when no sample exceeds the threshold. Quiet
+    samples between those two points are kept, and the input is not modified.
+    """
+    samples = array('h', snd_data)
+
+    # Index of the first loud sample, scanning from the left
+    first = next((k for k, v in enumerate(samples) if abs(v) > THRESHOLD), None)
+    if first is None:
+        # Nothing above the threshold: everything is trimmed away
+        return array('h')
+
+    # Index of the last loud sample, scanning from the right; one must exist
+    # because 'first' was found
+    last = len(samples) - 1
+    while abs(samples[last]) <= THRESHOLD:
+        last -= 1
+
+    return samples[first:last + 1]
+
+def add_silence(snd_data, seconds):
+    "Return a new array('h'): 'snd_data' with 'seconds' (float) of zero samples before and after it"
+    padded = array('h', [0]) * int(seconds * RATE)
+    padded.extend(snd_data)
+    padded.extend(array('h', [0]) * int(seconds * RATE))
+    return padded
+
+def record():
+    """Record a word or words from the microphone.
+
+    Reads chunks until sound has started and more than SILENCE silent
+    chunks have followed, then normalizes the audio, trims silence from
+    both ends and pads with 0.5 seconds of blank sound so VLC et al can
+    play it without it getting chopped off.
+
+    Returns (sample_width, data): the sample size PyAudio reports for
+    FORMAT and the samples as an array of signed shorts.
+    """
+    p = pyaudio.PyAudio()
+    # try/finally: release the audio device even on errors or Ctrl-C
+    try:
+        sample_width = p.get_sample_size(FORMAT)
+        stream = p.open(format=FORMAT, channels=1, rate=RATE,
+            input=True, output=True,
+            frames_per_buffer=CHUNK_SIZE)
+        try:
+            num_silent = 0
+            snd_started = False
+            r = array('h')
+            while True:
+                # little endian, signed short
+                snd_data = array('h', stream.read(CHUNK_SIZE))
+                if byteorder == 'big':
+                    snd_data.byteswap()
+                r.extend(snd_data)
+                silent = is_silent(snd_data)
+                if silent and snd_started:
+                    num_silent += 1
+                elif not silent and not snd_started:
+                    snd_started = True
+                if snd_started and num_silent > SILENCE:
+                    break
+        finally:
+            stream.stop_stream()
+            stream.close()
+    finally:
+        p.terminate()
+
+    r = normalize(r)
+    r = trim(r)
+    r = add_silence(r, 0.5)
+    return sample_width, r
+
+def record_to_file(path):
+    "Records from the microphone and outputs the resulting data to 'path' as a mono WAV file"
+    sample_width, data = record()
+    # little-endian signed shorts; a repeat count avoids building an 'hhh...' format string
+    data = pack('<%dh' % len(data), *data)
+    # the context manager closes the file even if writing fails
+    with wave.open(path, 'wb') as wf:
+        wf.setnchannels(1)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(RATE)
+        wf.writeframes(data)
+
+
+if __name__ == "__main__":
+    # load the saved model (after training); unpickling can run code, so only load files you trust
+    with open("result/mlp_classifier.model", "rb") as model_file:
+        model = pickle.load(model_file)
+    print("Please talk")
+    filename = "test.wav"
+    # record the file (start talking)
+    record_to_file(filename)
+    # extract features and reshape them into a single-row input
+    features = extract_feature(filename, mfcc=True, chroma=True, mel=True).reshape(1, -1)
+    # predict and show the result !
+    result = model.predict(features)[0]
+    print("result:", result)