SRNN data preprocessing script #124

Open · wants to merge 3 commits into base: harsha/reorg
60 changes: 34 additions & 26 deletions examples/pytorch/SRNN/SRNN_Example.ipynb
@@ -25,7 +25,8 @@
"import sys\n",
"import os\n",
"import numpy as np\n",
"import torch"
"import torch\n",
"import h5py"
]
},
{
@@ -80,9 +81,16 @@
}
],
"source": [
"x_train_, y_train = np.load(DATA_DIR + 'x_train.npy'), np.load(DATA_DIR + 'y_train.npy')\n",
"x_val_, y_val = np.load(DATA_DIR + 'x_val.npy'), np.load(DATA_DIR + 'y_val.npy')\n",
"x_test_, y_test = np.load(DATA_DIR + 'x_test.npy'), np.load(DATA_DIR + 'y_test.npy')\n",
"# Copyright (c) Microsoft Corporation. All rights reserved.\n",
"f = h5py.File(DATA_DIR + 'train.h5','r')\n",
"x_train_ = np.array(f.get('X'))\n",
"y_train = np.array(f.get('Y'))\n",
"f = h5py.File(DATA_DIR + 'val.h5','r')\n",
"x_val_ = np.array(f.get('X'))\n",
"y_val = np.array(f.get('Y'))\n",
"f = h5py.File(DATA_DIR + 'test.h5','r')\n",
"x_test_ = np.array(f.get('X'))\n",
"y_test = np.array(f.get('Y'))\n",
"# Mean-var normalize\n",
"mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n",
"std = np.std(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n",
@@ -161,28 +169,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0 batch 0 loss 4.295151 acc 0.031250\n",
"Epoch 0 batch 200 loss 1.002617 acc 0.718750\n",
"Epoch 1 batch 0 loss 0.647069 acc 0.796875\n",
"Epoch 1 batch 200 loss 0.469229 acc 0.835938\n",
"Epoch 2 batch 0 loss 0.388671 acc 0.882812\n",
"Epoch 2 batch 200 loss 0.396696 acc 0.859375\n",
"Epoch 3 batch 0 loss 0.266433 acc 0.921875\n",
"Epoch 3 batch 200 loss 0.281694 acc 0.867188\n",
"Epoch 4 batch 0 loss 0.302240 acc 0.906250\n",
"Epoch 4 batch 200 loss 0.245797 acc 0.929688\n",
"Validation accuracy: 0.911003\n",
"Epoch 5 batch 0 loss 0.202542 acc 0.945312\n",
"Epoch 5 batch 200 loss 0.192004 acc 0.929688\n",
"Epoch 6 batch 0 loss 0.256735 acc 0.921875\n",
"Epoch 6 batch 200 loss 0.279066 acc 0.921875\n",
"Epoch 7 batch 0 loss 0.228837 acc 0.945312\n",
"Epoch 7 batch 200 loss 0.222357 acc 0.937500\n",
"Epoch 8 batch 0 loss 0.164639 acc 0.960938\n",
"Epoch 8 batch 200 loss 0.160117 acc 0.945312\n",
"Epoch 9 batch 0 loss 0.173849 acc 0.953125\n",
"Epoch 9 batch 200 loss 0.201694 acc 0.929688\n",
"Validation accuracy: 0.912474\n"
"Epoch 0 batch 0 loss 2.049031 acc 0.632812\n",
"Epoch 0 batch 200 loss 0.739568 acc 0.695312\n",
"Epoch 1 batch 0 loss 0.536956 acc 0.843750\n",
"Epoch 1 batch 200 loss 0.402417 acc 0.882812\n",
"Epoch 2 batch 0 loss 0.299402 acc 0.921875\n",
"Epoch 2 batch 200 loss 0.316270 acc 0.882812\n",
"Epoch 3 batch 0 loss 0.237716 acc 0.929688\n",
"Epoch 3 batch 200 loss 0.215562 acc 0.929688\n",
"Epoch 4 batch 0 loss 0.235044 acc 0.929688\n",
"Epoch 4 batch 200 loss 0.177791 acc 0.945312\n",
"Validation accuracy: 0.913504\n",
"Epoch 5 batch 0 loss 0.181037 acc 0.945312\n",
"Epoch 5 batch 200 loss 0.167289 acc 0.937500\n",
"Epoch 6 batch 0 loss 0.201628 acc 0.921875\n",
"Epoch 6 batch 200 loss 0.266160 acc 0.914062\n",
"Epoch 7 batch 0 loss 0.199887 acc 0.937500\n",
"Epoch 7 batch 200 loss 0.154214 acc 0.929688\n",
"Epoch 8 batch 0 loss 0.193560 acc 0.945312\n",
"Epoch 8 batch 200 loss 0.194838 acc 0.937500\n",
"Epoch 9 batch 0 loss 0.205967 acc 0.921875\n",
"Epoch 9 batch 200 loss 0.186773 acc 0.937500\n",
"Validation accuracy: 0.913063\n"
]
}
],
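Note that the new loading code in the notebook cell above (and the same pattern in SRNN_Example.py below) reuses one handle f for all three HDF5 files and never closes it. A minimal sketch of an equivalent loader that releases each file via a context manager, assuming the same 'X'/'Y' dataset names; the DATA_DIR value here is only a placeholder:

import h5py
import numpy as np

DATA_DIR = './GoogleSpeech/Processed/'  # placeholder; the example reads this from its config

def load_split(path):
    # np.array() copies the data out of the file, so the handle can be
    # closed as soon as the arrays are materialized.
    with h5py.File(path, 'r') as f:
        return np.array(f['X']), np.array(f['Y'])

x_train_, y_train = load_split(DATA_DIR + 'train.h5')
x_val_, y_val = load_split(DATA_DIR + 'val.h5')
x_test_, y_test = load_split(DATA_DIR + 'test.h5')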
20 changes: 12 additions & 8 deletions examples/pytorch/SRNN/SRNN_Example.py
@@ -6,6 +6,7 @@
import os
import numpy as np
import torch
import h5py

from edgeml_pytorch.graph.rnn import SRNN2
from edgeml_pytorch.trainer.srnnTrainer import SRNNTrainer
@@ -16,12 +17,15 @@
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

DATA_DIR = config.data_dir
x_train_ = np.load(DATA_DIR + 'x_train.npy')
y_train = np.load(DATA_DIR + 'y_train.npy')
x_val_ = np.load(DATA_DIR + 'x_val.npy')
y_val = np.load(DATA_DIR + 'y_val.npy')
x_test_ = np.load(DATA_DIR + 'x_test.npy')
y_test = np.load(DATA_DIR + 'y_test.npy')
f = h5py.File(DATA_DIR + 'train.h5','r')
x_train_ = np.array(f.get('X'))
y_train = np.array(f.get('Y'))
f = h5py.File(DATA_DIR + 'val.h5','r')
x_val_ = np.array(f.get('X'))
y_val = np.array(f.get('Y'))
f = h5py.File(DATA_DIR + 'test.h5','r')
x_test_ = np.array(f.get('X'))
y_test = np.array(f.get('Y'))

# Mean-var normalize
mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)
@@ -60,15 +64,15 @@

Example OPTIONAL args for FastGRNNCell
cellArgs = {'gate_non_linearity':"sigmoid",'update_non_linearity':"tanh",
'wRank':None, 'uRank':None,'zetaInit':1.0, 'nuInit':-4.0,
'batch_first':False}

'''
cellArgs = {}

srnn2 = SRNN2(numInput, numClasses, hiddenDim0, hiddenDim1, cellType,
dropoutProbability_l0, dropoutProbability_l1,
**cellArgs).to(device)
trainer = SRNNTrainer(srnn2, learningRate, lossType='xentropy', device=device)

trainer.train(brickSize, batchSize, epochs, x_train, x_val, y_train, y_val,
116 changes: 51 additions & 65 deletions examples/pytorch/SRNN/process_google.py
@@ -1,4 +1,3 @@

# Google Speech data feature extraction

# Note that the 'testing_list.txt' and 'validation_list.txt'
@@ -27,7 +26,7 @@
import numpy as np
import scipy.io.wavfile as r
import random

import h5py

# Various versions can be created depending on which labels are chosen and which
# are moved to the negative (noise) set. We use LABELMAP13 for most of our
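For orientation, a label map in this script is a dict from keyword folder name to integer class id. The entries below are purely illustrative — the real LABELMAP13 is defined further down in the file and does not appear in this diff; per the configuration below, numLabels is 13 and index 0 is left unassigned:

# Hypothetical illustration only -- NOT the actual LABELMAP13 from this file.
EXAMPLE_LABELMAP = {
    'yes': 1, 'no': 2, 'up': 3, 'down': 4, 'left': 5, 'right': 6,
    'on': 7, 'off': 8, 'stop': 9, 'go': 10,
    '_noise_': 11, '_unknown_': 12,   # negative/noise buckets, also assumed
}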
@@ -131,78 +130,56 @@ def createFileList(audioFileDir, testingList,
np.save(outPrefix + 'file_test.npy', testingList)
np.save(outPrefix + 'file_val.npy', validationList)


def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen,
winstep):
def extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt, samplerate,
winlen, winstep, X, Y):
'''
Reads audio from files specified in fileList, extracts features and assigns
labels to them.

fileList: List of audio file names.
LABELMAP: The label map to use.
numLabels: Number of labels
maxlen: maximum length (in samples) of an audio file. Every
    shorter file is zero-padded to maxlen
numFilt: number of filters to use in MFCC
samplerate: sample rate of the audio files. All files are
    assumed to have the same sample rate
winlen: window length to use for fbank, in seconds
winstep: window step to use for fbank, in seconds
X: dataset input
Y: dataset ground-truth
'''
def __extractFeatures(stackedWav, numSteps, numFilt,
samplerate, winlen, winstep):
'''
[number of waves, Len(wave)]
returns [number of waves, numSteps, numFilt]
All waves are assumed to be of fixed length
'''
assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]'
extractedList = []
def __extractFeatures(sample, numSteps, numFilt, samplerate, winlen,
winstep):
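        # Returns log-fbank features of shape [numSteps, numFilt] for one
        # zero-padded waveform.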
eps = 1e-10
for sample in stackedWav:
temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
winstep=winstep, nfilt=numFilt,
winfunc=np.hamming)
temp = np.log(temp + eps)
assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
extractedList.append(temp)
return np.array(extractedList)
temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
winstep=winstep, nfilt=numFilt, winfunc=np.hamming)
temp = np.log(temp + eps)
assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
        return temp

fileList = np.array(fileList)
assert(fileList.ndim == 1)
allSamples = np.zeros((len(fileList), maxlen))
i = 0
for i,file in enumerate(fileList):
_, x = r.read(file)
assert(len(x) <= maxlen)
allSamples[i, maxlen - len(x):maxlen] += x
i += 1
assert allSamples.ndim == 2
winstepSamples = winstep * samplerate
winlenSamples = winlen * samplerate
assert(winstepSamples.is_integer())
assert(winlenSamples.is_integer())
numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1)
x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen,
winstep)
y_ = [t.split('/') for t in fileList]
y_ = [t[-2] for t in y_]
y = []
for t in y_:
assert t in LABELMAP
y.append(LABELMAP[t])

def to_onehot(indices, numClasses):
assert indices.ndim == 1
n = max(indices) + 1
assert numClasses <= n
b = np.zeros((len(indices), numClasses))
b[np.arange(len(indices)), indices] = 1
return b
y = to_onehot(np.array(y), np.max(y) + 1)
return x, y
for i, file in enumerate(fileList):
print('Processing', file)
sample = np.zeros(maxlen)
_, data = r.read(file)
sample[maxlen-len(data):maxlen] += data
X[i] = __extractFeatures(sample, numSteps, numFilt, samplerate, winlen,
winstep)
y_ = file.split('/')[-2]
y = LABELMAP[y_]
b = np.zeros(numLabels)
b[y] = 1
Y[i] = b
    print('Total Processed Samples:', len(fileList))
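For reference, the number of frames per clip follows from the sliding-window arithmetic that generateDataset (below) uses to size the X dataset. A quick sanity check with typical Google Speech settings — 1 s clips at 16 kHz with 25 ms windows and 10 ms steps; these concrete values are assumptions, not taken from this diff:

import numpy as np

maxlen = 16000                    # 1 s of audio at 16 kHz (assumed)
samplerate = 16000
winlen, winstep = 0.025, 0.010    # window length/step in seconds (assumed)

winlenSamples = winlen * samplerate      # 400.0
winstepSamples = winstep * samplerate    # 160.0
numSteps = int(np.ceil((maxlen - winlenSamples) / winstepSamples) + 1)
print(numSteps)  # 99, so each X[i] has shape [99, numFilt]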

if __name__ == '__main__':
# ----------------------------------------- #
# Configuration
# ----------------------------------------- #
@@ -216,7 +193,7 @@ def to_onehot(indices, numClasses):
numLabels = 13 # 0 not assigned
samplerate=16000
# For creation of training file list, testing file list
# and validation list.
audioFileDir = './GoogleSpeech/Raw/'
testingList = './GoogleSpeech/Raw/testing_list.txt'
validationList = './GoogleSpeech/Raw/validation_list.txt'
@@ -249,17 +226,26 @@
trainFileList_ = [audioFileDir + x for x in trainFileList]
valFileList_ = [audioFileDir + x for x in valFileList]
testFileList_ = [audioFileDir + x for x in testFileList]
x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt,
def generateDataset(datasetType, fileList, LABELMAP, numLabels, maxlen,
numFilt, samplerate, winlen, winstep):
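    # Sizes the feature matrices from the window arithmetic, allocates the
    # X/Y datasets in an HDF5 file and fills them via extractFeatures.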
winstepSamples = winstep * samplerate
winlenSamples = winlen * samplerate
assert(winstepSamples.is_integer())
assert(winlenSamples.is_integer())
numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1)

    with h5py.File(outDir + datasetType + '.h5', 'w') as f:
x = f.create_dataset("X", shape=(len(fileList), numSteps, numFilt),
dtype=np.float64)
y = f.create_dataset("Y", shape=(len(fileList), numLabels),
dtype=np.float64)
extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep, x, y)
print(datasetType, 'dataset generated')

generateDataset('test', testFileList_, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep)
x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt,
generateDataset('val', valFileList_, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep)
x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen,
numFilt, samplerate, winlen, winstep)
np.save(outDir + 'x_train', x_train);np.save(outDir + 'y_train', y_train)
np.save(outDir + 'x_test', x_test);np.save(outDir + 'y_test', y_test)
np.save(outDir + 'x_val', x_val);np.save(outDir + 'y_val', y_val)
print("Shape train", x_train.shape, y_train.shape)
print("Shape test", x_test.shape, y_test.shape)
print("Shape val", x_val.shape, y_val.shape)


generateDataset('train', trainFileList_, LABELMAP, numLabels, maxlen,
numFilt, samplerate, winlen, winstep)
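Once the script has run, a small check along these lines confirms that each generated file holds 'X' and 'Y' datasets of the expected shapes (the outDir value here is an assumption and must match the script's configuration):

import h5py

outDir = './GoogleSpeech/Processed/'  # assumed; must match the script's outDir
for split in ('train', 'val', 'test'):
    with h5py.File(outDir + split + '.h5', 'r') as f:
        # Expect X: [numFiles, numSteps, numFilt] and Y: [numFiles, numLabels]
        print(split, f['X'].shape, f['Y'].shape)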
2 changes: 2 additions & 0 deletions examples/pytorch/requirements.txt
@@ -0,0 +1,2 @@
python-speech-features==0.6
h5py==2.9.0