SRNN data preprocessing script #124

Open · wants to merge 3 commits into base: harsha/reorg
60 changes: 34 additions & 26 deletions examples/pytorch/SRNN/SRNN_Example.ipynb
@@ -25,7 +25,8 @@
"import sys\n",
"import os\n",
"import numpy as np\n",
"import torch"
"import torch\n",
"import h5py"
]
},
{
@@ -80,9 +81,16 @@
}
],
"source": [
"x_train_, y_train = np.load(DATA_DIR + 'x_train.npy'), np.load(DATA_DIR + 'y_train.npy')\n",
"x_val_, y_val = np.load(DATA_DIR + 'x_val.npy'), np.load(DATA_DIR + 'y_val.npy')\n",
"x_test_, y_test = np.load(DATA_DIR + 'x_test.npy'), np.load(DATA_DIR + 'y_test.npy')\n",
"# Copyright (c) Microsoft Corporation. All rights reserved.\n",
"f = h5py.File(DATA_DIR + 'train.h5','r')\n",
"x_train_ = np.array(f.get('X'))\n",
"y_train = np.array(f.get('Y'))\n",
"f = h5py.File(DATA_DIR + 'val.h5','r')\n",
"x_val_ = np.array(f.get('X'))\n",
"y_val = np.array(f.get('Y'))\n",
"f = h5py.File(DATA_DIR + 'test.h5','r')\n",
"x_test_ = np.array(f.get('X'))\n",
"y_test = np.array(f.get('Y'))\n",
"# Mean-var normalize\n",
"mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n",
"std = np.std(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)\n",
@@ -161,28 +169,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0 batch 0 loss 4.295151 acc 0.031250\n",
"Epoch 0 batch 200 loss 1.002617 acc 0.718750\n",
"Epoch 1 batch 0 loss 0.647069 acc 0.796875\n",
"Epoch 1 batch 200 loss 0.469229 acc 0.835938\n",
"Epoch 2 batch 0 loss 0.388671 acc 0.882812\n",
"Epoch 2 batch 200 loss 0.396696 acc 0.859375\n",
"Epoch 3 batch 0 loss 0.266433 acc 0.921875\n",
"Epoch 3 batch 200 loss 0.281694 acc 0.867188\n",
"Epoch 4 batch 0 loss 0.302240 acc 0.906250\n",
"Epoch 4 batch 200 loss 0.245797 acc 0.929688\n",
"Validation accuracy: 0.911003\n",
"Epoch 5 batch 0 loss 0.202542 acc 0.945312\n",
"Epoch 5 batch 200 loss 0.192004 acc 0.929688\n",
"Epoch 6 batch 0 loss 0.256735 acc 0.921875\n",
"Epoch 6 batch 200 loss 0.279066 acc 0.921875\n",
"Epoch 7 batch 0 loss 0.228837 acc 0.945312\n",
"Epoch 7 batch 200 loss 0.222357 acc 0.937500\n",
"Epoch 8 batch 0 loss 0.164639 acc 0.960938\n",
"Epoch 8 batch 200 loss 0.160117 acc 0.945312\n",
"Epoch 9 batch 0 loss 0.173849 acc 0.953125\n",
"Epoch 9 batch 200 loss 0.201694 acc 0.929688\n",
"Validation accuracy: 0.912474\n"
"Epoch 0 batch 0 loss 2.049031 acc 0.632812\n",
"Epoch 0 batch 200 loss 0.739568 acc 0.695312\n",
"Epoch 1 batch 0 loss 0.536956 acc 0.843750\n",
"Epoch 1 batch 200 loss 0.402417 acc 0.882812\n",
"Epoch 2 batch 0 loss 0.299402 acc 0.921875\n",
"Epoch 2 batch 200 loss 0.316270 acc 0.882812\n",
"Epoch 3 batch 0 loss 0.237716 acc 0.929688\n",
"Epoch 3 batch 200 loss 0.215562 acc 0.929688\n",
"Epoch 4 batch 0 loss 0.235044 acc 0.929688\n",
"Epoch 4 batch 200 loss 0.177791 acc 0.945312\n",
"Validation accuracy: 0.913504\n",
"Epoch 5 batch 0 loss 0.181037 acc 0.945312\n",
"Epoch 5 batch 200 loss 0.167289 acc 0.937500\n",
"Epoch 6 batch 0 loss 0.201628 acc 0.921875\n",
"Epoch 6 batch 200 loss 0.266160 acc 0.914062\n",
"Epoch 7 batch 0 loss 0.199887 acc 0.937500\n",
"Epoch 7 batch 200 loss 0.154214 acc 0.929688\n",
"Epoch 8 batch 0 loss 0.193560 acc 0.945312\n",
"Epoch 8 batch 200 loss 0.194838 acc 0.937500\n",
"Epoch 9 batch 0 loss 0.205967 acc 0.921875\n",
"Epoch 9 batch 200 loss 0.186773 acc 0.937500\n",
"Validation accuracy: 0.913063\n"
]
}
],
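Note that the new loading code in the notebook cell above (and the same pattern in SRNN_Example.py below) reuses one handle f for all three HDF5 files and never closes it. A minimal sketch of an equivalent loader that releases each file via a context manager, assuming the same 'X'/'Y' dataset names; the DATA_DIR value here is only a placeholder:

import h5py
import numpy as np

DATA_DIR = './GoogleSpeech/Processed/'  # placeholder; the example reads this from its config

def load_split(path):
    # np.array() copies the data out of the file, so the handle can be
    # closed as soon as the arrays are materialized.
    with h5py.File(path, 'r') as f:
        return np.array(f['X']), np.array(f['Y'])

x_train_, y_train = load_split(DATA_DIR + 'train.h5')
x_val_, y_val = load_split(DATA_DIR + 'val.h5')
x_test_, y_test = load_split(DATA_DIR + 'test.h5')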
20 changes: 12 additions & 8 deletions examples/pytorch/SRNN/SRNN_Example.py
@@ -6,6 +6,7 @@
import os
import numpy as np
import torch
import h5py

from edgeml_pytorch.graph.rnn import SRNN2
from edgeml_pytorch.trainer.srnnTrainer import SRNNTrainer
@@ -16,12 +17,15 @@
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

DATA_DIR = config.data_dir
x_train_ = np.load(DATA_DIR + 'x_train.npy')
y_train = np.load(DATA_DIR + 'y_train.npy')
x_val_ = np.load(DATA_DIR + 'x_val.npy')
y_val = np.load(DATA_DIR + 'y_val.npy')
x_test_ = np.load(DATA_DIR + 'x_test.npy')
y_test = np.load(DATA_DIR + 'y_test.npy')
f = h5py.File(DATA_DIR + 'train.h5','r')
x_train_ = np.array(f.get('X'))
y_train = np.array(f.get('Y'))
f = h5py.File(DATA_DIR + 'val.h5','r')
x_val_ = np.array(f.get('X'))
y_val = np.array(f.get('Y'))
f = h5py.File(DATA_DIR + 'test.h5','r')
x_test_ = np.array(f.get('X'))
y_test = np.array(f.get('Y'))

# Mean-var normalize
mean = np.mean(np.reshape(x_train_, [-1, x_train_.shape[-1]]), axis=0)
@@ -60,15 +64,15 @@

Example OPTIONAL args for FastGRNNCell
cellArgs = {'gate_non_linearity':"sigmoid",'update_non_linearity':"tanh",
'wRank':None, 'uRank':None,'zetaInit':1.0, 'nuInit':-4.0,
'batch_first':False}

'''
cellArgs = {}

srnn2 = SRNN2(numInput, numClasses, hiddenDim0, hiddenDim1, cellType,
dropoutProbability_l0, dropoutProbability_l1,
**cellArgs).to(device)
trainer = SRNNTrainer(srnn2, learningRate, lossType='xentropy', device=device)

trainer.train(brickSize, batchSize, epochs, x_train, x_val, y_train, y_val,
116 changes: 51 additions & 65 deletions examples/pytorch/SRNN/process_google.py
@@ -1,4 +1,3 @@

# Google Speech data feature extraction

# Note that the 'testing_list.txt' and 'validation_list.txt'
@@ -27,7 +26,7 @@
import numpy as np
import scipy.io.wavfile as r
import random

import h5py

# Various versions can be created depending on which labels are chosen and which
# are moved to the negative (noise) set. We use LABELMAP13 for most of our
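For orientation, a label map in this script is a dict from keyword folder name to integer class id. The entries below are purely illustrative — the real LABELMAP13 is defined further down in the file and does not appear in this diff; per the configuration below, numLabels is 13 and index 0 is left unassigned:

# Hypothetical illustration only -- NOT the actual LABELMAP13 from this file.
EXAMPLE_LABELMAP = {
    'yes': 1, 'no': 2, 'up': 3, 'down': 4, 'left': 5, 'right': 6,
    'on': 7, 'off': 8, 'stop': 9, 'go': 10,
    '_noise_': 11, '_unknown_': 12,   # negative/noise buckets, also assumed
}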
@@ -131,78 +130,56 @@ def createFileList(audioFileDir, testingList,
np.save(outPrefix + 'file_test.npy', testingList)
np.save(outPrefix + 'file_val.npy', validationList)


def extractFeatures(fileList, LABELMAP, maxlen, numFilt, samplerate, winlen,
winstep):
def extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt, samplerate,
winlen, winstep, X, Y):
'''
Reads audio from files specified in fileList, extracts features and assigns
labels to them.

fileList: List of audio file names.
LABELMAP: The label map to use.
numLabels: Number of labels
maxlen: maximum length (in samples) of an audio file. Every
    shorter file is zero-padded to maxlen
numFilt: number of filters to use in MFCC
samplerate: sample rate of the audio files. All files are
    assumed to have the same sample rate
winlen: window length to use for fbank, in seconds
winstep: window step to use for fbank, in seconds
X: dataset input
Y: dataset ground-truth
'''
def __extractFeatures(stackedWav, numSteps, numFilt,
samplerate, winlen, winstep):
'''
[number of waves, Len(wave)]
returns [number of waves, numSteps, numFilt]
All waves are assumed to be of fixed length
'''
assert stackedWav.ndim == 2, 'Should be [number of waves, len(wav)]'
extractedList = []
def __extractFeatures(sample, numSteps, numFilt, samplerate, winlen,
winstep):
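        # Returns log-fbank features of shape [numSteps, numFilt] for one
        # zero-padded waveform.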
eps = 1e-10
for sample in stackedWav:
temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
winstep=winstep, nfilt=numFilt,
winfunc=np.hamming)
temp = np.log(temp + eps)
assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
extractedList.append(temp)
return np.array(extractedList)
temp, _ = fbank(sample, samplerate=samplerate, winlen=winlen,
winstep=winstep, nfilt=numFilt, winfunc=np.hamming)
temp = np.log(temp + eps)
assert temp.ndim == 2, 'Should be [numSteps, numFilt]'
assert temp.shape[0] == numSteps, 'Should be [numSteps, numFilt]'
        return temp

fileList = np.array(fileList)
assert(fileList.ndim == 1)
allSamples = np.zeros((len(fileList), maxlen))
i = 0
for i,file in enumerate(fileList):
_, x = r.read(file)
assert(len(x) <= maxlen)
allSamples[i, maxlen - len(x):maxlen] += x
i += 1
assert allSamples.ndim == 2
winstepSamples = winstep * samplerate
winlenSamples = winlen * samplerate
assert(winstepSamples.is_integer())
assert(winlenSamples.is_integer())
numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1)
x = __extractFeatures(allSamples, numSteps, numFilt, samplerate, winlen,
winstep)
y_ = [t.split('/') for t in fileList]
y_ = [t[-2] for t in y_]
y = []
for t in y_:
assert t in LABELMAP
y.append(LABELMAP[t])

def to_onehot(indices, numClasses):
assert indices.ndim == 1
n = max(indices) + 1
assert numClasses <= n
b = np.zeros((len(indices), numClasses))
b[np.arange(len(indices)), indices] = 1
return b
y = to_onehot(np.array(y), np.max(y) + 1)
return x, y
for i, file in enumerate(fileList):
print('Processing', file)
sample = np.zeros(maxlen)
_, data = r.read(file)
sample[maxlen-len(data):maxlen] += data
X[i] = __extractFeatures(sample, numSteps, numFilt, samplerate, winlen,
winstep)
y_ = file.split('/')[-2]
y = LABELMAP[y_]
b = np.zeros(numLabels)
b[y] = 1
Y[i] = b
    print('Total Processed Samples:', len(fileList))
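For reference, the number of frames per clip follows from the sliding-window arithmetic that generateDataset (below) uses to size the X dataset. A quick sanity check with typical Google Speech settings — 1 s clips at 16 kHz with 25 ms windows and 10 ms steps; these concrete values are assumptions, not taken from this diff:

import numpy as np

maxlen = 16000                    # 1 s of audio at 16 kHz (assumed)
samplerate = 16000
winlen, winstep = 0.025, 0.010    # window length/step in seconds (assumed)

winlenSamples = winlen * samplerate      # 400.0
winstepSamples = winstep * samplerate    # 160.0
numSteps = int(np.ceil((maxlen - winlenSamples) / winstepSamples) + 1)
print(numSteps)  # 99, so each X[i] has shape [99, numFilt]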

if __name__ == '__main__':
# ----------------------------------------- #
# Configuration
# ----------------------------------------- #
@@ -216,7 +193,7 @@ def to_onehot(indices, numClasses):
numLabels = 13 # 0 not assigned
samplerate=16000
# For creation of training file list, testing file list
# and validation list.
audioFileDir = './GoogleSpeech/Raw/'
testingList = './GoogleSpeech/Raw/testing_list.txt'
validationList = './GoogleSpeech/Raw/validation_list.txt'
@@ -249,17 +226,26 @@
trainFileList_ = [audioFileDir + x for x in trainFileList]
valFileList_ = [audioFileDir + x for x in valFileList]
testFileList_ = [audioFileDir + x for x in testFileList]
x_test, y_test = extractFeatures(testFileList_, LABELMAP, maxlen, numFilt,
def generateDataset(datasetType, fileList, LABELMAP, numLabels, maxlen,
numFilt, samplerate, winlen, winstep):
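    # Sizes the feature matrices from the window arithmetic, allocates the
    # X/Y datasets in an HDF5 file and fills them via extractFeatures.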
winstepSamples = winstep * samplerate
winlenSamples = winlen * samplerate
assert(winstepSamples.is_integer())
assert(winlenSamples.is_integer())
numSteps = int(np.ceil((maxlen - winlenSamples)/winstepSamples) + 1)

    with h5py.File(outDir + datasetType + '.h5', 'w') as f:
x = f.create_dataset("X", shape=(len(fileList), numSteps, numFilt),
dtype=np.float64)
y = f.create_dataset("Y", shape=(len(fileList), numLabels),
dtype=np.float64)
extractFeatures(fileList, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep, x, y)
print(datasetType, 'dataset generated')

generateDataset('test', testFileList_, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep)
x_val, y_val = extractFeatures(valFileList_, LABELMAP, maxlen, numFilt,
generateDataset('val', valFileList_, LABELMAP, numLabels, maxlen, numFilt,
samplerate, winlen, winstep)
x_train, y_train = extractFeatures(trainFileList_, LABELMAP, maxlen,
numFilt, samplerate, winlen, winstep)
np.save(outDir + 'x_train', x_train);np.save(outDir + 'y_train', y_train)
np.save(outDir + 'x_test', x_test);np.save(outDir + 'y_test', y_test)
np.save(outDir + 'x_val', x_val);np.save(outDir + 'y_val', y_val)
print("Shape train", x_train.shape, y_train.shape)
print("Shape test", x_test.shape, y_test.shape)
print("Shape val", x_val.shape, y_val.shape)


generateDataset('train', trainFileList_, LABELMAP, numLabels, maxlen,
numFilt, samplerate, winlen, winstep)
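Once the script has run, a small check along these lines confirms that each generated file holds 'X' and 'Y' datasets of the expected shapes (the outDir value here is an assumption and must match the script's configuration):

import h5py

outDir = './GoogleSpeech/Processed/'  # assumed; must match the script's outDir
for split in ('train', 'val', 'test'):
    with h5py.File(outDir + split + '.h5', 'r') as f:
        # Expect X: [numFiles, numSteps, numFilt] and Y: [numFiles, numLabels]
        print(split, f['X'].shape, f['Y'].shape)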
2 changes: 2 additions & 0 deletions examples/pytorch/requirements.txt
@@ -0,0 +1,2 @@
python-speech-features==0.6
h5py==2.9.0