Skip to content

Commit 6e85d56

Browse files
committed
feat: create SIMD-accelerated version of compute_gru function
xiph/rnnoise#191
1 parent 73775f1 commit 6e85d56

File tree

8 files changed

+634
-152
lines changed

8 files changed

+634
-152
lines changed

src/rnnoise/include/rnnoise/rnn.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#ifndef RNN_H_
2828
#define RNN_H_
2929

30+
#include "rnnoise.h"
31+
3032
#include "opus_types.h"
3133

3234
#define WEIGHTS_SCALE (1.f/256)
@@ -58,10 +60,16 @@ typedef struct {
5860

5961
typedef struct RNNState RNNState;
6062

63+
int is_avx2_supported();
64+
6165
void compute_dense(const DenseLayer *layer, float *output, const float *input);
6266

6367
void compute_gru(const GRULayer *gru, float *state, const float *input);
6468

69+
#if defined(__AVX2__)
70+
void compute_gru_avx2(const GRULayer *gru, float *state, const float *input);
71+
#endif
72+
6573
void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input);
6674

67-
#endif /* _MLP_H_ */
75+
#endif /* RNN_H_ */

src/rnnoise/include/rnnoise/rnn_data.h

+19-17
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,34 @@
1-
/*This file is automatically generated from a Keras model*/
2-
31
#ifndef RNN_DATA_H
42
#define RNN_DATA_H
53

64
#include "rnn.h"
75

8-
#define INPUT_DENSE_SIZE 24
9-
extern const DenseLayer input_dense;
6+
struct RNNModel {
7+
int input_dense_size;
8+
const DenseLayer *input_dense;
109

11-
#define VAD_GRU_SIZE 24
12-
extern const GRULayer vad_gru;
10+
int vad_gru_size;
11+
const GRULayer *vad_gru;
1312

14-
#define NOISE_GRU_SIZE 48
15-
extern const GRULayer noise_gru;
13+
int noise_gru_size;
14+
const GRULayer *noise_gru;
1615

17-
#define DENOISE_GRU_SIZE 96
18-
extern const GRULayer denoise_gru;
16+
int denoise_gru_size;
17+
const GRULayer *denoise_gru;
1918

20-
#define DENOISE_OUTPUT_SIZE 22
21-
extern const DenseLayer denoise_output;
19+
int denoise_output_size;
20+
const DenseLayer *denoise_output;
2221

23-
#define VAD_OUTPUT_SIZE 1
24-
extern const DenseLayer vad_output;
22+
int vad_output_size;
23+
const DenseLayer *vad_output;
24+
};
2525

2626
struct RNNState {
27-
float vad_gru_state[VAD_GRU_SIZE];
28-
float noise_gru_state[NOISE_GRU_SIZE];
29-
float denoise_gru_state[DENOISE_GRU_SIZE];
27+
const RNNModel *model;
28+
float *vad_gru_state;
29+
float *noise_gru_state;
30+
float *denoise_gru_state;
31+
void (*compute_gru_fct)(const GRULayer *gru, float *state, const float *input);
3032
};
3133

3234

src/rnnoise/include/rnnoise/rnnoise.h

+62-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* Copyright (c) 2017 Mozilla */
1+
/* Copyright (c) 2018 Gregor Richards
2+
* Copyright (c) 2017 Mozilla */
23
/*
34
Redistribution and use in source and binary forms, with or without
45
modification, are permitted provided that the following conditions
@@ -24,6 +25,15 @@
2425
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2526
*/
2627

28+
#ifndef RNNOISE_H
29+
#define RNNOISE_H 1
30+
31+
#include <stdio.h>
32+
33+
#ifdef __cplusplus
34+
extern "C" {
35+
#endif
36+
2737
#ifndef RNNOISE_EXPORT
2838
# if defined(WIN32)
2939
# if defined(RNNOISE_BUILD) && defined(DLL_EXPORT)
@@ -39,21 +49,66 @@
3949
#endif
4050

4151
typedef struct DenoiseState DenoiseState;
52+
typedef struct RNNModel RNNModel;
4253

43-
#ifdef __cplusplus
44-
extern "C" {
45-
#endif
46-
54+
/**
55+
* Return the size of DenoiseState, in bytes
56+
*/
4757
RNNOISE_EXPORT int rnnoise_get_size();
4858

49-
RNNOISE_EXPORT int rnnoise_init(DenoiseState *st);
59+
/**
60+
* Return the number of samples processed by rnnoise_process_frame at a time
61+
*/
62+
RNNOISE_EXPORT int rnnoise_get_frame_size();
5063

51-
RNNOISE_EXPORT DenoiseState *rnnoise_create();
64+
/**
65+
* Initializes a pre-allocated DenoiseState
66+
*
67+
* If model is NULL the default model is used.
68+
*
69+
* See: rnnoise_create() and rnnoise_model_from_file()
70+
*/
71+
RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model);
5272

73+
/**
74+
* Allocate and initialize a DenoiseState
75+
*
76+
* If model is NULL the default model is used.
77+
*
78+
* The returned pointer MUST be freed with rnnoise_destroy().
79+
*/
80+
RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model);
81+
82+
/**
83+
* Free a DenoiseState produced by rnnoise_create.
84+
*
85+
* The optional custom model is not freed here; free it with rnnoise_model_free() afterwards.
86+
*/
5387
RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
5488

89+
/**
90+
* Denoise a frame of samples
91+
*
92+
* in and out must each hold at least rnnoise_get_frame_size() samples.
93+
*/
5594
RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
5695

96+
/**
97+
* Load a model from a file
98+
*
99+
* The returned model must be deallocated with rnnoise_model_free()
100+
*/
101+
RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);
102+
103+
/**
104+
* Free a custom model
105+
*
106+
* It must be called after all the DenoiseStates referring to it are freed.
107+
*/
108+
RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);
109+
57110
#ifdef __cplusplus
58111
}
59112
#endif
113+
114+
#endif

src/rnnoise/src/denoise.c

+46-74
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* Copyright (c) 2017 Mozilla */
1+
/* Copyright (c) 2018 Gregor Richards
2+
* Copyright (c) 2017 Mozilla */
23
/*
34
Redistribution and use in source and binary forms, with or without
45
modification, are permitted provided that the following conditions
@@ -55,13 +56,7 @@
5556

5657
#define SQUARE(x) ((x)*(x))
5758

58-
#define SMOOTH_BANDS 1
59-
60-
#if SMOOTH_BANDS
6159
#define NB_BANDS 22
62-
#else
63-
#define NB_BANDS 21
64-
#endif
6560

6661
#define CEPS_MEM 8
6762
#define NB_DELTA_CEPS 6
@@ -73,6 +68,11 @@
7368
#define TRAINING 0
7469
#endif
7570

71+
72+
/* The built-in model, used when rnnoise_init() is given a NULL model */
73+
extern const struct RNNModel rnnoise_model_orig;
74+
75+
7676
static const opus_int16 eband5ms[] = {
7777
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/
7878
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
@@ -100,7 +100,6 @@ struct DenoiseState {
100100
RNNState rnn;
101101
};
102102

103-
#if SMOOTH_BANDS
104103
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
105104
int i;
106105
float sum[NB_BANDS] = {0};
@@ -165,32 +164,6 @@ void interp_band_gain(float *g, const float *bandE) {
165164
}
166165
}
167166
}
168-
#else
169-
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
170-
int i;
171-
for (i=0;i<NB_BANDS;i++)
172-
{
173-
int j;
174-
opus_val32 sum = 0;
175-
for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
176-
sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
177-
sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
178-
}
179-
bandE[i] = sum;
180-
}
181-
}
182-
183-
void interp_band_gain(float *g, const float *bandE) {
184-
int i;
185-
memset(g, 0, FREQ_SIZE);
186-
for (i=0;i<NB_BANDS;i++)
187-
{
188-
int j;
189-
for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
190-
g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
191-
}
192-
}
193-
#endif
194167

195168

196169
CommonState common;
@@ -287,19 +260,41 @@ int rnnoise_get_size() {
287260
return sizeof(DenoiseState);
288261
}
289262

290-
int rnnoise_init(DenoiseState *st) {
263+
int rnnoise_get_frame_size() {
264+
return FRAME_SIZE;
265+
}
266+
267+
int rnnoise_init(DenoiseState *st, RNNModel *model) {
291268
memset(st, 0, sizeof(*st));
269+
if (model)
270+
st->rnn.model = model;
271+
else
272+
st->rnn.model = &rnnoise_model_orig;
273+
st->rnn.vad_gru_state = calloc(sizeof(float), st->rnn.model->vad_gru_size);
274+
st->rnn.noise_gru_state = calloc(sizeof(float), st->rnn.model->noise_gru_size);
275+
st->rnn.denoise_gru_state = calloc(sizeof(float), st->rnn.model->denoise_gru_size);
276+
st->rnn.compute_gru_fct = &compute_gru;
277+
278+
#if defined(__AVX2__)
279+
if(is_avx2_supported() == 1) {
280+
st->rnn.compute_gru_fct = &compute_gru_avx2;
281+
}
282+
#endif
283+
292284
return 0;
293285
}
294286

295-
DenoiseState *rnnoise_create() {
287+
DenoiseState *rnnoise_create(RNNModel *model) {
296288
DenoiseState *st;
297289
st = malloc(rnnoise_get_size());
298-
rnnoise_init(st);
290+
rnnoise_init(st, model);
299291
return st;
300292
}
301293

302294
void rnnoise_destroy(DenoiseState *st) {
295+
free(st->rnn.vad_gru_state);
296+
free(st->rnn.noise_gru_state);
297+
free(st->rnn.denoise_gru_state);
303298
free(st);
304299
}
305300

@@ -424,13 +419,11 @@ static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y)
424419
}
425420

426421
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
427-
int i;
428-
for (i=0;i<N;i++) {
429-
float xi, yi;
430-
xi = x[i];
431-
yi = x[i] + mem[0];
432-
mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
433-
mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
422+
for (int i=0;i<N;i++) {
423+
float xi = x[i];
424+
float yi = xi + mem[0];
425+
mem[0] = mem[1] + (b[0] * xi - a[0] * yi);
426+
mem[1] = (b[1] * xi - a[1] * yi);
434427
y[i] = yi;
435428
}
436429
}
@@ -541,20 +534,21 @@ int main(int argc, char **argv) {
541534
int vad_cnt=0;
542535
int gain_change_count=0;
543536
float speech_gain = 1, noise_gain = 1;
544-
FILE *f1, *f2, *fout;
537+
FILE *f1, *f2;
538+
int maxCount;
545539
DenoiseState *st;
546540
DenoiseState *noise_state;
547541
DenoiseState *noisy;
548-
st = rnnoise_create();
549-
noise_state = rnnoise_create();
550-
noisy = rnnoise_create();
542+
st = rnnoise_create(NULL);
543+
noise_state = rnnoise_create(NULL);
544+
noisy = rnnoise_create(NULL);
551545
if (argc!=4) {
552-
fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
546+
fprintf(stderr, "usage: %s <speech> <noise> <count>\n", argv[0]);
553547
return 1;
554548
}
555549
f1 = fopen(argv[1], "r");
556550
f2 = fopen(argv[2], "r");
557-
fout = fopen(argv[3], "w");
551+
maxCount = atoi(argv[3]);
558552
for(i=0;i<150;i++) {
559553
short tmp[FRAME_SIZE];
560554
fread(tmp, sizeof(short), FRAME_SIZE, f2);
@@ -566,12 +560,11 @@ int main(int argc, char **argv) {
566560
float Ln[NB_BANDS];
567561
float features[NB_FEATURES];
568562
float g[NB_BANDS];
569-
float gf[FREQ_SIZE]={1};
570563
short tmp[FRAME_SIZE];
571564
float vad=0;
572-
float vad_prob;
573565
float E=0;
574-
if (count==50000000) break;
566+
if (count==maxCount) break;
567+
if ((count%1000)==0) fprintf(stderr, "%d\r", count);
575568
if (++gain_change_count > 2821) {
576569
speech_gain = pow(10., (-40+(rand()%60))/20.);
577570
noise_gain = pow(10., (-30+(rand()%50))/20.);
@@ -646,37 +639,16 @@ int main(int argc, char **argv) {
646639
if (vad==0 && noise_gain==0) g[i] = -1;
647640
}
648641
count++;
649-
#if 0
650-
for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
651-
for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
652-
for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
653-
printf("%f\n", vad);
654-
#endif
655642
#if 1
656643
fwrite(features, sizeof(float), NB_FEATURES, stdout);
657644
fwrite(g, sizeof(float), NB_BANDS, stdout);
658645
fwrite(Ln, sizeof(float), NB_BANDS, stdout);
659646
fwrite(&vad, sizeof(float), 1, stdout);
660-
#endif
661-
#if 0
662-
compute_rnn(&noisy->rnn, g, &vad_prob, features);
663-
interp_band_gain(gf, g);
664-
#if 1
665-
for (i=0;i<FREQ_SIZE;i++) {
666-
X[i].r *= gf[i];
667-
X[i].i *= gf[i];
668-
}
669-
#endif
670-
frame_synthesis(noisy, xn, X);
671-
672-
for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
673-
fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
674647
#endif
675648
}
676649
fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
677650
fclose(f1);
678651
fclose(f2);
679-
fclose(fout);
680652
return 0;
681653
}
682654

0 commit comments

Comments
 (0)