1
- /* Copyright (c) 2017 Mozilla */
1
+ /* Copyright (c) 2018 Gregor Richards
2
+ * Copyright (c) 2017 Mozilla */
2
3
/*
3
4
Redistribution and use in source and binary forms, with or without
4
5
modification, are permitted provided that the following conditions
55
56
56
57
/* Square of x. The argument is expanded twice, so it must not have side
 * effects. The parameter list must follow the macro name with no space in
 * between; otherwise the definition becomes an object-like macro. */
#define SQUARE(x) ((x)*(x))
57
58
58
- #define SMOOTH_BANDS 1
59
-
60
- #if SMOOTH_BANDS
61
59
#define NB_BANDS 22
62
- #else
63
- #define NB_BANDS 21
64
- #endif
65
60
66
61
#define CEPS_MEM 8
67
62
#define NB_DELTA_CEPS 6
73
68
#define TRAINING 0
74
69
#endif
75
70
71
+
72
+ /* The built-in model, used if no file is given as input */
73
+ extern const struct RNNModel rnnoise_model_orig ;
74
+
75
+
76
76
static const opus_int16 eband5ms [] = {
77
77
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/
78
78
0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 10 , 12 , 14 , 16 , 20 , 24 , 28 , 34 , 40 , 48 , 60 , 78 , 100
@@ -100,7 +100,6 @@ struct DenoiseState {
100
100
RNNState rnn ;
101
101
};
102
102
103
- #if SMOOTH_BANDS
104
103
void compute_band_energy (float * bandE , const kiss_fft_cpx * X ) {
105
104
int i ;
106
105
float sum [NB_BANDS ] = {0 };
@@ -165,32 +164,6 @@ void interp_band_gain(float *g, const float *bandE) {
165
164
}
166
165
}
167
166
}
168
- #else
169
- void compute_band_energy (float * bandE , const kiss_fft_cpx * X ) {
170
- int i ;
171
- for (i = 0 ;i < NB_BANDS ;i ++ )
172
- {
173
- int j ;
174
- opus_val32 sum = 0 ;
175
- for (j = 0 ;j < (eband5ms [i + 1 ]- eband5ms [i ])<<FRAME_SIZE_SHIFT ;j ++ ) {
176
- sum += SQUARE (X [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ].r );
177
- sum += SQUARE (X [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ].i );
178
- }
179
- bandE [i ] = sum ;
180
- }
181
- }
182
-
183
- void interp_band_gain (float * g , const float * bandE ) {
184
- int i ;
185
- memset (g , 0 , FREQ_SIZE );
186
- for (i = 0 ;i < NB_BANDS ;i ++ )
187
- {
188
- int j ;
189
- for (j = 0 ;j < (eband5ms [i + 1 ]- eband5ms [i ])<<FRAME_SIZE_SHIFT ;j ++ )
190
- g [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ] = bandE [i ];
191
- }
192
- }
193
- #endif
194
167
195
168
196
169
CommonState common ;
@@ -287,19 +260,41 @@ int rnnoise_get_size() {
287
260
return sizeof (DenoiseState );
288
261
}
289
262
290
- int rnnoise_init (DenoiseState * st ) {
263
+ int rnnoise_get_frame_size () {
264
+ return FRAME_SIZE ;
265
+ }
266
+
267
+ int rnnoise_init (DenoiseState * st , RNNModel * model ) {
291
268
memset (st , 0 , sizeof (* st ));
269
+ if (model )
270
+ st -> rnn .model = model ;
271
+ else
272
+ st -> rnn .model = & rnnoise_model_orig ;
273
+ st -> rnn .vad_gru_state = calloc (sizeof (float ), st -> rnn .model -> vad_gru_size );
274
+ st -> rnn .noise_gru_state = calloc (sizeof (float ), st -> rnn .model -> noise_gru_size );
275
+ st -> rnn .denoise_gru_state = calloc (sizeof (float ), st -> rnn .model -> denoise_gru_size );
276
+ st -> rnn .compute_gru_fct = & compute_gru ;
277
+
278
+ #if defined(__AVX2__ )
279
+ if (is_avx2_supported () == 1 ) {
280
+ st -> rnn .compute_gru_fct = & compute_gru_avx2 ;
281
+ }
282
+ #endif
283
+
292
284
return 0 ;
293
285
}
294
286
295
- DenoiseState * rnnoise_create () {
287
+ DenoiseState * rnnoise_create (RNNModel * model ) {
296
288
DenoiseState * st ;
297
289
st = malloc (rnnoise_get_size ());
298
- rnnoise_init (st );
290
+ rnnoise_init (st , model );
299
291
return st ;
300
292
}
301
293
302
294
/*
 * Release a DenoiseState: its three GRU state buffers, then the state itself.
 *
 * st - state to free. NULL is accepted and ignored, matching free().
 */
void rnnoise_destroy(DenoiseState *st) {
  if (!st) {
    return;
  }
  free(st->rnn.vad_gru_state);
  free(st->rnn.noise_gru_state);
  free(st->rnn.denoise_gru_state);
  free(st);
}
305
300
@@ -424,13 +419,11 @@ static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y)
424
419
}
425
420
426
421
/*
 * Filter N samples of x into y using a two-element recursive state.
 *
 * For each sample: the output is the input plus mem[0]; mem[0] then becomes
 * mem[1] + (b[0]*in - a[0]*out), and mem[1] becomes b[1]*in - a[1]*out.
 *
 * y   - output, N samples.
 * mem - filter state, updated in place and carried across calls.
 * x   - input, N samples; each sample is read before y is written at the
 *       same index.
 * b,a - two coefficients each.
 * N   - number of samples; nothing is touched when N <= 0.
 */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = in + mem[0];
    /* mem[0] must be refreshed first: it consumes the old mem[1]. */
    mem[0] = mem[1] + (b[0] * in - a[0] * out);
    mem[1] = (b[1] * in - a[1] * out);
    y[n] = out;
    n++;
  }
}
@@ -541,20 +534,21 @@ int main(int argc, char **argv) {
541
534
int vad_cnt = 0 ;
542
535
int gain_change_count = 0 ;
543
536
float speech_gain = 1 , noise_gain = 1 ;
544
- FILE * f1 , * f2 , * fout ;
537
+ FILE * f1 , * f2 ;
538
+ int maxCount ;
545
539
DenoiseState * st ;
546
540
DenoiseState * noise_state ;
547
541
DenoiseState * noisy ;
548
- st = rnnoise_create ();
549
- noise_state = rnnoise_create ();
550
- noisy = rnnoise_create ();
542
+ st = rnnoise_create (NULL );
543
+ noise_state = rnnoise_create (NULL );
544
+ noisy = rnnoise_create (NULL );
551
545
if (argc != 4 ) {
552
- fprintf (stderr , "usage: %s <speech> <noise> <output denoised >\n" , argv [0 ]);
546
+ fprintf (stderr , "usage: %s <speech> <noise> <count >\n" , argv [0 ]);
553
547
return 1 ;
554
548
}
555
549
f1 = fopen (argv [1 ], "r" );
556
550
f2 = fopen (argv [2 ], "r" );
557
- fout = fopen (argv [3 ], "w" );
551
+ maxCount = atoi (argv [3 ]);
558
552
for (i = 0 ;i < 150 ;i ++ ) {
559
553
short tmp [FRAME_SIZE ];
560
554
fread (tmp , sizeof (short ), FRAME_SIZE , f2 );
@@ -566,12 +560,11 @@ int main(int argc, char **argv) {
566
560
float Ln [NB_BANDS ];
567
561
float features [NB_FEATURES ];
568
562
float g [NB_BANDS ];
569
- float gf [FREQ_SIZE ]= {1 };
570
563
short tmp [FRAME_SIZE ];
571
564
float vad = 0 ;
572
- float vad_prob ;
573
565
float E = 0 ;
574
- if (count == 50000000 ) break ;
566
+ if (count == maxCount ) break ;
567
+ if ((count %1000 )== 0 ) fprintf (stderr , "%d\r" , count );
575
568
if (++ gain_change_count > 2821 ) {
576
569
speech_gain = pow (10. , (-40 + (rand ()%60 ))/20. );
577
570
noise_gain = pow (10. , (-30 + (rand ()%50 ))/20. );
@@ -646,37 +639,16 @@ int main(int argc, char **argv) {
646
639
if (vad == 0 && noise_gain == 0 ) g [i ] = -1 ;
647
640
}
648
641
count ++ ;
649
- #if 0
650
- for (i = 0 ;i < NB_FEATURES ;i ++ ) printf ("%f " , features [i ]);
651
- for (i = 0 ;i < NB_BANDS ;i ++ ) printf ("%f " , g [i ]);
652
- for (i = 0 ;i < NB_BANDS ;i ++ ) printf ("%f " , Ln [i ]);
653
- printf ("%f\n" , vad );
654
- #endif
655
642
#if 1
656
643
fwrite (features , sizeof (float ), NB_FEATURES , stdout );
657
644
fwrite (g , sizeof (float ), NB_BANDS , stdout );
658
645
fwrite (Ln , sizeof (float ), NB_BANDS , stdout );
659
646
fwrite (& vad , sizeof (float ), 1 , stdout );
660
- #endif
661
- #if 0
662
- compute_rnn (& noisy -> rnn , g , & vad_prob , features );
663
- interp_band_gain (gf , g );
664
- #if 1
665
- for (i = 0 ;i < FREQ_SIZE ;i ++ ) {
666
- X [i ].r *= gf [i ];
667
- X [i ].i *= gf [i ];
668
- }
669
- #endif
670
- frame_synthesis (noisy , xn , X );
671
-
672
- for (i = 0 ;i < FRAME_SIZE ;i ++ ) tmp [i ] = xn [i ];
673
- fwrite (tmp , sizeof (short ), FRAME_SIZE , fout );
674
647
#endif
675
648
}
676
649
fprintf (stderr , "matrix size: %d x %d\n" , count , NB_FEATURES + 2 * NB_BANDS + 1 );
677
650
fclose (f1 );
678
651
fclose (f2 );
679
- fclose (fout );
680
652
return 0 ;
681
653
}
682
654
0 commit comments