-
Notifications
You must be signed in to change notification settings - Fork 1
/
audio.go
62 lines (51 loc) · 1.44 KB
/
audio.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package cubewhisper
import (
"math/rand"
"time"
"github.com/unixpickle/num-analysis/linalg"
"github.com/unixpickle/speechrecog/mfcc"
"github.com/unixpickle/wav"
)
const (
	// audioWindowTime is the window duration passed to the
	// MFCC extractor.
	audioWindowTime = 10 * time.Millisecond

	// audioWindowOverlap is the overlap duration passed to
	// the MFCC extractor.
	audioWindowOverlap = 5 * time.Millisecond

	// noiseAmount scales the Gaussian noise that is added to
	// every sample before MFCC extraction.
	noiseAmount = 1e-5
)
// ReadAudioFile loads the sound file at the given path and
// converts its audio into a sequence of MFCC vectors.
//
// Only samples whose index is a multiple of the channel
// count are kept (presumably the first channel of
// interleaved audio); all other samples are discarded.
//
// If the file cannot be read, the error from
// wav.ReadSoundFile is returned with a nil sequence.
func ReadAudioFile(file string) ([]linalg.Vector, error) {
	sound, err := wav.ReadSoundFile(file)
	if err != nil {
		return nil, err
	}

	var mono []float64
	for idx, sample := range sound.Samples() {
		// Skip every sample that does not fall on a channel
		// boundary.
		if idx%sound.Channels() != 0 {
			continue
		}
		mono = append(mono, float64(sample))
	}

	seq := SeqForAudioSamples(mono, sound.SampleRate())
	return seq, nil
}
// SeqForAudioSamples converts a raw buffer of mono audio
// samples, recorded at sampleRate, into an input sequence
// of MFCC vectors.
//
// The input slice is left untouched; a noisy copy is what
// gets analyzed. Because the noise is random, two calls
// with the same input may produce slightly different
// results.
func SeqForAudioSamples(slice []float64, sampleRate int) []linalg.Vector {
	// A perfectly silent chunk has zero power, which yields
	// -Inf values in the MFCCs. A small amount of Gaussian
	// noise on every sample prevents that.
	noisy := make([]float64, len(slice))
	for i := range slice {
		noisy[i] = slice[i] + rand.NormFloat64()*noiseAmount
	}

	opts := &mfcc.Options{
		Window:  audioWindowTime,
		Overlap: audioWindowOverlap,
	}
	source := mfcc.AddVelocities(
		mfcc.MFCC(&mfcc.SliceSource{Slice: noisy}, sampleRate, opts),
	)

	var seq []linalg.Vector
	for {
		vec, err := source.NextCoeffs()
		if err != nil {
			// Any error stops collection; whatever was gathered
			// so far is the result.
			return seq
		}
		seq = append(seq, vec)
	}
}