-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
main.py
58 lines (46 loc) · 1.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import librosa
from librosa import display
import matplotlib.pyplot as plt
import numpy as np
from functools import partial
import torch
from torch_specinv import *
if __name__ == '__main__':
    # Demo: compute a spectrogram of librosa's example recording, estimate a
    # waveform from that spectrogram with L_BFGS, then plot the spectrogram
    # of the estimate.

    # STFT configuration.
    nfft = 1024
    winsize = 1024
    hopsize = 128

    # Use the GPU when one is available, otherwise fall back to the CPU so
    # the demo does not crash on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Only the first 30 seconds of the example file are loaded.
    y, sr = librosa.load(librosa.util.example_audio_file(), duration=30)
    # librosa.output.write_wav('origin.wav', y, sr)
    y = torch.Tensor(y).to(device)
    window = torch.hann_window(winsize).to(device)

    def spectrogram(x, *args, p=1, **kwargs):
        """Return the STFT power of ``x`` raised to the power ``p / 2``.

        The STFT output is squared, summed over dim 2, offset by ``1e-7``
        and raised to ``p / 2``; with the default ``p=1`` this is the square
        root of the summed squares.

        NOTE(review): ``.sum(2)`` assumes ``torch.stft`` returns a real
        tensor with the real/imaginary parts stacked on dim 2 for a 1-D
        input (the legacy, non-complex output layout) -- confirm against
        the installed torch version.

        Args:
            x: Signal tensor forwarded to ``torch.stft``.
            *args: Extra positional arguments forwarded to ``torch.stft``.
            p: Exponent applied to the magnitude (``1`` magnitude,
                ``2`` power).
            **kwargs: Keyword arguments forwarded to ``torch.stft``.

        Returns:
            The compressed spectrogram tensor.
        """
        # The 1e-7 offset keeps the base strictly positive before the
        # fractional power is applied.
        return torch.stft(x, *args, **kwargs).pow(2).sum(2).add_(1e-7).pow(p / 2)

    # Keyword arguments shared by the forward transform and by the
    # (commented-out) alternative algorithms below.
    arg_dict = {
        'win_length': winsize,
        'window': window,
        'hop_length': hopsize,
        'pad_mode': 'reflect',
        'onesided': True,
        'normalized': False,
        'center': True,
    }

    # spec = spectrogram(y, nfft, **arg_dict)
    # Bind the STFT settings once so the same transform is used for the
    # target spectrogram, inside L_BFGS, and for the final evaluation.
    func = partial(spectrogram, n_fft=nfft, **arg_dict)
    spec = func(y)

    # mag = spec.pow(0.5).cpu().numpy()
    # phase = np.random.uniform(-np.pi, np.pi, mag.shape)
    # _, init_x = istft(mag * np.exp(1j * phase), noverlap=1024 - 256)

    # Estimate a signal whose transform matches `spec`; the third argument
    # passes the length of the original signal as a one-element list.
    estimated = L_BFGS(spec, func, [len(y)], max_iter=50, lr=1, history_size=10, eva_iter=5)

    # Alternative algorithms from torch_specinv, kept for reference:
    # estimated = griffin_lim(spec, max_iter=100, alpha=0.3, **arg_dict)
    # estimated = ADMM(spec, max_iter=100, rho=0.2, **arg_dict)
    # arg_dict['hop_length'] = 333
    # estimated = RTISI_LA(spec, maxiter=4, look_ahead=3, asymmetric_window=True, **arg_dict)
    # estimated = SPSI(spec, **arg_dict)
    # arg_dict.pop('window')
    # estimated = PGHI(spec, **arg_dict)

    # Re-analyse the estimate with the same transform and plot it in dB
    # relative to its maximum on a log-frequency axis.
    estimated_spec = func(estimated)
    # estimated_spec = estimated.pow(2).sum(2).sqrt()
    display.specshow(librosa.amplitude_to_db(estimated_spec.cpu().numpy(), ref=np.max), y_axis='log')
    plt.show()

    # print(spectral_convergence(estimated_spec, spec))
    # librosa.output.write_wav('test.wav', estimated.cpu().numpy(), sr)