728x90
반응형
SMALL
librosa specshow
import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
# Demo: plot a linear-frequency magnitude spectrogram next to a mel
# spectrogram of the same (downsampled) audio clip.

# Load librosa's bundled trumpet example at its native sample rate (sr=None).
file = li.ex('trumpet')
aud, sr = li.load(file, sr=None)

n_fft = 512
# librosa.stft hops win_length // 4 samples by default; name the value so the
# very same hop can be handed to specshow (whose own default is 512).
hop_length = n_fft // 4
rsr = 11025

# Low-pass before downsampling, with the cutoff at the new Nyquist frequency
# (resample rate divided by 2). scipy requires cutoff < sr / 2, i.e. rsr < sr.
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample and update the sample rate value.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, then project the magnitudes onto the mel filter bank.
spec = li.stft(aud, n_fft=n_fft, hop_length=hop_length, window=sig.windows.hamming)
spec = np.abs(spec)
mel_spec = li.feature.melspectrogram(S=spec, sr=sr)

# Plot standard spectrogram and mel-scale spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
specs = (li.amplitude_to_db(sp) for sp in (spec, mel_spec))
scales = ('hz', 'mel')
for ax, sp, sc in zip(axes, specs, scales):
    # hop_length must match the STFT, otherwise the time axis is mis-scaled.
    li.display.specshow(sp, x_axis='time', y_axis=sc, sr=sr,
                        hop_length=hop_length, ax=ax)
plt.tight_layout()
plt.show()
HTK
import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
# Demo: linear-frequency spectrogram next to a mel spectrogram built with the
# HTK mel formula, with the mel panel's y axis ticked in HTK mel units.

# Load librosa's bundled trumpet example at its native sample rate (sr=None).
file = li.example('trumpet')
aud, sr = li.load(file, sr=None)

n_fft = 512
# librosa.stft hops win_length // 4 samples by default; name the value so the
# very same hop can be handed to specshow (whose own default is 512).
hop_length = n_fft // 4
rsr = 11025

# Low-pass before downsampling, with the cutoff at the new Nyquist frequency
# (resample rate divided by 2). scipy requires cutoff < sr / 2, i.e. rsr < sr.
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample and update the sample rate value.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, then project the magnitudes onto an HTK mel filter bank.
spec = li.stft(aud, n_fft=n_fft, hop_length=hop_length, window=sig.windows.hamming)
spec = np.abs(spec)
mel_spec = li.feature.melspectrogram(S=spec, sr=sr, htk=True)

# Plot standard spectrogram and mel-scale spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
specs = (li.amplitude_to_db(sp) for sp in (spec, mel_spec))
scales = ('hz', 'mel')
for ax, sp, sc in zip(axes, specs, scales):
    # hop_length must match the STFT, and htk must match the filter bank so
    # the mel panel's bin frequencies are computed with the same formula.
    li.display.specshow(sp, x_axis='time', y_axis=sc, sr=sr,
                        hop_length=hop_length, htk=True, ax=ax)

# Relabel the mel panel (right-hand axes) in HTK mel units. HTK mels reach
# roughly 2460 at this Nyquist, so ticks are placed every 500 mel and mapped
# back to Hz with the HTK formula to find their positions on the axis.
mel_ax = axes[1]
mel_max = float(li.hz_to_mel(sr / 2, htk=True))
yticks_mel = np.arange(0, mel_max, 500).astype(int)
yticks_hz = li.mel_to_hz(yticks_mel, htk=True)
mel_ax.set_yticks([], minor=True)
mel_ax.set_yticks(yticks_hz, minor=False)
mel_ax.set_yticklabels(yticks_mel)
mel_ax.set_ylabel("Mel (HTK)")
plt.tight_layout()
plt.show()
Slaney
import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
# Demo: linear-frequency spectrogram next to a mel spectrogram built with the
# Slaney mel formula (htk=False), with the mel panel ticked in Slaney mel units.

# Load librosa's bundled trumpet example at its native sample rate (sr=None).
file = li.example('trumpet')
aud, sr = li.load(file, sr=None)

n_fft = 512
# librosa.stft hops win_length // 4 samples by default; name the value so the
# very same hop can be handed to specshow (whose own default is 512).
hop_length = n_fft // 4
rsr = 11025

# Low-pass before downsampling, with the cutoff at the new Nyquist frequency
# (resample rate divided by 2). scipy requires cutoff < sr / 2, i.e. rsr < sr.
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample and update the sample rate value.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, then project the magnitudes onto a Slaney mel filter bank.
spec = li.stft(aud, n_fft=n_fft, hop_length=hop_length, window=sig.windows.hamming)
spec = np.abs(spec)
mel_spec = li.feature.melspectrogram(S=spec, sr=sr, htk=False)

# Plot standard spectrogram and mel-scale spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
specs = (li.amplitude_to_db(sp) for sp in (spec, mel_spec))
scales = ('hz', 'mel')
for ax, sp, sc in zip(axes, specs, scales):
    # hop_length must match the STFT, otherwise the time axis is mis-scaled.
    li.display.specshow(sp, x_axis='time', y_axis=sc, sr=sr,
                        hop_length=hop_length, htk=False, ax=ax)

# Relabel the mel panel (right-hand axes) in Slaney mel units: pick mel values,
# map them to Hz to find their positions, then label them with the mel values.
mel_ax = axes[1]
yticks_mel = np.arange(0, 41, 5)
yticks_hz = li.mel_to_hz(yticks_mel, htk=False)
mel_ax.set_yticks([], minor=True)
mel_ax.set_yticks(yticks_hz, minor=False)
mel_ax.set_yticklabels(yticks_mel)
mel_ax.set_ylabel("Mel (Slaney)")
plt.tight_layout()
plt.show()
https://librosa.org/doc/0.10.2/generated/librosa.display.specshow.html
728x90
반응형
LIST
'Linguistic Intelligence > Audio Processing' 카테고리의 다른 글
캡스트럼 (Cepstrum) (0) | 2024.03.20 |
---|---|
오디오 데이터 처리 (2) | 2024.03.06 |
소리 및 파형 (0) | 2024.03.06 |
[Audio Processing] 시스템 구조 (Systems structures) (0) | 2023.06.15 |
[Audio Processing] 말 (Speech) (0) | 2023.03.09 |