[Audio Processing] librosa specshow

728x90

SMALL

librosa specshow

import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig

# Load librosa's bundled trumpet example at its native sample rate.
audio_path = li.example('trumpet')
aud, sr = li.load(audio_path, sr=None)
n_fft = 512   # STFT frame length in samples
rsr = 11025   # sample rate to downsample to

# Low-pass filter before downsampling, with the cutoff placed at the new
# Nyquist frequency (resample rate divided by 2).
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample, then keep `sr` in sync with the resampled signal so every
# later call sees the rate the samples actually have.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, and the same magnitudes mapped onto mel bands.
# `S` is a magnitude (not power) spectrogram, hence amplitude_to_db below.
spec = np.abs(li.stft(aud, n_fft=n_fft, window=sig.windows.hamming))
mel_spec = li.feature.melspectrogram(S=spec, sr=sr)

# Plot the standard spectrogram and the mel spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
panels = ((spec, 'hz'), (mel_spec, 'mel'))

for ax, (sp, scale) in zip(axes, panels):
    li.display.specshow(
        li.amplitude_to_db(sp), x_axis='time', y_axis=scale, sr=sr, ax=ax
    )

plt.tight_layout()
plt.show()

HTK

import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig

# Load librosa's bundled trumpet example at its native sample rate.
audio_path = li.example('trumpet')
aud, sr = li.load(audio_path, sr=None)
n_fft = 512   # STFT frame length in samples
rsr = 11025   # sample rate to downsample to

# Low-pass filter before downsampling, with the cutoff placed at the new
# Nyquist frequency (resample rate divided by 2).
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample, then keep `sr` in sync with the resampled signal.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, and the same magnitudes mapped onto HTK-scale mel bands.
spec = np.abs(li.stft(aud, n_fft=n_fft, window=sig.windows.hamming))
mel_spec = li.feature.melspectrogram(S=spec, sr=sr, htk=True)

# Plot the standard spectrogram and the mel spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
panels = ((spec, 'hz'), (mel_spec, 'mel'))

for ax, (sp, scale) in zip(axes, panels):
    # htk=True makes the mel axis use the same HTK band frequencies the
    # filter bank was built with; specshow only consults it for mel axes.
    li.display.specshow(
        li.amplitude_to_db(sp), x_axis='time', y_axis=scale, sr=sr,
        htk=True, ax=ax,
    )

# Relabel the mel panel in HTK mel units. The axis itself is in Hz, so the
# ticks are placed at the Hz values of round mel numbers. Both conversions
# must use htk=True, otherwise the labels would follow the Slaney scale.
mel_ax = axes[1]
max_mel = li.hz_to_mel(sr / 2, htk=True)   # mel value at the Nyquist frequency
yticks_mel = np.arange(0, int(max_mel) + 1, 500)
yticks_hz = li.mel_to_hz(yticks_mel, htk=True)
mel_ax.set_yticks([], minor=True)
mel_ax.set_yticks(yticks_hz, minor=False)
mel_ax.set_yticklabels(yticks_mel)
mel_ax.set_ylabel("Mel (HTK)")

plt.tight_layout()
plt.show()

Slaney

import librosa as li
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig

# Load librosa's bundled trumpet example at its native sample rate.
audio_path = li.example('trumpet')
aud, sr = li.load(audio_path, sr=None)
n_fft = 512   # STFT frame length in samples
rsr = 11025   # sample rate to downsample to

# Low-pass filter before downsampling, with the cutoff placed at the new
# Nyquist frequency (resample rate divided by 2).
cutoff = rsr / 2
sos = sig.butter(10, cutoff, fs=sr, btype='lowpass', analog=False, output='sos')
aud = sig.sosfilt(sos, aud)

# Downsample, then keep `sr` in sync with the resampled signal.
aud = li.resample(y=aud, orig_sr=sr, target_sr=rsr)
sr = rsr

# Magnitude STFT, and the same magnitudes mapped onto Slaney-scale mel bands.
spec = np.abs(li.stft(aud, n_fft=n_fft, window=sig.windows.hamming))
mel_spec = li.feature.melspectrogram(S=spec, sr=sr, htk=False)

# Plot the standard spectrogram and the mel spectrogram side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
panels = ((spec, 'hz'), (mel_spec, 'mel'))

for ax, (sp, scale) in zip(axes, panels):
    li.display.specshow(
        li.amplitude_to_db(sp), x_axis='time', y_axis=scale, sr=sr, ax=ax
    )

# Relabel the mel panel in Slaney mel units. The axis itself is in Hz, so
# the ticks are placed at the Hz values of every fifth mel from 0 to 40.
mel_ax = axes[1]
yticks_mel = np.arange(0, 41, 5)
yticks_hz = li.mel_to_hz(yticks_mel, htk=False)
mel_ax.set_yticks([], minor=True)
mel_ax.set_yticks(yticks_hz, minor=False)
mel_ax.set_yticklabels(yticks_mel)
mel_ax.set_ylabel("Mel (Slaney)")

plt.tight_layout()
plt.show()

https://librosa.org/doc/0.10.2/generated/librosa.display.specshow.html

librosa.display.specshow — librosa 0.10.2 documentation

librosa.org

728x90

LIST

저작자표시 비영리 변경금지 (새창열림)

'Linguistic Intelligence > Audio Processing' 카테고리의 다른 글

캡스트럼 (Cepstrum) (0)	2024.03.20
오디오 데이터 처리 (2)	2024.03.06
소리 및 파형 (0)	2024.03.06
[Audio Processing] 시스템 구조 (Systems structures) (0)	2023.06.15
[Audio Processing] 말 (Speech) (0)	2023.03.09

GOATLAB

[Audio Processing] librosa specshow

librosa specshow

HTK

Slaney

'Linguistic Intelligence > Audio Processing' 카테고리의 다른 글

티스토리툴바

[Audio Processing] librosa specshow

librosa specshow

HTK

Slaney

'Linguistic Intelligence > Audio Processing' 카테고리의 다른 글

관련글

티스토리툴바