Plot audio spectrogram inside tkinter GUI

I am trying to display the spectrogram of a selected segment of an audio waveform. I can display the waveform inside the tkinter GUI, but I cannot display the spectrogram: I do not know how to draw the output of the spectrogram function on the canvas defined in tkinter. I would be thankful if anyone could help me. Thanks.

from __future__ import print_function, absolute_import
import numpy
import math

import scipy.fftpack
import scipy.signal
from numpy.lib.stride_tricks import as_strided

import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.widgets import SpanSelector

import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

class AudioPlayer(object):
    """Hold an audio signal together with its sampling rate.

    The channel count is derived from the shape of ``signal``: an array
    with a single axis counts as one channel, otherwise the size of the
    second axis is used.
    """

    def __init__(self, signal, sampling_rate):
        self.signal = signal
        self.sampling_rate = sampling_rate

        dims = self.signal.shape
        # One axis -> single channel; otherwise axis 1 holds the channels.
        self.channels = dims[1] if len(dims) != 1 else 1

    @property
    def fs(self):
        """Sampling rate (short alias of ``sampling_rate``)."""
        return self.sampling_rate

    @property
    def duration_samples(self):
        """Size of the first axis of the signal, i.e. its length in samples."""
        return self.signal.shape[0]


class EventListVisualizer(object):
    """Tk GUI showing a waveform and a zoomable detail view of the same audio.

    The upper panel shows the whole waveform and carries a horizontal span
    selector.  The lower panel shows either the spectrogram (default) or the
    time-domain signal; selecting a span in the upper panel zooms the lower
    panel to that span.

    Keyword arguments (all optional):
        audio_signal: audio samples; expected to be mono (1-D).
        sampling_rate: sampling rate of ``audio_signal`` in Hz.
        mode: ``'spectrogram'`` (default) or ``'time_domain'``.
        spec_hop_size, spec_win_size, spec_fft_size: spectrogram parameters
            (defaults 256, 1024, 1024).
        spec_cmap, spec_interpolation: forwarded to ``imshow``.
        color: colour of the selection span.
        use_blit: forwarded to the span selector.
        waveform_selector_point_hop: decimation step of the overview plot.
        event_roll_cmap: name of the colormap stored in ``label_colormap``.
    """

    _MODES = ('spectrogram', 'time_domain')

    def __init__(self, master, **kwargs):
        self.master = master
        self.master.title("A simple GUI")

        audio_signal = kwargs.get('audio_signal')
        sampling_rate = kwargs.get('sampling_rate')

        # Always define the attribute so a missing signal can be reported
        # clearly by generate_GUI instead of surfacing as an AttributeError.
        self.audio = None
        if audio_signal is not None and sampling_rate is not None:
            audio_signal = numpy.asarray(audio_signal)
            peak = numpy.max(numpy.abs(audio_signal))
            if peak > 0:
                # Peak-normalise; skipped for an all-zero signal to avoid 0/0.
                audio_signal = audio_signal / peak
            self.audio = AudioPlayer(signal=audio_signal, sampling_rate=sampling_rate)

        self.mode = kwargs.get('mode', 'spectrogram')
        if self.mode not in self._MODES:
            raise ValueError(
                "mode must be one of {0}, got {1!r}".format(self._MODES, self.mode))

        self.spec_hop_size = kwargs.get('spec_hop_size', 256)
        self.spec_win_size = kwargs.get('spec_win_size', 1024)
        self.spec_fft_size = kwargs.get('spec_fft_size', 1024)
        self.spec_cmap = kwargs.get('spec_cmap', 'magma')
        self.spec_interpolation = kwargs.get('spec_interpolation', 'nearest')

        self.color = kwargs.get('color', '#339933')

        self.D = None
        self.x = None
        self.timedomain_locations = None

        self.begin_time = None
        self.end_time = None

        self.slider_time = None

        self.use_blit = kwargs.get('use_blit', False)

        self.waveform_selector_point_hop = kwargs.get('waveform_selector_point_hop', 1000)
        self.waveform_highlight_point_hop = 100
        self.waveform_highlight_color = self.color

        self.fig_shape = (14, 2)

        self._quit = False

        # pyplot.get_cmap is used because matplotlib.cm.get_cmap is not
        # available in recent Matplotlib releases.
        self.label_colormap = plt.get_cmap(kwargs.get('event_roll_cmap', 'rainbow'))

    def generate_GUI(self):
        """Build both panels and embed them in ``self.master``.

        Raises:
            ValueError: if no audio signal / sampling rate was supplied.
        """
        if self.audio is None:
            raise ValueError(
                "EventListVisualizer needs both 'audio_signal' and 'sampling_rate'")

        signal = self.audio.signal

        # Waveform overview panel
        # ====================================
        self.fig1 = Figure(figsize=self.fig_shape, dpi=100)
        self.ax1 = self.fig1.add_subplot(111)
        self.ax1.grid(True)

        self.timedomain_locations = numpy.arange(0, signal.shape[0])

        overview_hop = self.waveform_selector_point_hop
        self.ax1.fill_between(
            self.timedomain_locations[::overview_hop],
            signal[::overview_hop],
            -signal[::overview_hop],
            color='0.5')

        # Frame holding the waveform plot.
        self.waveforms_frame = tk.Frame(self.master, relief=tk.RAISED, borderwidth=3)
        self.waveforms_frame.pack(fill=tk.X)

        title_label_1 = tk.Label(self.waveforms_frame, text="Wave Plot", font="Times 12 italic bold")
        title_label_1.pack()

        # Tk canvas rendering the Matplotlib waveform figure.
        self.waveform_canvas = FigureCanvasTkAgg(self.fig1, master=self.waveforms_frame)
        self.waveform_canvas.draw()
        self.waveform_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

        # Detail (highlight) panel
        # ====================================
        self.fig2 = Figure(figsize=self.fig_shape, dpi=100)
        self.ax2 = self.fig2.add_subplot(111)

        self.x = numpy.arange(0, self.audio.duration_samples)

        self.begin_time = self.x[0] / float(self.audio.fs)
        self.end_time = self.x[-1] / float(self.audio.fs)

        if self.mode == 'spectrogram':
            self.D = self.get_spectrogram(
                audio=signal,
                n_fft=self.spec_fft_size,
                win_length=self.spec_win_size,
                hop_length=self.spec_hop_size)
            # Draw on the embedded axes.  Going through pyplot would render
            # into pyplot's own implicit figure, leaving this panel empty.
            self.plot_spectrogram(
                data=self.D,
                sampling_rate=self.audio.fs,
                interpolation=self.spec_interpolation,
                cmap=self.spec_cmap,
                ax=self.ax2)
        else:
            detail_hop = self.waveform_highlight_point_hop
            self.ax2.grid(True)
            self.ax2.axhline()  # zero-amplitude reference line
            self.ax2.plot(
                self.timedomain_locations[::detail_hop],
                signal[::detail_hop],
                color=self.waveform_highlight_color)
            self.ax2.set_xlim(self.timedomain_locations[0], self.timedomain_locations[-1])

        # Frame holding the spectrogram plot.
        self.spectrums_frame = tk.Frame(self.master, relief=tk.RAISED, borderwidth=3)
        self.spectrums_frame.pack(fill=tk.X)

        title_label_2 = tk.Label(self.spectrums_frame, text="Spectrogram Plot", font="Times 12 italic bold")
        title_label_2.pack()

        # Tk canvas rendering the Matplotlib detail figure.
        self.spectogram_canvas = FigureCanvasTkAgg(self.fig2, master=self.spectrums_frame)
        self.spectogram_canvas.draw()
        self.spectogram_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=tk.TRUE)

        # Span selector on the overview; it must be created after the canvas
        # exists and kept referenced so it stays responsive.
        self.slider_time = self._create_span_selector()

    def _create_span_selector(self):
        """Create the horizontal span selector across Matplotlib versions."""
        span_style = dict(alpha=0.15, facecolor=self.color)
        try:
            # Newer Matplotlib: 'props' / 'interactive' keyword names.
            return SpanSelector(
                ax=self.ax1, onselect=self.on_select, direction='horizontal',
                minspan=0, useblit=self.use_blit,
                props=span_style, interactive=True)
        except TypeError:
            # Older Matplotlib: 'rectprops' / 'span_stays' keyword names.
            return SpanSelector(
                ax=self.ax1, onselect=self.on_select, direction='horizontal',
                minspan=0, useblit=self.use_blit,
                rectprops=span_style, span_stays=True)

    def _set_span_visible(self, visible):
        """Show or hide the persistent selection rectangle."""
        stay_rect = getattr(self.slider_time, 'stay_rect', None)
        if stay_rect is not None:
            stay_rect.set_visible(visible)
        else:
            # No 'stay_rect' attribute: toggle the selector's own artists.
            self.slider_time.set_visible(visible)

    def on_select(self, x_min, x_max):
        """Zoom the detail panel to the selected span.

        Args:
            x_min, x_max: span limits in overview x units (sample indices).

        A span shorter than 10 samples resets the detail panel to the full
        signal and hides the selection rectangle.
        """
        last_sample = len(self.x) - 1
        # The overview axes extend past the signal, so clamp the selection.
        x_min = min(max(int(x_min), 0), last_sample)
        x_max = min(max(int(x_max), 0), last_sample)

        if abs(x_max - x_min) < 10:
            # Reset highlight
            self.begin_time = self.x[0] / float(self.audio.fs)
            self.end_time = self.x[-1] / float(self.audio.fs)

            if self.mode == 'spectrogram':
                self.ax2.set_xlim(0, self.D.shape[1])
            elif self.mode == 'time_domain':
                self.ax2.set_xlim(self.timedomain_locations[0], self.timedomain_locations[-1])

            self._set_span_visible(False)
        else:
            self.begin_time = float(x_min) / self.audio.fs
            self.end_time = float(x_max) / self.audio.fs

            if self.mode == 'spectrogram':
                # Convert sample indices to spectrogram frame indices.
                hop = float(self.spec_hop_size)
                spec_min = int(x_min / hop)
                spec_max = int(math.ceil(x_max / hop))
                # Never request a zero-width view (span shorter than one hop).
                spec_max = max(spec_max, spec_min + 1)
                self.ax2.set_xlim(spec_min, spec_max)
            elif self.mode == 'time_domain':
                self.ax2.set_xlim(x_min, x_max)

            self._set_span_visible(True)

        self.spectogram_canvas.draw_idle()
        # Redraw the overview too so the span visibility change shows up.
        self.waveform_canvas.draw_idle()

    @staticmethod
    def get_spectrogram(audio, n_fft=256, win_length=1024, hop_length=1024):
        """Compute a log-power spectrogram in dB relative to its maximum.

        Args:
            audio: 1-D array of samples.
            n_fft: FFT size (frame length after window padding).
            win_length: Hann window length; must not exceed ``n_fft``.  A
                shorter window is zero-padded symmetrically to ``n_fft``.
            hop_length: number of samples between successive frames.

        Returns:
            Array of shape ``(1 + n_fft // 2, n_frames)`` whose maximum is
            0 dB and whose values are floored 80 dB below the maximum.

        Raises:
            ValueError: if ``audio`` is not 1-D, ``hop_length`` < 1 or
                ``win_length`` > ``n_fft``.  Note that the signature defaults
                (n_fft=256, win_length=1024) violate the last constraint, so
                callers are expected to pass consistent sizes explicitly.
        """
        audio = numpy.asarray(audio)
        n_fft = int(n_fft)
        win_length = int(win_length)
        hop_length = int(hop_length)

        if audio.ndim != 1:
            raise ValueError("audio must be one-dimensional (mono)")
        if hop_length < 1:
            raise ValueError("hop_length must be a positive integer")
        if win_length > n_fft:
            raise ValueError(
                "win_length ({0}) must not exceed n_fft ({1})".format(win_length, n_fft))

        # Periodic Hann window (the form intended for spectral analysis).
        window = scipy.signal.get_window('hann', win_length, fftbins=True)
        if win_length < n_fft:
            lead = (n_fft - win_length) // 2
            window = numpy.pad(window, (lead, n_fft - win_length - lead), mode='constant')
        window = window.reshape((-1, 1))

        # Reflect-pad so that frame k is centred on sample k * hop_length.
        audio = numpy.ascontiguousarray(numpy.pad(audio, n_fft // 2, mode='reflect'))

        # Overlapping frames as a strided view: column k starts at k * hop_length.
        n_frames = 1 + (len(audio) - n_fft) // hop_length
        frames = as_strided(
            audio,
            shape=(n_fft, n_frames),
            strides=(audio.itemsize, hop_length * audio.itemsize))

        n_bins = 1 + n_fft // 2
        S = numpy.empty((n_bins, n_frames), dtype=numpy.complex64, order='F')

        # Transform in column blocks to bound the size of temporaries.
        max_memory_block = 2**8 * 2**10
        n_columns = max(1, max_memory_block // (n_bins * S.itemsize))

        for start in range(0, n_frames, n_columns):
            stop = min(start + n_columns, n_frames)
            # Only the magnitude is used below, so the real-input FFT suffices.
            S[:, start:stop] = numpy.fft.rfft(window * frames[:, start:stop], axis=0)

        magnitude = numpy.abs(S) ** 2

        ref = numpy.max(magnitude)
        amin = 1e-10   # floor that keeps log10 finite for silent bins
        top_db = 80.0  # dynamic range kept below the maximum

        log_spec = 10.0 * numpy.log10(numpy.maximum(amin, magnitude))
        log_spec -= 10.0 * numpy.log10(numpy.maximum(amin, ref))

        return numpy.maximum(log_spec, log_spec.max() - top_db)

    @staticmethod
    def plot_spectrogram(data, sampling_rate=44100, n_yticks=5, interpolation='nearest', cmap='magma', ax=None):
        """Draw a spectrogram image with frequency labels on the y axis.

        Args:
            data: 2-D array, rows are frequency bins (row 0 at the bottom).
            sampling_rate: sampling rate in Hz; the top row is labelled
                with half of it.
            n_yticks: number of y tick labels.
            interpolation: forwarded to ``imshow``.
            cmap: colormap name forwarded to ``imshow``.
            ax: Matplotlib axes to draw on.  Pass the axes of an embedded
                figure here; when omitted, pyplot's current axes are used.

        Returns:
            The image object returned by ``imshow``.
        """
        if ax is None:
            ax = plt.gca()

        image = ax.imshow(data, aspect='auto', origin='lower', interpolation=interpolation, cmap=cmap)

        # X axis
        ax.set_xticks([])

        # Y axis: rows are linearly spaced in frequency from 0 to fs / 2,
        # so the label of a tick is the frequency of the row it sits on.
        n_bins = data.shape[0]
        positions = numpy.linspace(0, n_bins - 1, n_yticks, endpoint=True).astype(int)
        frequencies = numpy.linspace(0, 0.5 * sampling_rate, n_bins, endpoint=True).astype(int)

        ax.set_yticks(positions)
        ax.set_yticklabels(frequencies[positions])

        return image


if __name__ == '__main__':
    # Usage: python <this script> [audio_file]   (defaults to 'sample.wav')
    import sys

    import soundfile

    def read_audio(audio_path, target_fs=None):
        """Read an audio file as a mono signal.

        Args:
            audio_path: path of the file to read.
            target_fs: optional sampling rate to resample to.

        Returns:
            Tuple ``(audio, fs)``; multi-channel input is averaged over
            its second axis.
        """
        (audio, fs) = soundfile.read(audio_path)

        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if target_fs is not None and fs != target_fs:
            # Imported lazily: librosa is only needed when resampling.
            import librosa
            audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
            fs = target_fs

        return audio, fs

    audio_path = sys.argv[1] if len(sys.argv) > 1 else 'sample.wav'
    audio, fs = read_audio(audio_path)

    # Create the window only after the audio loaded successfully.
    root = tk.Tk()

    vis = EventListVisualizer(root, audio_signal=audio, sampling_rate=fs)
    vis.generate_GUI()

    root.mainloop()
1 Like

@YashNita We can’t really help you with this because it’s not reproducible for us. Please strip out everything you don’t need to reproduce your problem and make sure the example is self-contained. For a guide as to what that means, see http://www.sscce.org