-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextraction_library.py
37 lines (26 loc) · 1.43 KB
/
extraction_library.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import librosa
import numpy as np
def process_audio(video):
    """Pull the audio samples out of ``video`` and compute their spectrogram.

    Args:
        video: Object exposing ``audio.iter_frames()`` and ``duration``
            (presumably a moviepy video clip -- confirm against the caller).

    Returns:
        A 6-tuple ``(sound_array, audio_data, sr, D, DB, freqs)``:

        * ``sound_array`` -- NumPy array built from every audio frame.
        * ``audio_data`` -- column 0 of ``sound_array`` (the first channel
          when the track is stereo).
        * ``sr`` -- sampling rate estimated as number of frames divided by
          ``video.duration``; this is a float, not necessarily an integer.
        * ``D`` -- absolute value of ``librosa.stft(audio_data)``.
        * ``DB`` -- ``D`` converted with ``librosa.amplitude_to_db`` using
          the array maximum as the reference.
        * ``freqs`` -- result of ``librosa.core.fft_frequencies`` for ``sr``.
    """
    # Materialise all frames first, then stack them into a single array.
    sound_array = np.array(list(video.audio.iter_frames()))

    # Only the first column is analysed; any further channels are ignored.
    audio_data = sound_array[:, 0]

    # Sampling rate is derived from the data itself: frames per unit duration.
    sr = len(sound_array) / video.duration

    # STFT is called with its default parameters; only the magnitude is kept.
    magnitude = np.abs(librosa.stft(audio_data))

    # Express the magnitudes in decibels relative to the loudest value.
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Frequencies matching the rows of the spectrogram
    # (assumes the default n_fft is shared with the stft call above).
    bin_frequencies = librosa.core.fft_frequencies(sr=sr)

    return sound_array, audio_data, sr, magnitude, magnitude_db, bin_frequencies
def extract_frequency_magnitude(DB, freqs, sr, target_freq, hop_length=512):
    """Print the mean magnitude at the bin nearest ``target_freq``.

    Two lines are printed: the mean over every time frame, and the list of
    means over consecutive chunks of roughly 10 seconds each.

    Args:
        DB: 2-D array indexed as ``[frequency_bin, time_frame]``
            (presumably a decibel spectrogram -- confirm against the caller).
        freqs: 1-D array giving the frequency of each row of ``DB``.
        sr: Sampling rate of the audio the spectrogram was computed from.
        target_freq: Frequency of interest; the row of ``DB`` whose entry in
            ``freqs`` is closest to it is used.
        hop_length: Number of audio samples between successive time frames
            of ``DB``. Defaults to 512, the value this function previously
            hard-coded; pass the hop actually used for the STFT if it
            differs.

    Returns:
        None. Results are written to standard output only.
    """
    # Row of the spectrogram whose centre frequency is nearest the target.
    target_freq_index = np.argmin(np.abs(freqs - target_freq))
    magnitude_at_target = DB[target_freq_index]
    print(f"mean magnitude at {target_freq}hz: {np.mean(magnitude_at_target):.2f}")

    # Spectrogram frames per second of audio.
    frames_per_second = sr / hop_length

    # Clamp to one frame: when a single frame already spans more than 10 s
    # the truncated count is 0, which would make range() raise on a zero step.
    frames_per_10s = max(1, int(10 * frames_per_second))

    # The final chunk may be shorter than the others; slicing handles that.
    average_magnitudes_per_10s = [
        np.mean(magnitude_at_target[start:start + frames_per_10s])
        for start in range(0, DB.shape[1], frames_per_10s)
    ]
    average_magnitudes_formatted = [f"{x:.2f}" for x in average_magnitudes_per_10s]
    print(f"mean magnitude at {target_freq}hz for every 10s chunk: {average_magnitudes_formatted}")