-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharctic.py
113 lines (98 loc) · 3.95 KB
/
arctic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""Most code is copy-pasted or adapted from sflinear/model.py"""
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize, getfigs
from scipy.io import wavfile
import scipy.signal
import parse
def parse_marks(marks, print_comments=True):
    """Parse newline-separated marks into a float array.

    Args:
        marks (str): Either the marks text itself or a path to a text file
            containing it. Each line holds one number; lines whose first
            non-blank character is '#' are comments, and blank lines are
            ignored.
        print_comments (bool): If True, print each comment line.

    Returns:
        np.ndarray: The parsed marks, in file order.

    Raises:
        ValueError: If a non-comment, non-blank line is not a valid float.
    """
    try:
        f = open(marks, 'r')
    except (OSError, ValueError):
        # `marks` is not an openable path (e.g. it is the multi-line marks
        # text itself, or contains a NUL byte): treat it as the content.
        text = marks
    else:
        # Errors while reading a file that did open are real errors and
        # are allowed to propagate.
        with f:
            text = f.read()

    res = []
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            continue  # Tolerate blank lines (e.g. a trailing newline pair)
        if stripped.startswith('#'):
            if print_comments:
                print(line)
            continue
        res.append(float(stripped))
    return np.asarray(res)
def validate_marks(marks):
    """Check that `marks` holds at least two strictly increasing values.

    Args:
        marks: Sequence of mark values.

    Raises:
        ValueError: If there are fewer than two marks or any consecutive
            difference is not positive.
    """
    too_few = len(marks) <= 1
    # The ordering test is only evaluated when there are enough marks.
    if too_few or not np.all(np.diff(marks) > 0.):
        raise ValueError('Invalid marks')
def resample_data(fs, data, fs_new):
    """Resample `data` along its first axis from rate `fs` to `fs_new`.

    Args:
        fs: Current sampling rate.
        data (np.ndarray): Samples, with time along axis 0.
        fs_new: Target sampling rate.

    Returns:
        tuple: `(fs_new, resampled)`, where `resampled` has
        `int(len(data) * fs_new / fs)` samples along axis 0.
    """
    n_in = data.shape[0]
    # Output length is truncated (not rounded) to an integer sample count.
    n_out = int(n_in * fs_new / fs)
    return fs_new, scipy.signal.resample(data, n_out, axis=0)
def marks_to_array_indices(fs, d, marks, d_start_time=0.):
    """Map each mark to the index of the sample of `d` closest in time.

    Sample `i` of `d` is taken to occur at `d_start_time + i / fs`. Marks
    outside the sampled range map to the first or last index. When a mark is
    equidistant from two samples, the lower index is chosen.

    Args:
        fs: Sampling rate of `d`; assumed positive so sample times increase.
        d: Signal; only its length (along the first axis) is used.
        marks: 1-D sequence of mark times, in the same unit as `1 / fs`.
        d_start_time (float): Time of the first sample of `d`.

    Returns:
        np.ndarray: One integer index per mark.

    Raises:
        ValueError: If `d` is empty.
    """
    n = len(d)
    if n == 0:
        raise ValueError('Cannot map marks onto an empty signal')
    marks = np.asarray(marks, dtype=float)
    praat_t = d_start_time + np.arange(n) / fs

    # The nearest sample is one of the two neighbours around the insertion
    # point, so there is no need to build a len(d) x len(marks) distance
    # matrix.
    insert_at = np.searchsorted(praat_t, marks, side='left')
    left = np.maximum(insert_at - 1, 0)
    right = np.minimum(insert_at, n - 1)
    left_is_nearest = np.abs(praat_t[left] - marks) <= np.abs(praat_t[right] - marks)
    return np.where(left_is_nearest, left, right)
def ensure_stereo(data):
    """Split `data` into two channels, padding mono input with silence.

    Args:
        data: Either a 1-D array of samples (mono), or a 2-D array with
            samples along axis 0 and one or two channels along axis 1.

    Returns:
        tuple: `(x, y)` with the first and second channel. For mono input,
        `y` is an all-zero float array of the same length as `x`.

    Raises:
        ValueError: If `data` is neither 1-D nor 2-D with one or two columns.
    """
    data = np.asarray(data)
    # Dispatch on shape explicitly: relying on tuple-unpacking of `data.T`
    # misreads a 2-sample mono signal as two scalar "channels".
    if data.ndim == 1:
        return data, np.zeros(len(data))
    if data.ndim == 2:
        n_channels = data.shape[1]
        if n_channels == 2:
            return data[:, 0], data[:, 1]
        if n_channels == 1:
            mono = data[:, 0]
            return mono, np.zeros(len(mono))
    raise ValueError('Cannot coerce to stereo')
def extract_from_data(fs, data, begin_marker, end_marker):
    """Cut the stretch between two markers out of both channels.

    Args:
        fs: Sampling rate of `data`.
        data: Mono or two-channel signal; passed through `ensure_stereo`.
        begin_marker: Start of the stretch, as a time mark.
        end_marker: End of the stretch, as a time mark.

    Returns:
        tuple: `(d, egg)`, the first and second channel restricted to the
        samples from the one nearest `begin_marker` up to (not including)
        the one nearest `end_marker`.
    """
    speech, egg_signal = ensure_stereo(data)
    markers = np.array([begin_marker, end_marker])
    first, last = marks_to_array_indices(fs, speech, markers)
    # Same piece that splitting at [first, last] yields as its middle part.
    return speech[first:last], egg_signal[first:last]
def extract_pitch_periods_from_data(fs, data, marks):
    """Slice both channels into the segments between consecutive marks.

    Args:
        fs: Sampling rate of `data`.
        data: Mono or two-channel signal; passed through `ensure_stereo`.
        marks: Mark times delimiting the segments.

    Returns:
        tuple: `(ds, eggs)`, two lists with one array per pair of
        consecutive marks. Samples before the first mark and from the last
        mark onwards are discarded.
    """
    speech, egg_signal = ensure_stereo(data)
    cuts = marks_to_array_indices(fs, speech, marks)
    # Pair every cut with its successor: N marks give N - 1 segments.
    spans = list(zip(cuts[:-1], cuts[1:]))
    ds = [speech[lo:hi] for lo, hi in spans]
    eggs = [egg_signal[lo:hi] for lo, hi in spans]
    return ds, eggs
def rescale_to_normalize_ds(ds, eggs):
    """Scale all segments so the largest absolute value in `ds` becomes 1.

    The factor is computed from `ds` only and then applied to both `ds` and
    `eggs`.

    Args:
        ds (list of arrays): Segments used to determine the peak.
        eggs (list of arrays): Segments scaled by the same factor.

    Returns:
        tuple: `(ds_scaled, eggs_scaled)` as lists of float64 arrays. If
        `ds` contains no non-zero sample, both are returned unscaled.
    """
    # Convert before taking abs(): on integer PCM data np.abs wraps around
    # at the most negative value (e.g. int16 -32768 stays -32768).
    ds = [np.asarray(d, dtype=np.float64) for d in ds]
    eggs = [np.asarray(egg, dtype=np.float64) for egg in eggs]

    # Empty segments have no peak and are skipped.
    peaks = [np.abs(d).max() for d in ds if d.size]
    peak = np.max(peaks) if peaks else 0.
    # An all-zero (or empty) input cannot be normalized; avoid dividing by 0.
    factor = 1. if peak == 0. else 1. / peak
    return [d*factor for d in ds], [egg*factor for egg in eggs]
def apply_polarity(ds, eggs, polarity):
    """Multiply every segment by its channel's polarity.

    Args:
        ds (list of arrays): Segments of the first channel.
        eggs (list of arrays): Segments of the second channel.
        polarity (scalar or pair): A single factor applied to both lists, or
            a sequence whose first element applies to `ds` and second to
            `eggs`.

    Returns:
        tuple: `(ds, eggs)` as new lists of scaled segments.
    """
    # Decide scalar vs. pair up front. Probing with `polarity[0]` and
    # catching TypeError breaks for numpy scalars (they raise IndexError)
    # and would also hide TypeErrors from the multiplication itself.
    if np.ndim(polarity) == 0:
        d_polarity = egg_polarity = polarity
    else:
        d_polarity, egg_polarity = polarity[0], polarity[1]
    return [d_polarity*d for d in ds], [egg_polarity*egg for egg in eggs]
def load_arctic_file(path, marks, resample=False, polarity=+1, print_mark_comments=True):
    """Load a wav file and cut it into the segments delimited by `marks`.

    Args:
        path (str): Path to wav file with speech signal in ch 1 and EGG in ch 2.
        marks (str): The marks, given in seconds, one per line; lines
            starting with '#' are comments. If this is a path to a text
            file, the contents of that file are used as the string.
        resample: If truthy, the data is resampled to `int(resample)` before
            it is cut; otherwise the original sampling rate is kept.
        polarity (float or 2-tuple): If tuple, polarity = (d_polarity, egg_polarity).
            Otherwise the same polarity is applied to both.
        print_mark_comments (bool): Print comments in the `marks` argument.

    Returns:
        fs0: Original sampling rate
        fs: Sampling rate of the returned segments
        ts (list of arrays): List of indices. These are dimensionless units with an implied
            scaling factor (T_0 = 1/fs) being the sampling interval.
        ds_float64 (list of arrays): Rescaled, polarity-adjusted speech segments
        eggs_float64 (list of arrays): EGG segments with the same scaling applied
    """
    mark_times = parse_marks(marks, print_mark_comments)
    validate_marks(mark_times)

    fs0, raw = wavfile.read(path)
    if resample:
        fs, samples = resample_data(fs0, raw, int(resample))
    else:
        fs, samples = fs0, raw

    periods, egg_periods = extract_pitch_periods_from_data(fs, samples, mark_times)

    # Dimensionless units; scaling factor is the sampling interval 1/fs.
    ts = [np.arange(len(period)) for period in periods]

    ds_float64, eggs_float64 = rescale_to_normalize_ds(periods, egg_periods)
    ds_float64, eggs_float64 = apply_polarity(ds_float64, eggs_float64, polarity)
    return fs0, fs, ts, ds_float64, eggs_float64