-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathipa.py
353 lines (339 loc) · 11.5 KB
/
ipa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
###
#This file is a part of the NV Speech Player project.
#URL: https://bitbucket.org/nvaccess/speechplayer
#Copyright 2014 NV Access Limited.
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License version 2.0, as published by
#the Free Software Foundation.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#This license can be found at:
#http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
###
import os
import itertools
import codecs
from . import speechPlayer
# Location of the phoneme table, which lives next to this module.
dataPath = os.path.join(os.path.dirname(__file__), 'data.py')

# ``data`` is used throughout this module as a mapping from an IPA string to a
# dict of attributes for that phoneme.
# NOTE(review): the table is loaded with eval(), which executes whatever
# expression the file contains. That is only acceptable while data.py is a
# trusted file shipped with the package; if the file only ever holds a plain
# literal, ast.literal_eval would be a safer loader -- confirm before changing.
# The file is opened in a ``with`` block so that the handle is closed as soon
# as the text has been read, rather than whenever the object is collected.
with codecs.open(dataPath, 'r', 'utf8') as _dataFile:
    data = eval(_dataFile.read(), None, None)
def iterPhonemes(**kwargs):
    """Yield the key of every entry in ``data`` that matches all of *kwargs*.

    Each keyword argument names an attribute and the value that attribute must
    equal. With no keyword arguments every key is yielded.

    An entry that does not define one of the requested attributes is treated
    as a non-match. Other code in this module reads these attributes with
    ``dict.get``, i.e. treats them as optional, so indexing them directly
    would raise ``KeyError`` part-way through the iteration.
    """
    # A private sentinel, so that a missing attribute never compares equal to
    # the requested value (not even when the caller asks for None).
    missing = object()
    for symbol, attributes in data.items():
        if all(attributes.get(name, missing) == wanted for name, wanted in kwargs.items()):
            yield symbol
def setFrame(frame, phoneme):
    """Copy every attribute stored for *phoneme* in ``data`` onto *frame*.

    *phoneme* is the key to look up in ``data``; an unknown key raises
    ``KeyError``. Every item of the entry is assigned with ``setattr``,
    including the ones whose name starts with an underscore.
    """
    entry = data[phoneme]
    for name in entry:
        setattr(frame, name, entry[name])
def applyPhonemeToFrame(frame, phoneme):
    """Assign the items of the *phoneme* dict to *frame* as attributes.

    Keys that start with an underscore are skipped; every other key/value
    pair is applied with ``setattr``.
    """
    for name, value in phoneme.items():
        if name.startswith('_'):
            # Underscore-prefixed keys are bookkeeping for this module only.
            continue
        setattr(frame, name, value)
def _IPAToPhonemesHelper(text):
    """Scan *text* and yield one ``(char, phoneme)`` pair per unit found.

    *char* is the character the unit started at. *phoneme* is a copy of the
    matching ``data`` entry, or ``None`` when nothing in ``data`` matches.

    Stress marks are not yielded; they are remembered and stored as
    ``_stress`` (1 for the primary mark, 2 for the secondary mark) on the next
    phoneme that is found. A following length mark or tie bar is consumed
    together with the character; the copy is tagged with ``_lengthened``,
    ``_tiedTo`` or ``_tiedFrom`` accordingly, and always with ``_char``.
    """
    textLen = len(text)
    pos = 0  # index of the next character still to be consumed
    pendingStress = 0
    while pos < textLen:
        here = pos
        pos += 1
        char = text[here]
        if char in ('ˈ', 'ˌ'):
            pendingStress = 1 if char == 'ˈ' else 2
            continue
        follower = text[here + 1:here + 2]
        isLengthened = follower == 'ː'
        isTiedTo = follower == '͡'
        isTiedFrom = here > 0 and text[here - 1:here] == '͡'
        phoneme = None
        if isTiedTo:
            # Prefer an entry for the whole three-character tied sequence.
            # If there is one, skip the tie bar and the second character;
            # otherwise skip only the tie bar.
            phoneme = data.get(text[here:here + 3])
            pos += 2 if phoneme else 1
        elif isLengthened:
            # Try the character together with its length mark; the mark is
            # consumed whether or not such an entry exists.
            phoneme = data.get(text[here:here + 2])
            pos += 1
        if not phoneme:
            phoneme = data.get(char)
        if not phoneme:
            yield char, None
            continue
        # Work on a copy so the shared table is never modified.
        phoneme = phoneme.copy()
        if pendingStress:
            phoneme['_stress'] = pendingStress
            pendingStress = 0
        if isTiedFrom:
            phoneme['_tiedFrom'] = True
        elif isTiedTo:
            phoneme['_tiedTo'] = True
        if isLengthened:
            phoneme['_lengthened'] = True
        phoneme['_char'] = char
        yield char, phoneme
def IPAToPhonemes(ipaText):
    """Convert *ipaText* into a list of phoneme dicts.

    The pairs produced by ``_IPAToPhonemesHelper`` are processed in order:

    * a space marks the next phoneme as a word start; other characters with
      no phoneme are ignored;
    * ``_wordStart`` and ``_syllableStart`` are set on the first phoneme of
      each word; ``_syllableStart`` is also set on a non-vowel directly
      before a vowel, or on a primary-stressed phoneme that follows a vowel;
    * stress is moved from the phoneme that carried it to the phoneme that
      starts the current syllable;
    * a copy of ``data['h']`` flagged ``_postStopAspiration`` is inserted
      between an unvoiced stop and a following voiced phoneme that is neither
      a stop nor an affricate;
    * a ``_silence``/``_preStopGap`` entry is inserted before a stop or
      affricate that carries no stress.

    Returns the resulting list.
    """
    phonemes = []
    atWordStart = True
    previous = None
    syllableHead = None
    for char, phoneme in _IPAToPhonemesHelper(ipaText):
        if char == ' ':
            atWordStart = True
            continue
        if not phoneme:
            continue
        stress = phoneme.pop('_stress', 0)
        if previous and not previous.get('_isVowel') and phoneme.get('_isVowel'):
            # The non-vowel just before this vowel opens the syllable.
            previous['_syllableStart'] = True
            syllableHead = previous
        elif stress == 1 and previous and previous.get('_isVowel'):
            phoneme['_syllableStart'] = True
            syllableHead = phoneme
        needsAspiration = (
            previous
            and previous.get('_isStop')
            and not previous.get('_isVoiced')
            and phoneme.get('_isVoiced')
            and not phoneme.get('_isStop')
            and not phoneme.get('_isAfricate')
        )
        if needsAspiration:
            aspiration = data['h'].copy()
            aspiration['_postStopAspiration'] = True
            aspiration['_char'] = None
            phonemes.append(aspiration)
            previous = aspiration
        if atWordStart:
            atWordStart = False
            phoneme['_wordStart'] = True
            phoneme['_syllableStart'] = True
            syllableHead = phoneme
        if stress:
            # Stress is recorded on whichever phoneme starts the syllable.
            syllableHead['_stress'] = stress
        elif phoneme.get('_isStop') or phoneme.get('_isAfricate'):
            phonemes.append(dict(_silence=True, _preStopGap=True))
        phonemes.append(phoneme)
        previous = phoneme
    return phonemes
def correctHPhonemes(phonemeList):
    """Fill in entries flagged ``_copyAdjacent`` from a neighbouring entry.

    For each such entry the source is the next entry, unless there is none or
    it is flagged ``_silence``, in which case the previous entry is used.
    Every key of the source that does not start with an underscore and is not
    already present is copied. The list is modified in place and nothing is
    returned.

    Per the original author's note this targets h phonemes (including
    inserted aspirations), so that they take their values from the phoneme
    next to them.
    """
    lastIndex = len(phonemeList) - 1
    for position, current in enumerate(phonemeList):
        if not current.get('_copyAdjacent'):
            continue
        following = phonemeList[position + 1] if position < lastIndex else None
        if following and not following.get('_silence'):
            source = following
        else:
            source = phonemeList[position - 1] if position > 0 else None
        if not source:
            continue
        for key, value in source.items():
            if key.startswith('_') or key in current:
                continue
            current[key] = value
def calculatePhonemeTimes(phonemeList, baseSpeed):
    """Store ``_duration`` and ``_fadeDuration`` on every entry of *phonemeList*.

    Each value is a constant divided by the current speed, so a larger speed
    gives shorter times. The speed is re-evaluated at every entry flagged
    ``_syllableStart``: ``baseSpeed / 1.4`` for ``_stress`` 1,
    ``baseSpeed / 1.1`` for any other stress, and ``baseSpeed`` when the
    syllable carries no stress. It then stays in force until the next
    syllable start.

    The list is modified in place; nothing is returned.
    """
    previous = None
    syllableStress = 0
    speed = baseSpeed
    count = len(phonemeList)
    for position, phoneme in enumerate(phonemeList):
        has = phoneme.get
        following = phonemeList[position + 1] if position + 1 < count else None
        startsSyllable = has('_syllableStart')
        if startsSyllable:
            syllableStress = has('_stress')
            if not syllableStress:
                speed = baseSpeed
            elif syllableStress == 1:
                speed = baseSpeed / 1.4
            else:
                speed = baseSpeed / 1.1
        # Defaults, used as-is for a vowel that hits no special case below.
        duration = 60.0 / speed
        fade = 10.0 / speed
        if has('_preStopGap'):
            duration = 41.0 / speed
        elif has('_postStopAspiration'):
            duration = 20.0 / speed
        elif has('_isStop'):
            # Capped so that a speed below 1 cannot stretch the stop.
            duration = min(6.0 / speed, 6.0)
            fade = 0.001
        elif has('_isAfricate'):
            duration = 24.0 / speed
            fade = 0.001
        elif not has('_isVoiced'):
            duration = 45.0 / speed
        elif not has('_isVowel'):
            # Voiced, but not a vowel.
            duration = 30.0 / speed
            if has('_isLiquid') or has('_isSemivowel'):
                fade = 20.0 / speed
        else:
            # Voiced vowel.
            if previous and (previous.get('_isLiquid') or previous.get('_isSemivowel')):
                fade = 25.0 / speed
            if has('_tiedTo'):
                duration = 40.0 / speed
            elif has('_tiedFrom'):
                duration = 20.0 / speed
                fade = 20.0 / speed
            elif (
                not syllableStress
                and not startsSyllable
                and following
                and not following.get('_wordStart')
                and (following.get('_isLiquid') or following.get('_isNasal'))
            ):
                # Unstressed vowel followed, within the same word, by a liquid
                # or a nasal.
                if following.get('_isLiquid'):
                    duration = 30.0 / speed
                else:
                    duration = 40.0 / speed
        if has('_lengthened'):
            duration *= 1.05
        phoneme['_duration'] = duration
        phoneme['_fadeDuration'] = fade
        previous = phoneme
def applyPitchPath(phonemeList, startIndex, endIndex, basePitch, inflection, startPitchPercent, endPitchPercent):
    """Set ``voicePitch`` and ``endVoicePitch`` on a run of phonemes.

    The run is ``phonemeList[startIndex]`` up to, but not including,
    ``phonemeList[endIndex]``. A percentage of 50 maps to *basePitch*; every
    50 points above or below multiplies or divides the pitch by
    ``2 ** inflection``.

    The pitch moves linearly from the start pitch to the end pitch in
    proportion to the ``_duration`` of the entries flagged ``_isVoiced``; an
    unvoiced entry gets the same value for both keys. Entries are modified in
    place and nothing is returned.
    """
    def percentToPitch(percent):
        return basePitch * (2 ** (((percent - 50) / 50.0) * inflection))

    startPitch = percentToPitch(startPitchPercent)
    endPitch = percentToPitch(endPitchPercent)
    pitchSpan = endPitch - startPitch
    run = range(startIndex, endIndex)
    # First pass: total voiced time, needed to turn elapsed time into a ratio.
    totalVoiced = sum(
        phonemeList[i]['_duration'] for i in run if phonemeList[i].get('_isVoiced')
    )
    elapsedVoiced = 0
    pitch = startPitch
    for i in run:
        phoneme = phonemeList[i]
        phoneme['voicePitch'] = pitch
        if phoneme.get('_isVoiced'):
            elapsedVoiced += phoneme['_duration']
            ratio = elapsedVoiced / float(totalVoiced)
            pitch = startPitch + (pitchSpan * ratio)
        phoneme['endVoicePitch'] = pitch
# Intonation parameters, keyed by clause type ('.', ',', '?' or '!').
# calculatePhonemePitches reads them as follows:
#   preHead*/nucleus*/tail* - start and end pitch percentages handed to
#       applyPitchPath for that part of the clause (50 means the base pitch).
#       The nucleus0* pair is used when the clause has no tail.
#   headStart/headEnd - the pitch-percentage range the head steps are scaled
#       into.
#   headSteps - percentage of that range at which each successive stressed
#       syllable of the head starts.
#   headExtendFrom - index into headSteps from which the steps repeat once the
#       list has been used up.
#   headStressEndDelta - added to a stressed syllable's start pitch to get its
#       end pitch.
#   headUnstressedRun*Delta - added to the preceding stressed syllable's end
#       pitch to get the start/end pitch of a run of unstressed syllables.
intonationParamTable = {
    '.': {
        'preHeadStart': 46,
        'preHeadEnd': 57,
        'headExtendFrom': 4,
        'headStart': 80,
        'headEnd': 50,
        'headSteps': [100, 75, 50, 25, 0, 63, 38, 13, 0],
        'headStressEndDelta': -16,
        'headUnstressedRunStartDelta': -8,
        'headUnstressedRunEndDelta': -5,
        'nucleus0Start': 64,
        'nucleus0End': 8,
        'nucleusStart': 70,
        'nucleusEnd': 18,
        'tailStart': 24,
        'tailEnd': 8,
    },
    ',': {
        'preHeadStart': 46,
        'preHeadEnd': 57,
        'headExtendFrom': 4,
        'headStart': 80,
        'headEnd': 60,
        'headSteps': [100, 75, 50, 25, 0, 63, 38, 13, 0],
        'headStressEndDelta': -16,
        'headUnstressedRunStartDelta': -8,
        'headUnstressedRunEndDelta': -5,
        'nucleus0Start': 34,
        'nucleus0End': 52,
        'nucleusStart': 78,
        'nucleusEnd': 34,
        'tailStart': 34,
        'tailEnd': 52,
    },
    '?': {
        'preHeadStart': 45,
        'preHeadEnd': 56,
        'headExtendFrom': 3,
        'headStart': 75,
        'headEnd': 43,
        'headSteps': [100, 75, 50, 20, 60, 35, 11, 0],
        'headStressEndDelta': -16,
        'headUnstressedRunStartDelta': -7,
        'headUnstressedRunEndDelta': 0,
        'nucleus0Start': 34,
        'nucleus0End': 68,
        'nucleusStart': 86,
        'nucleusEnd': 21,
        'tailStart': 34,
        'tailEnd': 68,
    },
    '!': {
        'preHeadStart': 46,
        'preHeadEnd': 57,
        'headExtendFrom': 3,
        'headStart': 90,
        'headEnd': 50,
        'headSteps': [100, 75, 50, 16, 82, 50, 32, 16],
        'headStressEndDelta': -16,
        'headUnstressedRunStartDelta': -9,
        'headUnstressedRunEndDelta': 0,
        'nucleus0Start': 92,
        'nucleus0End': 4,
        'nucleusStart': 92,
        'nucleusEnd': 80,
        'tailStart': 76,
        'tailEnd': 4,
    },
}
def calculatePhonemePitches(phonemeList, speed, basePitch, inflection, clauseType):
    """Assign a pitch contour to *phonemeList* for one clause.

    The parameters come from ``intonationParamTable[clauseType or '.']``, so
    an unknown clause type raises ``KeyError``. *speed* is accepted but not
    used here. The list is split into four consecutive parts and
    ``applyPitchPath`` is called for each:

    * pre-head: everything before the first syllable start with ``_stress`` 1;
    * head: from there up to the last such syllable start;
    * nucleus: that last stressed syllable;
    * tail: the unstressed syllables that follow it.

    Phonemes are modified in place; nothing is returned.
    """
    params = intonationParamTable[clauseType or '.']
    total = len(phonemeList)

    # Pre-head: ends at the first primary-stressed syllable start, or covers
    # the whole list when there is none.
    preHeadEnd = total
    for index, phoneme in enumerate(phonemeList):
        if phoneme.get('_syllableStart') and phoneme.get('_stress') == 1:
            preHeadEnd = index
            break
    if preHeadEnd > 0:
        applyPitchPath(
            phonemeList, 0, preHeadEnd, basePitch, inflection,
            params['preHeadStart'], params['preHeadEnd'],
        )

    # Walk backwards from the end: each unstressed syllable start pulls the
    # tail (and the end of the nucleus) back; the first primary-stressed
    # syllable start met is where the nucleus begins.
    nucleusStart = nucleusEnd = tailStart = tailEnd = total
    for index in range(total - 1, preHeadEnd - 1, -1):
        phoneme = phonemeList[index]
        if not phoneme.get('_syllableStart'):
            continue
        if phoneme.get('_stress') == 1:
            nucleusStart = index
            break
        nucleusEnd = tailStart = index

    hasTail = tailEnd > tailStart
    if hasTail:
        applyPitchPath(
            phonemeList, tailStart, tailEnd, basePitch, inflection,
            params['tailStart'], params['tailEnd'],
        )
    if nucleusEnd > nucleusStart:
        # A nucleus with no tail after it uses its own pair of pitches.
        if hasTail:
            startKey, endKey = 'nucleusStart', 'nucleusEnd'
        else:
            startKey, endKey = 'nucleus0Start', 'nucleus0End'
        applyPitchPath(
            phonemeList, nucleusStart, nucleusEnd, basePitch, inflection,
            params[startKey], params[endKey],
        )

    if preHeadEnd >= nucleusStart:
        # No head between the pre-head and the nucleus.
        return

    headTop = params['headStart']
    headBottom = params['headEnd']
    steps = params['headSteps']
    # Once every step has been used, keep cycling through the steps from
    # headExtendFrom onwards.
    stepPercents = itertools.chain(steps, itertools.cycle(steps[params['headExtendFrom']:]))
    stressStart = None
    unstressedRunStart = None
    stressEndPitch = None
    # The range includes nucleusStart itself so that the last stressed
    # syllable, or unstressed run, of the head is closed off there.
    for index in range(preHeadEnd, nucleusStart + 1):
        phoneme = phonemeList[index]
        if not phoneme.get('_syllableStart'):
            continue
        if stressStart is not None:
            # A new syllable begins: finish the pending stressed syllable.
            stressStartPitch = headBottom + (((headTop - headBottom) / 100.0) * next(stepPercents))
            stressEndPitch = stressStartPitch + params['headStressEndDelta']
            applyPitchPath(
                phonemeList, stressStart, index, basePitch, inflection,
                stressStartPitch, stressEndPitch,
            )
            stressStart = None
        if phoneme.get('_stress') == 1:
            if unstressedRunStart is not None:
                # Finish the unstressed run that led up to this syllable; its
                # pitches are relative to the end of the last stressed one.
                runStartPitch = stressEndPitch + params['headUnstressedRunStartDelta']
                runEndPitch = stressEndPitch + params['headUnstressedRunEndDelta']
                applyPitchPath(
                    phonemeList, unstressedRunStart, index, basePitch, inflection,
                    runStartPitch, runEndPitch,
                )
                unstressedRunStart = None
            stressStart = index
        elif unstressedRunStart is None:
            unstressedRunStart = index
def generateFramesAndTiming(ipaText, speed=1, basePitch=100, inflection=0.5, clauseType=None):
    """Yield ``(frame, duration, fadeDuration)`` for each phoneme of *ipaText*.

    The text is converted with ``IPAToPhonemes`` and then passed through
    ``correctHPhonemes``, ``calculatePhonemeTimes`` and
    ``calculatePhonemePitches``. Nothing is yielded when the text contains no
    phonemes.

    *frame* is ``None`` for an entry flagged ``_silence``. Otherwise it is a
    new ``speechPlayer.Frame`` with ``preFormantGain`` 1.0 and ``outputGain``
    2.0, onto which the phoneme's non-underscore values are applied.
    """
    phonemeList = IPAToPhonemes(ipaText)
    if not phonemeList:
        return
    correctHPhonemes(phonemeList)
    calculatePhonemeTimes(phonemeList, speed)
    calculatePhonemePitches(phonemeList, speed, basePitch, inflection, clauseType)
    for phoneme in phonemeList:
        frameDuration = phoneme.pop('_duration')
        fadeDuration = phoneme.pop('_fadeDuration')
        if phoneme.get('_silence'):
            frame = None
        else:
            frame = speechPlayer.Frame()
            frame.preFormantGain = 1.0
            frame.outputGain = 2.0
            applyPhonemeToFrame(frame, phoneme)
        yield frame, frameDuration, fadeDuration