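-- Модуль:la-pronunc (Module:la-pronunc)
-- Зовнішній вигляд (wiki page chrome: "Appearance")
-- Документацію для цього модуля можна створити у Модуль:la-pronunc/документація
-- (Documentation for this module can be created at Модуль:la-pronunc/документація.)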
local export = {}
local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("la")
-- Spelling-to-phoneme dictionary for the Classical transcription.
-- letters_to_ipa picks the longest key matching the start of the word;
-- characters with no entry here are passed through unchanged.
local letters_ipa = {
	-- Short vowels
	a = "a", e = "e", i = "i", o = "o", u = "u", y = "y",
	-- Long vowels (macron spelling)
	["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "yː",
	-- Diphthongs
	ae = "ae̯", oe = "oe̯", ei = "ei̯", au = "au̯", eu = "eu̯",
	-- Single consonants
	b = "b", d = "d", f = "f",
	c = "k", g = "ɡ", v = "w", x = "ks",
	-- Digraphs ("qv"/"gv" are produced by the spelling normalization in convert_word)
	ph = "pʰ", th = "tʰ", ch = "kʰ", gh = "ɡʰ", rh = "r", qv = "kʷ", gv = "ɡʷ",
	-- Manual stress marks
	["'"] = "ˈ", ["ˈ"] = "ˈ",
}
-- Spelling-to-phoneme dictionary for the Ecclesiastical transcription.
-- Same lookup contract as the Classical dictionary: longest key wins,
-- unlisted characters are passed through unchanged by letters_to_ipa.
local letters_ipa_eccl = {
	-- Short vowels (y is read as i)
	a = "a", e = "e", i = "i", o = "o", u = "u", y = "i",
	-- Long vowels; convert_word strips the length mark again for this variety
	["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "iː",
	-- Digraph vowels
	ae = "ɛ", oe = "ɛ", ei = "ei̯", au = "au̯", eu = "eu̯",
	-- Single consonants
	b = "b", d = "d", f = "f",
	c = "k", g = "ɡ", v = "v", x = "ks",
	-- Digraphs
	ph = "f", th = "tʰ", ch = "kʰ", gh = "ɡʰ", rh = "r", qv = "kw", gv = "ɡw",
	-- h is silent: it yields an empty phoneme
	h = "",
	-- Manual stress marks
	["'"] = "ˈ", ["ˈ"] = "ˈ",
}
-- Spelling-to-phoneme dictionary for the Vulgar transcription.
-- letters_to_ipa picks the longest key matching the start of the word;
-- characters with no entry here are passed through unchanged.
local letters_ipa_vul = {
	-- Short vowels
	["a"] = "a", ["e"] = "ɛ", ["i"] = "i", ["o"] = "ɔ", ["u"] = "u", ["y"] = "e",
	-- Long vowels; the final rule of phonetic_rules_vul strips the length mark
	["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "eː",
	-- Digraph vowels
	["ae"] = "e", ["oe"] = "e", ["ei"] = "ei̯", ["au"] = "au̯", ["eu"] = "eu̯",
	-- Single consonants
	["b"] = "β", ["d"] = "d", ["f"] = "f",
	["c"] = "k", ["g"] = "ɡ", ["v"] = "β", ["x"] = "s",
	-- Digraphs. "gh" uses the IPA letter ɡ (U+0261), like every other entry,
	-- so that the ɡ rules in phonetic_rules_vul also apply to it.
	["ph"] = "f", ["th"] = "tʰ", ["ch"] = "kʰ", ["gh"] = "ɡ", ["rh"] = "r", ["qv"] = "kʷ", ["gv"] = "ɡʷ",
	-- h is silent: it yields an empty phoneme
	["h"] = "",
	-- Manual stress marks
	["'"] = "ˈ", ["ˈ"] = "ˈ",
}
-- Lax realisations of the short vowels. convert_word substitutes these
-- phoneme-by-phoneme when building the Classical phonetic transcription
-- (phonetic set, neither eccl nor vul).
local phonetic_vowels = { e = "ɛ", i = "ɪ", o = "ɔ", u = "ʊ", y = "ʏ" }
-- Allophonic rules for the Classical phonetic transcription.
-- Each entry is a {pattern, replacement} pair. convert_word applies the
-- pairs in list order with mw.ustring.gsub to the assembled word (syllables
-- joined by ".", stress marked by "ˈ"), so later rules see the output of
-- earlier ones.
local phonetic_rules = {
-- Velar nasal assimilation: ɡ before n, and n before k/ɡ, become ŋ.
-- An optional syllable break or stress mark between the two is preserved.
{"ɡ([.ˈ]?)n", "ŋ%1n"},
{"n([.ˈ]?)([kɡ])", "ŋ%1%2"},
-- Fronted labialization before front vowels
{"ʷ([eɛiɪyʏ])", "ᶣ%1"},
-- Tensing of vowels before another vowel (across a syllable break)
{"ɛ%.([aeɛiɪoɔuʊyʏ])", "e.%1"},
{"ɪ%.([aeɛiɪoɔuʊyʏ])", "i.%1"},
-- Nasal vowels: a vowel plus n/m at the very end of the word becomes a
-- nasalised vowel...
{"a[nm]$", "ã"},
{"[eɛ][nm]$", "ẽ"},
{"[iɪ][nm]$", "ĩ"},
{"[oɔ][nm]$", "õ"},
{"[uʊ][nm]$", "ũ"},
{"[yʏ][nm]$", "ỹ"},
-- ...and a vowel plus n/m before s or f (optionally across a syllable
-- break or stress mark) becomes a long nasalised vowel.
{"a[nm]([%.ˈ]?[sf])", "ãː%1"},
{"[eɛ][nm]([%.ˈ]?[sf])", "ẽː%1"},
{"[iɪ][nm]([%.ˈ]?[sf])", "ĩː%1"},
{"[oɔ][nm]([%.ˈ]?[sf])", "õː%1"},
{"[uʊ][nm]([%.ˈ]?[sf])", "ũː%1"},
{"[yʏ][nm]([%.ˈ]?[sf])", "ỹː%1"},
-- L pinguis: every l is first made velarised (ɫ); the two rules after
-- that restore plain l in geminates and before i/ɪ/y/ʏ.
{"l", "ɫ"},
{"ɫ([%.ˈ]?)ɫ", "l%1l"},
{"ɫ([%.ˈ]?[iɪyʏ])", "l%1"},
}
-- Allophonic rules for the Ecclesiastical phonetic transcription.
-- Same format and application order as the Classical rule list:
-- {pattern, replacement} pairs run in sequence by convert_word.
local phonetic_rules_eccl = {
{"n([.ˈ]?)([kɡ])", "ŋ%1%2"}, -- velar nasal assimilation: n before k/ɡ becomes ŋ
{"z", "d͡z"}, -- z is rendered as an affricate
-- Gemination: a syllable-initial ʃ or ɲ between vowels is doubled across
-- the syllable break. The length mark of the preceding vowel is matched
-- outside the capture, so it is dropped from the output.
{"([aɛeiɔou])ː?%.ʃ([aɛeiɔou])","%1ʃ.ʃ%2"}, --gemination
{"([aɛeiɔou])ː?%.ɲ([aɛeiɔou])","%1ɲ.ɲ%2"}, --gemination
}
-- Rules for the Vulgar transcription. {pattern, replacement} pairs applied
-- in list order by convert_word with mw.ustring.gsub; order matters because
-- later rules operate on the output of earlier ones. (export.show forces
-- phonetic = true whenever vul is set, so these always run for that path.)
local phonetic_rules_vul = {
-- Geminate β across a syllable break or stress mark becomes bb
{"β([%.ˈ])β","b%1b"},
-- ɛ/e/i in hiatus before a/ɔ/o becomes the palatalisation mark ʲ and the
-- syllable break is removed
{"[ɛei]%.([aɔo])","ʲ%1"},
-- Same, when the following vowel is stressed: the stress mark is moved to
-- the start of the merged syllable (second rule covers a word-initial syllable)
{"[%.ˈ]([^%.ˈ]*)[ɛei]ˈ([aɔo])","ˈ%1ʲ%2"},
{"([^%.ˈ]*)[ɛei]ˈ([aɔo])","ˈ%1ʲ%2"},
-- t and k are palatalised before ɛ/e/i
{"([tk])([ɛei])","%1ʲ%2"},
-- u in hiatus before another vowel becomes the labialisation mark ʷ
{"u%.([aɛeiɔo])","ʷ%1"},
-- NOTE(review): the two rules below match "u%." which the rule above has
-- already rewritten, so they may never fire — confirm intent
{"[%.ˈ]([^%.ˈ]*)u%.([aɛeiɔo])","ˈ%1ʷ%2"},
{"([^%.ˈ]*)u%.([aɛeiɔo])","ˈ%1ʷ%2"},
-- Short i and u (not followed by the length mark) lower to e and o.
-- A following character is required, so a word-final i/u is not matched.
{"i([^ː])", "e%1"},
{"u([^ː])", "o%1"},
-- Palatalised β hardens to b
{"βʲ","bʲ"},
-- Lenition of a syllable-initial stop between vowels:
-- ɡ→ɣ, d→ð, k→ɡ, t→d, p→b
{"([aɛeiɔouː])([%.ˈ])ɡ([aɛeiɔou])","%1%2ɣ%3"},
{"([aɛeiɔouː])([%.ˈ])d([aɛeiɔou])","%1%2ð%3"},
{"([aɛeiɔouː])([%.ˈ])k([aɛeiɔou])","%1%2ɡ%3"},
{"([aɛeiɔouː])([%.ˈ])t([aɛeiɔou])","%1%2d%3"},
{"([aɛeiɔouː])([%.ˈ])p([aɛeiɔou])","%1%2b%3"},
-- Word-final vowel + n/m becomes a nasalised vowel
{"a[nm]$", "ã"},
{"[eɛ][nm]$", "ẽ"},
{"i[nm]$", "ĩ"},
{"[oɔ][nm]$", "õ"},
{"u[nm]$", "ũ"},
-- Finally, all length marks are removed
{"ː",""},
}
-- Maps a vowel to its long counterpart; an already-long vowel maps to
-- itself. convert_word uses it to lengthen a stressed syllable-final vowel
-- in the Ecclesiastical phonetic transcription.
local lengthen_vowel = {}
for _, short in ipairs({"a", "ɛ", "e", "i", "ɔ", "o", "u"}) do
	local long = short .. "ː"
	lengthen_vowel[short] = long
	lengthen_vowel[long] = long
end
-- Phonemes that count as a syllable nucleus: short vowels, long vowels and
-- diphthongs. Written as a list here; a loop further down in this file also
-- adds each entry as a key (vowels[phoneme] = true) so the syllabification
-- helpers can test membership directly.
local vowels = {
"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
"ae̯", "oe̯", "ei̯", "au̯", "eu̯",
}
-- Consonant sequences accepted as a syllable onset. split_syllables moves
-- leading consonants of a syllable into the previous one until what is
-- left is one of these (or empty). Every entry must be spelled with the
-- exact phoneme strings the letter dictionaries emit; in particular the
-- voiced velar stop is the IPA letter ɡ (U+0261), never ASCII g.
local onsets = {
	-- Single consonants and affricates
	"b", "p", "pʰ", "d", "t", "tʰ", "β",
	"ɡ", "ɡʰ", "k", "kʰ", "kʷ", "ɡʷ", "kw", "ɡw", "t͡s", "t͡ʃ", "d͡ʒ", "ʃ",
	"f", "s", "h", "z",
	"l", "m", "n", "ɲ", "r", "j", "v", "w",
	-- Stop/fricative + liquid
	"bl", "pl", "pʰl", "br", "pr", "pʰr", "ps",
	"dr", "tr", "tʰr",
	"ɡl", "kl", "kʰl", "ɡr", "kr", "kʰr",
	"fl", "fr",
	-- s + consonant(s)
	"sp", "st", "sk", "skʷ", "sl", "sm", "sn", "sw",
	"spr", "str", "skr",
	"spl", "skl",
}
-- Consonant sequences accepted as a syllable coda. split_syllables only
-- uses this for validation: a syllable whose coda is not listed here is
-- reported through Module:debug's track function, not rejected.
-- Like the other phoneme lists, a loop further down in this file also adds
-- each entry as a key (codas[phoneme] = true).
local codas = {
-- Single consonants
"b", "p", "pʰ", "d", "t", "tʰ", "ɡ", "k", "kʰ", "β",
"f", "s",
"l", "m", "n", "ɲ", "r", "j", "ʃ",
-- s + stop
"sp", "st", "sk",
"spʰ", "stʰ", "skʰ",
-- l + consonant
"lp", "lt", "lk",
"lb", "ld", "lɡ",
"lpʰ", "ltʰ", "lkʰ",
"lf",
-- r + consonant
"rp", "rt", "rk",
"rb", "rd", "rɡ",
"rpʰ", "rtʰ", "rkʰ",
"rf",
-- nasal + stop
"mp", "nt", "nk",
"mb", "nd", "nɡ",
"mpʰ", "ntʰ", "nkʰ",
-- liquid + sonorant
"lm", "rl", "rm", "rn",
-- clusters ending in s
"ps", "ts", "ks", "ls", "ns", "rs",
"lks", "nks", "rks",
"lms", "rls", "rms", "rns",
}
-- Replacement table passed to mw.ustring.gsub by convert_words and
-- export.track: breve-marked vowels become plain vowels and the ligatures
-- æ/œ become the digraphs ae/oe. Despite the name, it covers both kinds of
-- character. There is no entry for ŏ.
local breves = {
["ă"] = "a",
["ĕ"] = "e",
["ĭ"] = "i",
["ŭ"] = "u",
["æ"] = "ae",
["œ"] = "oe",
}
-- Make the three phoneme lists usable as sets as well: every list entry
-- is additionally stored as a key with the value true, so membership can be
-- tested with e.g. vowels[phoneme].
for _, inventory in ipairs({vowels, onsets, codas}) do
	for index = 1, #inventory do
		inventory[inventory[index]] = true
	end
end
-- Convert a normalized spelling into a list of phoneme strings.
--
-- word:     spelling of a single word (already normalized by convert_word)
-- phonetic: accepted for signature compatibility; not used here
-- eccl:     truthy to use the Ecclesiastical dictionary and its
--           post-processing pass
-- vul:      truthy to use the Vulgar dictionary (only when eccl is falsy)
--
-- Returns a list of phoneme strings. Entries may be the empty string (silent
-- h, or a consonant absorbed by one of the Ecclesiastical rules).
local function letters_to_ipa(word,phonetic,eccl,vul)
	local phonemes = {}
	local dictionary = eccl and letters_ipa_eccl or (vul and letters_ipa_vul or letters_ipa)

	-- Greedy tokenisation: at each position take the longest dictionary key
	-- that prefixes the remaining word.
	while mw.ustring.len(word) > 0 do
		local longestmatch = ""
		for letter, _ in pairs(dictionary) do
			if mw.ustring.len(letter) > mw.ustring.len(longestmatch) and mw.ustring.sub(word, 1, mw.ustring.len(letter)) == letter then
				longestmatch = letter
			end
		end

		if mw.ustring.len(longestmatch) > 0 then
			if dictionary[longestmatch] == "ks" then
				-- x is two phonemes, so that syllabification can split them
				table.insert(phonemes, "k")
				table.insert(phonemes, "s")
			else
				table.insert(phonemes, dictionary[longestmatch])
			end
			word = mw.ustring.sub(word, mw.ustring.len(longestmatch) + 1)
		else
			-- No dictionary entry: the character stands for itself
			table.insert(phonemes, mw.ustring.sub(word, 1, 1))
			word = mw.ustring.sub(word, 2)
		end
	end

	if eccl then
		for i = 1, #phonemes do
			local following = phonemes[i+1]
			-- k/ɡ before a front vowel become the affricates t͡ʃ/d͡ʒ
			if following and (phonemes[i] == "k" or phonemes[i] == "ɡ") and (following == "e" or following == "ɛ" or following == "eː" or following == "i" or following == "iː") then
				phonemes[i] = phonemes[i] == "k" and "t͡ʃ" or "d͡ʒ"

				-- s + t͡ʃ merges into ʃ unless the s follows a consonant
				if phonemes[i] == "t͡ʃ" and phonemes[i-1] and phonemes[i-1] == "s" and not (phonemes[i-2] and not vowels[phonemes[i-2]]) and not (phonemes[i+1] and not vowels[phonemes[i+1]]) then
					phonemes[i-1] = ""
					phonemes[i] = "ʃ"
				end

				-- Geminates: k + t͡ʃ → t + t͡ʃ
				if phonemes[i-1] and phonemes[i-1] == "k" and phonemes[i] == "t͡ʃ" then
					phonemes[i-1] = "t"
				end

				-- Geminates: ɡ + d͡ʒ → d + d͡ʒ. The dictionaries emit the IPA
				-- letter ɡ (U+0261), so that is what must be compared here.
				if phonemes[i-1] and phonemes[i-1] == "ɡ" and phonemes[i] == "d͡ʒ" then
					phonemes[i-1] = "d"
				end
			end

			-- t + i + vowel → t͡s, except after s
			if phonemes[i+2] and phonemes[i] == "t" and phonemes[i+1] == "i" and vowels[phonemes[i+2]] and not (phonemes[i-1] and phonemes[i-1] == "s") then
				phonemes[i] = "t͡s"
			end

			-- Aspirates lose their aspiration
			if phonemes[i] == "kʰ" then phonemes[i] = "k" end
			if phonemes[i] == "tʰ" then phonemes[i] = "t" end

			-- ɡ + n → ɲ (the ɡ slot is blanked)
			if phonemes[i+1] and phonemes[i] == "ɡ" and phonemes[i+1] == "n" then
				phonemes[i] = ""
				phonemes[i+1] = "ɲ"
			end
		end
	end

	return phonemes
end
-- Return the onset of a syllable as one string: every phoneme before the
-- first vowel, concatenated, with stress marks left out. A syllable without
-- any vowel yields all of its (non-stress) phonemes.
local function get_onset(syll)
	local onset = ""
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			break
		end
		if phoneme ~= "ˈ" then
			onset = onset .. phoneme
		end
	end
	return onset
end
-- Return the coda of a syllable as one string: every phoneme after the
-- last vowel, concatenated. Unlike the onset helper, a stress mark found
-- in that stretch is kept. A syllable without any vowel yields all of its
-- phonemes.
local function get_coda(syll)
	local first = #syll + 1
	-- Walk backwards until the phoneme just before `first` is a vowel
	while first > 1 and not vowels[syll[first - 1]] do
		first = first - 1
	end
	return table.concat(syll, "", first)
end
-- Return the first vowel phoneme of a syllable, or nothing when the
-- syllable contains no vowel.
local function get_vowel(syll)
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			return phoneme
		end
	end
end
-- Split a flat phoneme list into syllables.
--
-- remainder: list of phoneme strings; "." forces a syllable break and "ˈ"
--            starts a new (manually stressed) syllable. The list is consumed:
--            it is empty when the function returns.
--
-- Returns a list of syllables, each a list of phoneme strings. A "ˈ" entry
-- is kept inside the syllable it marks. Syllables whose onset or coda is not
-- in the onsets/codas tables are reported through Module:debug's track
-- function but still returned.
local function split_syllables(remainder)
	local syllables = {}
	local syll = {}

	-- Pass 1: cut after every vowel (CV shape), and at explicit marks
	while #remainder > 0 do
		local phoneme = table.remove(remainder, 1)

		if phoneme == "." then
			if #syll > 0 then
				table.insert(syllables, syll)
				syll = {}
			end
		elseif phoneme == "ˈ" then
			if #syll > 0 then
				table.insert(syllables,syll)
			end
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			table.insert(syll, phoneme)
			table.insert(syllables, syll)
			syll = {}
		else
			table.insert(syll, phoneme)
		end
	end

	-- If there are phonemes left, then the word ends in a consonant.
	-- Add them to the last syllable; if the word produced no syllable at all
	-- (no vowel and no break), the leftovers form the only syllable.
	if #syllables == 0 then
		if #syll > 0 then
			table.insert(syllables, syll)
		end
	else
		for _, phoneme in ipairs(syll) do
			table.insert(syllables[#syllables], phoneme)
		end
	end

	-- Pass 2: split consonant clusters between syllables.
	-- An explicit index is used because syllables can be removed on the way;
	-- after a removal the same index is examined again so that the syllable
	-- that slid into this position is not skipped.
	local i = 2
	while i <= #syllables do
		local current = syllables[i]
		local previous = syllables[i-1]
		local onset = get_onset(current)

		-- Shift over consonants until the syllable onset is valid
		while not (onset == "" or onsets[onset]) do
			table.insert(previous, table.remove(current, 1))
			onset = get_onset(current)
		end

		-- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with ɡn, then shift it over
		-- (ɡ is the IPA letter U+0261, as emitted by the letter dictionaries)
		if get_coda(previous) == "" and ((current[1] == "s" and not vowels[current[2]]) or (current[1] == "ɡ" and current[2] == "n")) then
			table.insert(previous, table.remove(current, 1))
		end

		if get_vowel(current) then
			i = i + 1
		else
			-- No vowel at all in this syllable: merge it into the previous one
			for _ = 1, #current do
				table.insert(previous, table.remove(current, 1))
			end
			table.remove(syllables, i)
		end
	end

	-- Pass 3: report (but tolerate) syllables with unexpected margins
	for _, checked in ipairs(syllables) do
		local onset = get_onset(checked)
		local coda = get_coda(checked)

		if not (onset == "" or onsets[onset]) then
			require("Module:debug").track("la-pronunc/bad onset")
			--error("onset error:[" .. onset .. "]")
		end

		if not (coda == "" or codas[coda]) then
			require("Module:debug").track("la-pronunc/bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end

	return syllables
end
-- Work out which syllable carries the stress.
--
-- syllables: list of syllables (each a list of phoneme strings)
-- eccl:      accepted for signature compatibility; not used here
--
-- Returns the 1-based index of the stressed syllable, or nothing for a word
-- of fewer than two syllables without a manual stress mark.
-- Side effect: a manual stress mark "ˈ", if present, is removed from its
-- syllable.
local function detect_accent(syllables,eccl)
	-- A manual stress mark overrides the automatic placement
	for index, syll in ipairs(syllables) do
		for pos, phoneme in ipairs(syll) do
			if phoneme == "ˈ" then
				table.remove(syll, pos)
				return index
			end
		end
	end

	local count = #syllables
	if count < 2 then
		return
	end
	if count == 2 then
		return 1
	end

	-- Three or more syllables: if the penultimate syllable ends in a single
	-- short vowel, stress the antepenultimate; otherwise the penultimate.
	local penult = syllables[count - 1]
	if mw.ustring.find(penult[#penult], "^[aɛeiɔouy]$") then
		return count - 2
	end
	return count - 1
end
-- Transcribe a single (lower-case) word.
--
-- word:     spelling of one word
-- phonetic: truthy for the phonetic transcription, falsy for the phonemic one
-- eccl:     truthy for the Ecclesiastical variety
-- vul:      truthy for the Vulgar variety (only consulted when eccl is falsy)
--
-- Returns the transcription as a string, with syllables separated by "."
-- and the stressed syllable preceded by "ˈ".
local function convert_word(word, phonetic, eccl, vul)
	-- Spelling-level rewrites, applied strictly in this order
	local spelling_rules = {
		-- Normalize spelling: consonantal/vocalic i, u, v
		{"w", "v"},
		{"([aeiouyāēīōūȳ])v([^aeiouyāēīōūȳ])", "%1u%2"},
		{"qu", "qv"},
		{"ngu([aeiouyāēīōūȳ])", "ngv%1"},
		{"^i([aeiouyāēīōūȳ])", "j%1"},
		{"^u([aeiouyāēīōūȳ])", "v%1"},
		{"([aeiouyāēīōūȳ])i([aeiouyāēīōūȳ])", "%1jj%2"},
		{"([aeiouyāēīōūȳ])u([aeiouyāēīōūȳ])", "%1v%2"},
		-- Eu and ei: final -eum/-eus and every "ei" are split into two
		-- syllables; an underscore between the letters shields a pair from
		-- this and is then dropped
		{"e(u[ms])$", "e.%1"},
		{"ei", "e.i"},
		{"_", ""},
		-- Vowel length before nasal + fricative is allophonic
		{"ā([mn][fs])", "a%1"},
		{"ē([mn][fs])", "e%1"},
		{"ī([mn][fs])", "i%1"},
		{"ō([mn][fs])", "o%1"},
		{"ū([mn][fs])", "u%1"},
		{"ȳ([mn][fs])", "y%1"},
		-- Basic phoneme-level assimilations
		{"xs?", "x"},
		{"b([st])", "p%1"},
		{"d([st])", "t%1"},
		{"g([st])", "k%1"},
		{"n([bp])", "m%1"},
	}
	for _, rule in ipairs(spelling_rules) do
		word = mw.ustring.gsub(word, rule[1], rule[2])
	end

	-- Letters → phonemes → syllables → stress position
	local syllables = split_syllables(letters_to_ipa(word, phonetic, eccl, vul))
	local accent = detect_accent(syllables, eccl)

	for index, syll in ipairs(syllables) do
		local stressed = (index == accent)

		-- Per-phoneme adjustment: Ecclesiastical has no length marks;
		-- Classical phonetic uses the lax short vowels
		for pos = 1, #syll do
			if eccl then
				syll[pos] = mw.ustring.gsub(syll[pos], "ː", "")
			elseif phonetic and not vul then
				syll[pos] = phonetic_vowels[syll[pos]] or syll[pos]
			end
		end

		-- Ecclesiastical phonetic: a stressed syllable-final vowel is long
		local last = syll[#syll]
		if eccl and stressed and phonetic and vowels[last] then
			syll[#syll] = lengthen_vowel[last] or last
		end

		-- Blank the second of two identical neighbouring phonemes
		for pos = 2, #syll do
			if syll[pos - 1] == syll[pos] then
				syll[pos] = ""
			end
		end

		-- Flatten the syllable; Vulgar unstressed ɔ/ɛ become o/e
		local text = table.concat(syll)
		if vul and not stressed then
			text = mw.ustring.gsub(text, "ɔ", "o")
			text = mw.ustring.gsub(text, "ɛ", "e")
		end
		syllables[index] = (stressed and "ˈ" or "") .. text
	end

	-- The stress mark replaces the syllable separator in front of it
	word = (mw.ustring.gsub(table.concat(syllables, "."), "%.ˈ", "ˈ"))

	if phonetic then
		local rules
		if eccl then
			rules = phonetic_rules_eccl
		elseif vul then
			rules = phonetic_rules_vul
		else
			rules = phonetic_rules
		end
		for _, rule in ipairs(rules) do
			word = mw.ustring.gsub(word, rule[1], rule[2])
		end
	elseif eccl then
		word = mw.ustring.gsub(word, "ɔ", "o")
	end

	return word
end
-- Transcribe a phrase word by word.
--
-- words:    one or more words separated by single spaces
-- phonetic, eccl, vul: passed through unchanged to convert_word
--
-- Returns the transcriptions joined by single spaces.
-- Raises an error naming the offending character(s) if, after clean-up, the
-- input contains anything other than Latin letters, macron vowels, the
-- syllable/stress/diphthong marks (. ' _) and spaces.
local function convert_words(words, phonetic, eccl, vul)
	words = mw.ustring.lower(words)
	-- Punctuation and hyphens are dropped
	words = mw.ustring.gsub(words,'[,?!:;()"%-]', '')
	-- Breve vowels and ligatures are reduced to plain letters
	words = mw.ustring.gsub(words,'[ăĕĭŭæœ]', breves)
	-- ŏ has no entry in the breves table; handle it here so that it is
	-- accepted like the other breve-marked vowels
	words = mw.ustring.gsub(words, 'ŏ', 'o')

	local disallowed = mw.ustring.gsub(words, '[a-zA-Zāēīōūȳ,.?!:;()\'"_ ]', '')

	if mw.ustring.len(disallowed) > 0 then
		if mw.ustring.len(disallowed) == 1 then
			error('The character "' .. disallowed .. '" is not allowed.')
		else
			error('The characters "' .. disallowed .. '" are not allowed.')
		end
	end

	local result = {}

	for word in mw.text.gsplit(words, " ") do
		table.insert(result, convert_word(word, phonetic, eccl, vul))
	end

	return table.concat(result, " ")
end
-- Format one output row: an accent qualifier followed by the formatted
-- IPA.
--
-- phm:  phonemic transcription (wrapped in /.../)
-- pht:  phonetic transcription (wrapped in [...]; omitted when equal to phm)
-- dial: accent label handed to the accent-qualifier module
local function make_row(phm, pht, dial)
	local transcriptions = {}
	transcriptions[1] = {pron = '/' .. phm .. '/'}
	if pht ~= phm then
		transcriptions[#transcriptions + 1] = {pron = '[' .. pht .. ']'}
	end

	local qualifier = m_a.show({dial})
	local ipa = m_IPA.format_IPA_full(lang, transcriptions)
	return qualifier .. ' ' .. ipa
end
-- Template entry point: build the full pronunciation section.
--
-- frame: Scribunto frame; the arguments are read from the parent frame.
--   1         word(s) to transcribe (defaults to the page title, or to
--             'prōnuntiātiō' on Template pages)
--   classical boolean, default true: include the Classical row
--   eccl      boolean: include the Ecclesiastical row (also adds a category)
--   vul       boolean, default true on Reconstruction pages: include the
--             Vulgar row
--
-- Returns wikitext: the requested rows, separated by "\n* " (the caller is
-- expected to supply the bullet for the first row), followed by the
-- formatted categories.
function export.show_full(frame)
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = {default = title.nsText == 'Template' and 'prōnuntiātiō' or title.text},
		classical = {type = 'boolean', default = true},
		eccl = {type = 'boolean'},
		vul = {type = 'boolean', default = title.nsText == 'Reconstruction'},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	-- Kept local: a bare assignment here would create a global
	local words = args[1]:lower()
	local categories = {}
	-- Rows are collected and joined afterwards so that the separator only
	-- ever appears between two rows, whichever of them are enabled
	local rows = {}

	if args.classical then
		table.insert(rows, make_row(
			convert_words(words, false, false, false),
			convert_words(words, true, false, false),
			'Classical'
		))
	end

	if args.eccl then
		table.insert(rows, make_row(
			convert_words(words, false, true, false),
			convert_words(words, true, true, false),
			'Ecclesiastical'
		))
		table.insert(categories, lang:getCanonicalName() .. ' terms with Ecclesiastical IPA pronunciation')
	end

	if args.vul then
		table.insert(rows, make_row(
			convert_words(words, false, false, true),
			convert_words(words, true, false, true),
			'Vulgar'
		))
	end

	-- NOTE(review): the value returned by export.track is not used here
	export.track(words)

	return table.concat(rows, '\n* ') .. require("Module:utilities").format_categories(categories)
end
-- Transcribe a phrase. Usable both from other modules and from
-- {{#invoke:}}.
--
-- words:    either the text to transcribe, or a frame. With a frame, the
--           text is frame.args[1] (falling back to the current page title)
--           and eccl/vul are taken from frame.args.
-- phonetic: truthy for the phonetic transcription; forced on when vul is set
-- eccl:     truthy for the Ecclesiastical variety
-- vul:      truthy for the Vulgar variety
--
-- Returns the transcription string produced by convert_words.
function export.show(words, phonetic, eccl, vul)
	if type(words) == "table" then -- assume a frame
		local frame_args = words.args
		eccl = frame_args["eccl"]
		vul = frame_args["vul"]
		-- Choose the text first and lower-case afterwards: calling :lower()
		-- on a missing argument would raise before any fallback is reached
		words = (frame_args[1] or mw.title.getCurrentTitle().text):lower()
	end

	if vul then
		phonetic = true
	end

	return convert_words(words, phonetic, eccl, vul)
end
-- Convenience wrapper: the phonetic (allophonic) transcription of `word`,
-- i.e. export.show with its phonetic argument set to true.
export.allophone = function(word, eccl, vul)
	local phonetic = true
	return export.show(word, phonetic, eccl, vul)
end
-- Prefixes that export.track strips from the start of the text (repeatedly,
-- until none matches any more) before it tests what the remainder begins
-- with. Each entry is concatenated into a "^"-anchored pattern, so entries
-- must not contain pattern magic characters.
local unassimilated_prefixes = {
'ab','ad','circum','con','contrā','dis','ex','īn','inter','ob','per','sub','subter'
}
-- Tracking helper: after stripping any leading unassimilated prefixes,
-- report whether the text begins with i followed by a vowel.
--
-- words: either the text, or a frame (then frame.args[1] is used, falling
--        back to the current page title)
--
-- Returns a category link string when the pattern matches, nothing
-- otherwise.
function export.track(words)
	if type(words) == "table" then -- assume a frame
		-- Choose the text first and lower-case afterwards: calling :lower()
		-- on a missing argument would raise before any fallback is reached
		words = (words.args[1] or mw.title.getCurrentTitle().text):lower()
	end
	words = mw.ustring.lower(words)
	words = mw.ustring.gsub(words,'[,.?!:;()\'"%-]','')
	words = mw.ustring.gsub(words,'[ăĕĭŭæœ]', breves)

	-- Strip prefixes until a full pass changes nothing. `stripped` is kept
	-- local so that no global is created.
	local stripped = words
	repeat
		words = stripped
		for _, prefix in ipairs(unassimilated_prefixes) do
			stripped = mw.ustring.gsub(stripped, "^" .. prefix, "")
		end
	until stripped == words

	if mw.ustring.find(words,'^i[aeiouāēīōū]') then
		return "[[Category:Kenny's testing category 8]]"
	end
end
return export