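-- Modul:ang-pron
-- (Wiki page chrome captured together with the module source. Original text:
-- "Qiyofa" / "Bu modul uchun Modul:ang-pron/doc nomli hujjat sahifasini yaratishingiz mumkin",
-- i.e. a notice that a documentation page can be created at Modul:ang-pron/doc.)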
local strutils = require("Module:string utilities")
local m_table = require("Module:table")
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("ang")
local com = require("Module:ang-common")
local u = mw.ustring.char
local rsubn = mw.ustring.gsub
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local rgsplit = mw.text.gsplit
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
-- Wrapper around rsubn() (mw.ustring.gsub) that yields only the substituted
-- string; the replacement count is dropped by the enclosing parentheses.
local function rsub(term, foo, bar, n)
	return (rsubn(term, foo, bar, n))
end
-- Byte-oriented counterpart of rsub(): calls term:gsub() and returns only the
-- resulting string; the replacement count is dropped by the parentheses.
local function gsub(term, foo, bar, n)
	return (term:gsub(foo, bar, n))
end
-- Table of functions exported by this module (returned at the end of the file).
local export = {}
-- When auto-generating primary and secondary stress accents, we use these
-- special characters, and later convert to normal IPA accent marks, so
-- we can distinguish auto-generated stress from user-specified stress.
local AUTOACUTE = u(0xFFF0)
local AUTOGRAVE = u(0xFFF1)
-- When the user uses the "explicit allophone" notation such as [z] or [ç] to
-- force a particular allophone, we internally convert that notation into a
-- single special character. The placeholders occupy the consecutive code
-- points U+FFF2 through U+FFFD.
local EXPLICIT_TH = u(0xFFF2)
local EXPLICIT_DH = u(0xFFF3)
local EXPLICIT_S = u(0xFFF4)
local EXPLICIT_Z = u(0xFFF5)
local EXPLICIT_F = u(0xFFF6)
local EXPLICIT_V = u(0xFFF7)
local EXPLICIT_G = u(0xFFF8)
local EXPLICIT_GH = u(0xFFF9)
local EXPLICIT_H = u(0xFFFA)
local EXPLICIT_X = u(0xFFFB)
local EXPLICIT_C = u(0xFFFC)
local EXPLICIT_I = u(0xFFFD)
-- Every placeholder that stands for a consonant, concatenated so it can be
-- spliced into a character class. EXPLICIT_I is not listed here; it is
-- appended to the front-vowel class instead.
local explicit_cons = EXPLICIT_TH .. EXPLICIT_DH .. EXPLICIT_S .. EXPLICIT_Z ..
EXPLICIT_F .. EXPLICIT_V .. EXPLICIT_G .. EXPLICIT_GH .. EXPLICIT_H ..
EXPLICIT_X .. EXPLICIT_C
-- Map "explicit allophone" notation into special char. See above.
-- Keys are the single character the user writes between square brackets;
-- values are the corresponding internal placeholder. Used as the replacement
-- table for the pattern "%[(.)%]" when a word is first processed.
local char_to_explicit_char = {
["þ"] = EXPLICIT_TH,
["ð"] = EXPLICIT_DH,
["s"] = EXPLICIT_S,
["z"] = EXPLICIT_Z,
["f"] = EXPLICIT_F,
["v"] = EXPLICIT_V,
["g"] = EXPLICIT_G,
["ɣ"] = EXPLICIT_GH,
["h"] = EXPLICIT_H,
["x"] = EXPLICIT_X,
["ç"] = EXPLICIT_C,
["i"] = EXPLICIT_I,
}
-- Map "explicit allophone" notation into normal spelling, for supporting ann=.
-- Keys are the bracketed character; values are the ordinary letter that the
-- annotation should display in its place (e.g. both [þ] and [ð] display as þ).
local char_to_spelling = {
["þ"] = "þ",
["ð"] = "þ",
["s"] = "s",
["z"] = "s",
["f"] = "f",
["v"] = "f",
["g"] = "g",
["ɣ"] = "g",
["h"] = "h",
["x"] = "h",
["ç"] = "h",
["i"] = "i",
}
-- Map "explicit allophone" notation into phonemes, for phonemic output.
-- Keys are the internal placeholders; each voiced/voiceless (or other
-- allophonic) pair collapses onto a single phoneme symbol. Applied
-- character-by-character to the final phonemic string.
local explicit_char_to_phonemic = {
[EXPLICIT_TH] = "θ",
[EXPLICIT_DH] = "θ",
[EXPLICIT_S] = "s",
[EXPLICIT_Z] = "s",
[EXPLICIT_F] = "f",
[EXPLICIT_V] = "f",
[EXPLICIT_G] = "ɡ", -- IPA ɡ!
[EXPLICIT_GH] = "ɡ", -- IPA ɡ!
[EXPLICIT_H] = "x",
[EXPLICIT_X] = "x",
[EXPLICIT_C] = "x",
[EXPLICIT_I] = "i",
}
-- Map "explicit allophone" notation into IPA phones, for phonetic output.
-- Keys are the internal placeholders; values are exactly the allophone the
-- user requested. Used as the replacement table of the last phonetic rule, so
-- the earlier phonetic rules never see (and never alter) the forced sound.
local explicit_char_to_phonetic = {
[EXPLICIT_TH] = "θ",
[EXPLICIT_DH] = "ð",
[EXPLICIT_S] = "s",
[EXPLICIT_Z] = "z",
[EXPLICIT_F] = "f",
[EXPLICIT_V] = "v",
[EXPLICIT_G] = "ɡ", -- IPA ɡ!
[EXPLICIT_GH] = "ɣ",
[EXPLICIT_H] = "h",
[EXPLICIT_X] = "x",
[EXPLICIT_C] = "ç",
[EXPLICIT_I] = "i",
}
-- Character inventories used to build patterns. A bare name (e.g. `vowel`) is
-- the raw list of characters; the `_c` variant is the same list wrapped in
-- square brackets so it can be used directly as a pattern character class.

-- All diacritics that may follow a vowel: the length mark (macron) plus the
-- user-specified and auto-generated stress marks.
local accent = com.MACRON .. com.ACUTE .. com.GRAVE .. com.CFLEX .. AUTOACUTE .. AUTOGRAVE
local accent_c = "[" .. accent .. "]"
-- Same as `accent` but without the macron, i.e. stress-related marks only.
local stress_accent = com.ACUTE .. com.GRAVE .. com.CFLEX .. AUTOACUTE .. AUTOGRAVE
local stress_accent_c = "[" .. stress_accent .. "]"
local back_vowel = "aɑou"
-- The explicit-[i] placeholder counts as a front vowel.
local front_vowel = "eiyæœø" .. EXPLICIT_I
local vowel = back_vowel .. front_vowel
local vowel_or_accent = vowel .. accent
local vowel_c = "[" .. vowel .. "]"
local vowel_or_accent_c = "[" .. vowel_or_accent .. "]"
-- Note: matches anything that is not a vowel, including accents and boundary
-- symbols, not just consonants.
local non_vowel_c = "[^" .. vowel .. "]"
local front_vowel_c = "[" .. front_vowel .. "]"
-- The following include both IPA symbols and letters (including regular g and IPA ɡ)
-- so it can be used at any step of the process.
local obstruent = "bcċçdfgɡɣhkpqstvxzþðθʃʒ" .. explicit_cons
local resonant = "lmnŋrɫ"
local glide = "ġjwƿ"
local cons = obstruent .. resonant .. glide
local cons_c = "[" .. cons .. "]"
-- Sounds between which a fricative is voiced by the phonetic rules.
local voiced_sound = vowel .. "lrmnwjbdɡ" -- WARNING, IPA ɡ used here
-- These rules operate in order, and apply to the actual spelling,
-- after (1) macron decomposition, (2) syllable and prefix splitting,
-- (3) placement of primary and secondary stresses at the beginning
-- of the syllable. Each syllable will be separated either by ˈ
-- (if the following syllable is stressed), by ˌ (if the following
-- syllable has secondary stress), or by . (otherwise). In addition,
-- morpheme boundaries where the consonants on either side should be
-- treated as at the beginning/end of word (i.e. between prefix and
-- word, or between words in a compound word) will be marked with ⁀
-- before the syllable separator, and the beginning and end of text
-- will be marked by ⁀⁀. The output of this is fed into phonetic_rules,
-- and then is used to generate the displayed phonemic pronunciation
-- by removing ⁀ symbols.
--
-- Each rule is a list {pattern, replacement[, allowed_pos]}: `pattern` and
-- `replacement` are handed to a global Unicode-aware substitution (the
-- replacement may be a string or a lookup table), and the optional third
-- element lists the parts of speech for which the rule is applied; rules
-- without it apply to every part of speech.
local phonemic_rules = {
-- The (decomposed) macron becomes the IPA length mark.
{com.MACRON, "ː"},
{"eoː", "oː"}, -- e.g. ġeōmor
{"eaː", "aː"},
-- Digraphs spelled e/i (+ optional length) + a/e/o are looked up in the table
-- below; sequences not listed in the table are left unchanged.
{"[ei]ː?[aeo]", {
-- Alternative notation for short diphthongs: iu̯, eo̯, æɑ̯
-- Alternative notation for long diphthongs: iːu̯, eːo̯, æːɑ̯
["ea"] = "æ͜ɑ",
["eːa"] = "æ͜ɑː",
["eo"] = "e͜o",
["eːo"] = "e͜oː",
["io"] = "i͜u",
["iːo"] = "i͜uː",
["ie"] = "i͜y",
["iːe"] = "i͜yː",
}},
-- sċ between vowels when at the beginning of a syllable should be ʃ.ʃ
{"(" .. vowel_c .. "ː?)([.ˈˌ]?)sċ(" .. vowel_c .. ")", "%1ʃ%2ʃ%3"},
-- other sċ should be ʃ; note that sċ divided between syllables becomes s.t͡ʃ
{"sċ", "ʃ"},
-- x between vowels when at the beginning of a syllable should be k.s;
-- remaining x handled below
{"(" .. vowel_c .. "ː?)([.ˈˌ]?)x(" .. vowel_c .. ")", "%1k%2s%3"},
-- z between vowels when at the beginning of a syllable should be t.s;
-- remaining z handled below
{"(" .. vowel_c .. "ː?)([.ˈˌ]?)z(" .. vowel_c .. ")", "%1t%2s%3"},
-- short front vowel + -rian, -riend, -rienne, -riende in verb or verbal is
-- rendered with /j/; we need to carefully change the syllable structure
-- when doing this
{"(" .. front_vowel_c .. ")%.ri%.(an⁀)", "%1r.ġ%2", {"verb"}},
{"(" .. front_vowel_c .. ")%.ri%.(end⁀)", "%1r.ġ%2", {"verb", "verbal"}},
{"(" .. front_vowel_c .. ")%.ri%.(en%.[nd]e⁀)", "%1r.ġ%2", {"verb", "verbal"}},
-- Two-letter sequences that must be handled before the single-letter mapping
-- below; the optional capture preserves a syllable/stress separator that
-- falls between the two letters.
{"nċ([.ˈˌ]?)ġ", "n%1j"},
{"ċ([.ˈˌ]?)ġ", "j%1j"},
{"c([.ˈˌ]?)g", "g%1g"},
{"ċ([.ˈˌ]?)ċ", "t%1t͡ʃ"},
-- Finally map each remaining letter to its phoneme symbol; characters not
-- listed in the table are left unchanged.
{".", {
["ċ"] = "t͡ʃ",
["c"] = "k",
["ġ"] = "j",
["h"] = "x",
["þ"] = "θ",
["ð"] = "θ",
["ƿ"] = "w",
["x"] = "ks",
["z"] = "ts",
["g"] = "ɡ", -- map to IPA ɡ
["a"] = "ɑ",
["œ"] = "ø",
}},
}
-- Voiceless fricative -> voiced counterpart; used by the fricative-voicing
-- rule in phonetic_rules.
local fricative_to_voiced = {
["f"] = "v",
["s"] = "z",
["θ"] = "ð",
}
-- Voiced fricative -> voiceless counterpart (the inverse of the table above);
-- used by the devoicing rule in phonetic_rules.
local fricative_to_unvoiced = {
["v"] = "f",
["z"] = "s",
["ð"] = "θ",
}
-- These rules operate in order, on the output of phonemic_rules.
-- The output of this is used to generate the displayed phonetic
-- pronunciation by removing ⁀ symbols.
--
-- Each rule is a list {pattern, replacement[, allowed_pos]}: `replacement` may
-- be a string, a lookup table or a function receiving the pattern's captures,
-- and the optional third element lists the parts of speech for which the rule
-- is applied.
local phonetic_rules = {
-- Fricative voicing between voiced sounds. Note, the following operates
-- across a ⁀ boundary for a fricative before the boundary but not after.
{"([" .. voiced_sound .. "][ː.ˈˌ]*)([fsθ])([ː.ˈˌ⁀]*[" .. voiced_sound .. "])",
function(s1, c, s2)
return s1 .. fricative_to_voiced[c] .. s2
end
},
-- Fricative between unstressed vowels should be devoiced.
-- Note that unstressed syllables are preceded by . while stressed
-- syllables are preceded by a stress mark.
{"(%.[^.⁀][" .. vowel .. com.DOUBLE_BREVE_BELOW .. "ː]*%.)([vzð])",
function(s1, c)
return s1 .. fricative_to_unvoiced[c]
end
},
-- Final -sian, -siend, -sienne, -siende (and variants such as -siġan,
-- -siġend, etc.) in verb or verbal is rendered with [s]; clǣnsian will
-- have to be special-cased with ''[z]''
{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?ɑn⁀)", "%1s%2", {"verb"}},
{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?end⁀)", "%1s%2", {"verb", "verbal"}},
{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?en%.[nd]e⁀)", "%1s%2", {"verb", "verbal"}},
-- Final unstressed -þu/-þo after a consonant should be devoiced.
{"(" .. cons_c .. "ː?" .. "%.)ð([uo]⁀)",
function(s1, s2)
return s1 .. "θ" .. s2
end
},
-- x (i.e. spelled h) + resonant or w becomes the corresponding voiceless sound.
{"x[wnlr]", {
["xw"] = "ʍ",
["xl"] = "l̥",
["xn"] = "n̥",
["xr"] = "r̥",
}},
-- Note, the following will not operate across a ⁀ boundary.
{"n([.ˈˌ]?[ɡk])", "ŋ%1"}, -- WARNING, IPA ɡ used here
{"n([.ˈˌ]?)j", "n%1d͡ʒ"},
{"j([.ˈˌ]?)j", "d%1d͡ʒ"},
{"([^x][⁀.ˈˌ])x", "%1h"}, -- [h] occurs as a syllable-initial allophone
{"(" .. front_vowel_c .. ")x", "%1ç"}, -- [ç] occurs after front vowels
-- An IPA ɡ after a word/prefix boundary, after another ɡ or after n
-- (previously converted to ŋ in this circumstance) should remain as ɡ,
-- while all other ɡ's should be converted to ɣ except that word-final ɡ
-- becomes x. We do this by converting the ɡ's that should remain to regular
-- g (which should never occur otherwise), convert the remaining IPA ɡ's to ɣ
-- or x, and then convert the regular g's back to IPA ɡ.
-- NOTE(review): no rule below actually produces x for a word-final ɡ; every
-- remaining ɡ becomes ɣ. Either the comment or the rule list looks out of
-- date -- confirm which one is intended.
{"ɡ([.ˈˌ]?)ɡ", "g%1g"}, -- WARNING, IPA ɡ on the left, regular g on the right
{"([ŋ⁀])([.ˈˌ]?)ɡ", "%1%2g"}, -- WARNING, IPA ɡ on the left, regular g on the right
{"ɡ", "ɣ"},
{"g", "ɡ"}, -- WARNING, regular g on the left, IPA ɡ on the right
-- l and r are velarized when geminate or when followed by a consonant.
{"l([.ˈˌ]?)l", "ɫ%1ɫ"},
{"r([.ˈˌ]?)r", "rˠ%1rˠ"},
{"l([.ˈˌ]?" .. cons_c .. ")", "ɫ%1"},
{"r([.ˈˌ]?" .. cons_c .. ")", "rˠ%1"},
-- Geminate consonants within a single syllable are pronounced singly.
-- Does not apply e.g. to ''ǣttren'', which will be divided as ''ǣt.tren''.
{"(" .. cons_c .. ")%1", "%1"},
-- rˠ is two characters, so the backreference rule above cannot collapse it.
{"rˠrˠ", "rˠ"},
-- [In the sequence vowel + obstruent + resonant in a single syllable,
-- the resonant should become syllabic, e.g. ādl [ˈɑːdl̩], blōstm [bloːstm̩],
-- fæþm [fæðm̩], bēacn [ˈbæːɑ̯kn̩]. We allow anything but a syllable or word
-- boundary between the vowel and the obstruent.] [BASED ON INPUT FROM
-- [[User:Urszag]], I'VE DECIDED AGAINST THIS]
-- {"(" .. vowel_c .. "[^.ˈˌ⁀]*[" .. obstruent .. "]ː?[" .. resonant .. "])", "%1" .. com.SYLLABIC},
-- also -mn e.g stemn /ˈstemn̩/; same for m + other resonants except m
-- {"(" .. vowel_c .. "[^.ˈˌ⁀]*mː?[lnŋrɫ])", "%1" .. com.SYLLABIC},
-- Last of all, expand the explicit-allophone placeholders into the phones the
-- user asked for; doing it last keeps them out of reach of the rules above.
{".", explicit_char_to_phonetic},
}
-- Run `word` through every rule of `rules`, in order, and return the result.
-- A rule is {pattern, replacement[, allowed_pos]}; when the third element is
-- present the rule is skipped unless `pos` is one of the listed parts of
-- speech.
local function apply_rules(word, rules, pos)
	local current = word
	for _, rule in ipairs(rules) do
		local pattern, replacement, allowed_pos = rule[1], rule[2], rule[3]
		local applicable = not allowed_pos or m_table.contains(allowed_pos, pos)
		if applicable then
			current = rsub(current, pattern, replacement)
		end
	end
	return current
end
-- Fetch the stress setting for part of speech `pos` from `stress_spec`.
-- A "verbal" lookup falls back to the "verb" entry when there is no entry of
-- its own. Returns nil when nothing applies.
local function lookup_stress_spec(stress_spec, pos)
	local direct = stress_spec[pos]
	if direct then
		return direct
	end
	if pos == "verbal" then
		return stress_spec["verb"] or nil
	end
	return nil
end
-- Split `word` into its component parts and mark where stress belongs.
--
-- The word is first divided at the explicit separators `<`, `>` and `-`. Each
-- resulting part -- unless it is immediately followed by `<` or preceded by
-- `>` -- is then repeatedly checked against the prefix list (com.prefixes) and
-- the suffix list (com.suffixes); recognized affixes are split off into parts
-- of their own. An entry of either list is {pattern, stress_spec}, where
-- stress_spec maps a part of speech to "unstressed", "secstressed" or
-- "stressed" and may carry a `restriction` pattern the remainder must match.
--
-- Stress is recorded by inserting AUTOACUTE (primary) or AUTOGRAVE (secondary)
-- after the first vowel of a part. A part that already carries a user-written
-- stress accent is not auto-stressed; a circumflex is removed and suppresses
-- auto-stress on that part. A `+` next to a would-be affix boundary prevents
-- the split; all `+` are removed from the result.
--
-- @param word  decomposed spelling of a single word
-- @param pos   part of speech ("noun", "verb" or "verbal")
-- @return list of parts in their original left-to-right order
local function split_on_word_boundaries(word, pos)
	local retparts = {}
	-- Odd-numbered entries are text, even-numbered entries are the separators
	-- that divided them; hence the main loop advances by 2.
	local parts = strutils.capturing_split(word, "([<>%-])")
	local i = 1
	local saw_primary_stress = false
	while i <= #parts do
		-- Prefixes are inserted here (advancing the position each time); the
		-- stem and then the suffixes are also inserted here, which places the
		-- stem before the suffixes and inner suffixes before outer ones.
		local insert_position = #retparts + 1
		if parts[i + 1] ~= "<" and parts[i - 1] ~= ">" then
			-- Split off any prefixes.
			while true do
				local broke_prefix = false
				for _, prefixspec in ipairs(com.prefixes) do
					local prefix_pattern = prefixspec[1]
					local stress_spec = prefixspec[2]
					local pos_stress = lookup_stress_spec(stress_spec, pos)
					local prefix, rest = rmatch(parts[i], "^(" .. prefix_pattern .. ")(.*)$")
					if prefix then
						if not pos_stress then
							-- prefix not recognized for this POS, don't split here
						elseif stress_spec.restriction and not rfind(rest, stress_spec.restriction) then
							-- restriction not met, don't split here
						elseif rfind(rest, "^%+") then
							-- explicit non-boundary here, so don't split here
						elseif not rfind(rest, vowel_c) then
							-- no vowels, don't split here
						elseif rfind(rest, "^..?$") then
							-- only two letters, unlikely to be a word, probably an ending, so don't split
							-- here
						else
							local initial_cluster, after_cluster = rmatch(rest, "^(" .. non_vowel_c .. "*)(.-)$")
							if rfind(initial_cluster, "..") and (
								not (com.onsets_2[initial_cluster] or com.secondary_onsets_2[initial_cluster] or
									com.onsets_3[initial_cluster])) then
								-- initial cluster isn't a possible onset, don't split here
							elseif rfind(initial_cluster, "^x") then
								-- initial cluster isn't a possible onset, don't split here
							elseif rfind(after_cluster, "^" .. vowel_c .. "$") then
								-- remainder is a cluster + short vowel,
								-- unlikely to be a word so don't split here
							else
								-- break the word in two; next iteration we process
								-- the rest, which may need breaking again
								parts[i] = rest
								if pos_stress == "unstressed" then
									-- don't do anything
								elseif pos_stress == "secstressed" or (saw_primary_stress and pos_stress == "stressed") then
									prefix = rsub(prefix, "(" .. vowel_c .. ")", "%1" .. AUTOGRAVE, 1)
								elseif pos_stress == "stressed" then
									prefix = rsub(prefix, "(" .. vowel_c .. ")", "%1" .. AUTOACUTE, 1)
									saw_primary_stress = true
								else
									error("Unrecognized stress spec for pos=" .. pos .. ", prefix=" .. prefix .. ": " .. pos_stress)
								end
								table.insert(retparts, insert_position, prefix)
								insert_position = insert_position + 1
								broke_prefix = true
								break
							end
						end
					end
				end
				if not broke_prefix then
					break
				end
			end
			-- Now do the same for suffixes.
			while true do
				local broke_suffix = false
				for _, suffixspec in ipairs(com.suffixes) do
					local suffix_pattern = suffixspec[1]
					local stress_spec = suffixspec[2]
					local pos_stress = lookup_stress_spec(stress_spec, pos)
					local rest, suffix = rmatch(parts[i], "^(.-)(" .. suffix_pattern .. ")$")
					if suffix then
						if not pos_stress then
							-- suffix not recognized for this POS, don't split here
						elseif stress_spec.restriction and not rfind(rest, stress_spec.restriction) then
							-- restriction not met, don't split here
						elseif rfind(rest, "%+$") then
							-- explicit non-boundary here, so don't split here
						elseif not rfind(rest, vowel_c) then
							-- no vowels, don't split here
						else
							local before_cluster, final_cluster = rmatch(rest, "^(.-)(" .. non_vowel_c .. "*)$")
							if rfind(final_cluster, "%..") then
								-- syllable division within or before final
								-- cluster, don't split here
							else
								-- break the word in two; next iteration we process
								-- the rest, which may need breaking again
								parts[i] = rest
								if pos_stress == "unstressed" then
									-- don't do anything
								elseif pos_stress == "secstressed" then
									suffix = rsub(suffix, "(" .. vowel_c .. ")", "%1" .. AUTOGRAVE, 1)
								elseif pos_stress == "stressed" then
									error("Primary stress not allowed for suffixes (suffix=" .. suffix .. ")")
								else
									error("Unrecognized stress spec for pos=" .. pos .. ", suffix=" .. suffix .. ": " .. pos_stress)
								end
								table.insert(retparts, insert_position, suffix)
								broke_suffix = true
								break
							end
						end
					end
				end
				if not broke_suffix then
					break
				end
			end
		end
		-- Fetch the first stress accent of the part, if any. This must use
		-- rmatch(): rfind() returns the numeric start position as its first
		-- value, which never equals an accent character, so neither the
		-- circumflex nor the explicit-acute branch below could ever run.
		local acc = rmatch(parts[i], "(" .. stress_accent_c .. ")")
		if acc == com.CFLEX then
			-- remove circumflex but don't accent
			parts[i] = gsub(parts[i], com.CFLEX, "")
		elseif acc == com.ACUTE or acc == AUTOACUTE then
			saw_primary_stress = true
		elseif not acc and parts[i + 1] ~= "<" and parts[i - 1] ~= ">" then
			-- Add primary or secondary stress on the part; primary stress if no primary
			-- stress yet, otherwise secondary stress.
			acc = saw_primary_stress and AUTOGRAVE or AUTOACUTE
			saw_primary_stress = true
			parts[i] = rsub(parts[i], "(" .. vowel_c .. ")", "%1" .. acc, 1)
		end
		table.insert(retparts, insert_position, parts[i])
		i = i + 2
	end
	-- remove any +, which has served its purpose
	for idx, part in ipairs(retparts) do
		retparts[idx] = gsub(part, "%+", "")
	end
	return retparts
end
-- Divide a run of vowels (each optionally followed by accents) into a list of
-- vowel nuclei. Two adjacent vowels are kept together as one nucleus when,
-- ignoring stress accents, their concatenation is a key of com.diphthongs;
-- otherwise every vowel is a nucleus of its own. Raises an error if anything
-- other than vowels and accents is found in `vowelseq`.
local function break_vowels(vowelseq)
	local function assert_empty(piece)
		if piece ~= "" then
			error("Something wrong, non-vowel '" .. piece .. "' seen in vowel sequence '" .. vowelseq .. "'")
		end
	end

	local nuclei = {}
	-- Even slots hold one vowel plus its accents; odd slots hold whatever lies
	-- between vowels, which must be empty.
	local pieces = strutils.capturing_split(vowelseq, "(" .. vowel_c .. accent_c .. "*)")
	local idx = 1
	while idx <= #pieces do
		if idx % 2 == 1 then
			assert_empty(pieces[idx])
			idx = idx + 1
		else
			local current = pieces[idx]
			local is_diphthong = false
			if idx + 2 <= #pieces then
				local key = rsub(current, stress_accent_c, "") .. rsub(pieces[idx + 2], stress_accent_c, "")
				is_diphthong = com.diphthongs[key]
			end
			if is_diphthong then
				assert_empty(pieces[idx + 1])
				table.insert(nuclei, current .. pieces[idx + 2])
				-- skip the gap and the second vowel, landing on the next gap
				idx = idx + 3
			else
				table.insert(nuclei, current)
				idx = idx + 1
			end
		end
	end
	return nuclei
end
-- Decompose a word into a list that alternates between consonant (C) and
-- vowel (V) components. A C component is a possibly empty run of consonants;
-- a V component is a single vowel or diphthong. C components sit at the odd
-- positions (starting from 1) and V components at the even positions, so the
-- list always has an odd number of entries. Adjacent vowels that do not form
-- a diphthong are separated by an empty C component.
local function break_into_c_and_v_components(word)
	local pieces = strutils.capturing_split(word, "(" .. vowel_or_accent_c .. "+)")
	local components = {}
	for idx = 1, #pieces do
		local piece = pieces[idx]
		if idx % 2 == 1 then
			-- consonant run: taken over as-is
			table.insert(components, piece)
		else
			-- vowel run: may contain several nuclei
			local nuclei = break_vowels(piece)
			for n = 1, #nuclei do
				if n > 1 then
					table.insert(components, "")
				end
				table.insert(components, nuclei[n])
			end
		end
	end
	return components
end
-- Divide a word (or word part) into a list of syllables.
--
-- The word is broken into alternating consonant/vowel components and every
-- consonant cluster is then distributed between the vowel before it (coda)
-- and the vowel after it (onset):
--   * a word-initial cluster goes wholly to the following vowel and a
--     word-final cluster wholly to the preceding vowel;
--   * a cluster containing an explicit "." is divided at the first ".";
--   * a single consonant, and the pair "sċ", become the onset;
--   * any other pair is split one consonant each way;
--   * longer clusters give the following vowel the longest permitted onset
--     (three consonants from com.onsets_3 provided something is left for the
--     coda, else two from com.onsets_2 / com.secondary_onsets_2, else one).
-- A word without any vowel is returned as a single-element list.
local function split_into_syllables(word)
	local comps = break_into_c_and_v_components(word)
	if #comps == 1 then
		return comps
	end

	-- Give `coda` to the vowel before consonant slot `idx` and `onset` to the
	-- vowel after it.
	local function distribute(idx, coda, onset)
		comps[idx - 1] = comps[idx - 1] .. coda
		comps[idx + 1] = onset .. comps[idx + 1]
	end

	-- Consonant clusters live at the odd positions.
	for idx = 1, #comps, 2 do
		local cluster = comps[idx]
		local len = ulen(cluster)
		if idx == 1 then
			comps[idx + 1] = cluster .. comps[idx + 1]
		elseif idx == #comps then
			comps[idx - 1] = comps[idx - 1] .. cluster
		elseif rfind(cluster, "%.") then
			local before_break, after_break = rmatch(cluster, "^(.-)%.(.*)$")
			distribute(idx, before_break, after_break)
		elseif len == 0 then
			-- hiatus between two vowels: nothing to distribute
		elseif len == 1 then
			comps[idx + 1] = cluster .. comps[idx + 1]
		elseif len == 2 then
			local c1, c2 = rmatch(cluster, "^(.)(.)$")
			if c1 == "s" and c2 == "ċ" then
				comps[idx + 1] = "sċ" .. comps[idx + 1]
			else
				distribute(idx, c1, c2)
			end
		else
			-- Try the longest onset first, then progressively shorter ones.
			local coda, onset = rmatch(cluster, "^(.-)(...)$")
			if not (#coda > 0 and com.onsets_3[onset]) then
				coda, onset = rmatch(cluster, "^(.-)(..)$")
				local two_ok = com.onsets_2[onset] or
					(com.secondary_onsets_2[onset] and not coda:find("[lr]$"))
				if not two_ok then
					coda, onset = rmatch(cluster, "^(.-)(.)$")
				end
			end
			distribute(idx, coda, onset)
		end
	end

	-- The vowel slots (even positions) now hold the complete syllables.
	local syllables = {}
	for idx = 2, #comps, 2 do
		-- remove any stray periods.
		table.insert(syllables, rsub(comps[idx], "%.", ""))
	end
	return syllables
end
-- Join a list of syllables into one string, replacing the stress accents that
-- sit on the vowels by IPA stress marks in front of the syllable: ˈ for
-- acute, ˌ for grave. Syllables without stress are preceded by "." except the
-- first one. When `no_auto_stress` is truthy, auto-generated accents
-- (AUTOACUTE/AUTOGRAVE) are removed without producing a stress mark;
-- user-written accents are always honoured.
local function combine_syllables_moving_stress(syllables, no_auto_stress)
	local allow_auto = not no_auto_stress
	local out = {}
	for idx, syl in ipairs(syllables) do
		local marker
		if syl:find(com.ACUTE) or (allow_auto and syl:find(AUTOACUTE)) then
			marker = "ˈ"
		elseif syl:find(com.GRAVE) or (allow_auto and syl:find(AUTOGRAVE)) then
			marker = "ˌ"
		elseif idx > 1 then
			marker = "."
		else
			marker = ""
		end
		out[#out + 1] = rsub(marker .. syl, stress_accent_c, "")
	end
	return table.concat(out)
end
-- Assemble the parts of a word (split-off prefixes and suffixes, members of a
-- compound) into one string. Parts are joined with ⁀ and the whole word is
-- wrapped in ⁀⁀. Every part after the first that does not begin with a stress
-- mark gets a leading "." so that there is always a syllable boundary between
-- parts.
local function combine_parts(parts)
	local pieces = {}
	for idx, part in ipairs(parts) do
		local needs_dot = idx > 1 and not rfind(part, "^[ˈˌ]")
		if needs_dot then
			pieces[idx] = "." .. part
		else
			pieces[idx] = part
		end
	end
	return "⁀⁀" .. table.concat(pieces, "⁀") .. "⁀⁀"
end
-- Convert a spelled word into its syllabified form with stress marks and
-- boundary symbols: decompose it, split it into parts, syllabify each part
-- and glue everything back together. Auto-generated stress is suppressed when
-- `no_auto_stress` is truthy, and also when the word consists of one part
-- containing one syllable.
local function transform_word(word, pos, no_auto_stress)
	local parts = split_on_word_boundaries(com.decompose(word), pos)
	local single_part = #parts == 1
	for idx = 1, #parts do
		local syllables = split_into_syllables(parts[idx])
		local suppress_auto = no_auto_stress or (single_part and #syllables == 1)
		parts[idx] = combine_syllables_moving_stress(syllables, suppress_auto)
	end
	return combine_parts(parts)
end
-- Normalize the part of speech, guessing it from the word's ending when none
-- is given.
--   * "adj"/"adjective" are treated as "noun";
--   * "noun", "verb" and "verbal" are returned unchanged;
--   * any other supplied value raises an error;
--   * with no value, endings -an/-ān/-ōn/-ēon/-enne yield "verb"; otherwise a
--     trailing -līċ(e)/-nes(s)-type ending is stripped and what remains yields
--     "verbal" if it ends like a participle or verbal noun, else "noun".
local function default_pos(word, pos)
	if pos then
		if pos == "adj" or pos == "adjective" then
			return "noun"
		end
		if pos == "noun" or pos == "verb" or pos == "verbal" then
			return pos
		end
		error("Unrecognized part of speech: " .. pos)
	end

	-- verbs in -an/-ōn/-ēon, inflected infinitives in -enne
	if rfind(word, "[aāō]n$") or rfind(word, "ēon$") or rfind(word, "enne$") then
		return "verb"
	end

	-- adjectives in -līċ, adverbs in -līċe and nouns in -nes can follow
	-- nouns or participles (which are "verbal"); truncate the ending
	-- and check what precedes
	local stem = rsub(word, "^(.*" .. vowel_c .. ".*)l[iī][cċ]e?$", "%1")
	stem = rsub(stem, "^(.*" .. vowel_c .. ".*)n[eiy]ss?$", "%1")

	-- participles in -end(e)/-en/-ed/-od, verbal nouns in -ing/-ung
	local verbal_endings = {"ende?$", "[eo]d$", "en$", "[iu]ng$"}
	for _, ending in ipairs(verbal_endings) do
		if rfind(stem, ending) then
			return "verbal"
		end
	end
	return "noun"
end
-- Produce the internal phonemic form of a single word.
-- Steps: strip one trailing ".", "!" or "?"; turn bracketed explicit
-- allophones into placeholder characters; settle the part of speech; strip a
-- leading/trailing hyphen (which marks the word as a prefix or suffix and
-- turns off auto-generated stress); syllabify; apply the phonemic rules.
-- Returns the phonemic string (still containing ⁀ boundary symbols and
-- placeholders) and the part of speech that was used.
local function generate_phonemic_word(word, pos)
	local bare = gsub(word, "[.!?]$", "")
	bare = rsub(bare, "%[(.)%]", char_to_explicit_char)
	local resolved_pos = default_pos(bare, pos)
	local is_affix
	if bare:find("^%-") or bare:find("%-$") then
		is_affix = true
		bare = gsub(bare, "^%-?(.-)%-?$", "%1")
	end
	local syllabified = transform_word(bare, resolved_pos, is_affix)
	return apply_rules(syllabified, phonemic_rules, resolved_pos), resolved_pos
end
-- Return the phonemic (broad) transcription of `text`, without the enclosing
-- slashes.
--
-- @param text  the respelling, possibly several space-separated words; it is
--              lowercased before processing. A table may be passed instead
--              (e.g. a frame when the function is invoked directly), in
--              which case the text and `pos` are read from the table.
-- @param pos   optional part of speech ("noun", "verb", "verbal", "adj",
--              "adjective"); guessed per word when omitted.
-- @return the transcription with boundary symbols removed and explicit
--         allophones reduced to their phonemes
function export.phonemic(text, pos)
	if type(text) == "table" then
		pos = text.args["pos"]
		-- A frame keeps its positional arguments in .args rather than on the
		-- frame itself, so text[1] alone is nil for a direct invocation and
		-- ulower() below would fail. text[1] is still preferred so that any
		-- caller relying on it behaves as before.
		text = text[1] or text.args[1]
	end
	local result = {}
	text = ulower(text)
	for word in rgsplit(text, " ") do
		-- keep only the first return value (the part of speech is not needed)
		local phonemic = generate_phonemic_word(word, pos)
		table.insert(result, phonemic)
	end
	result = table.concat(result, " ")
	result = rsub(result, ".", explicit_char_to_phonemic)
	return gsub(result, "⁀", "")
end
-- Return the phonetic (narrow) transcription of `text`, without the enclosing
-- square brackets.
--
-- @param text  the respelling, possibly several space-separated words; it is
--              lowercased before processing. A table may be passed instead
--              (e.g. a frame when the function is invoked directly), in
--              which case the text and `pos` are read from the table.
-- @param pos   optional part of speech ("noun", "verb", "verbal", "adj",
--              "adjective"); guessed per word when omitted.
-- @return the transcription with boundary symbols removed
function export.phonetic(text, pos)
	if type(text) == "table" then
		pos = text.args["pos"]
		-- A frame keeps its positional arguments in .args rather than on the
		-- frame itself, so text[1] alone is nil for a direct invocation and
		-- ulower() below would fail. text[1] is still preferred so that any
		-- caller relying on it behaves as before.
		text = text[1] or text.args[1]
	end
	local result = {}
	text = ulower(text)
	for word in rgsplit(text, " ") do
		-- The phonetic rules run on the phonemic form, using the part of
		-- speech that was settled on for this particular word.
		local phonemic, respos = generate_phonemic_word(word, pos)
		local phonetic = apply_rules(phonemic, phonetic_rules, respos)
		table.insert(result, phonetic)
	end
	return gsub(table.concat(result, " "), "⁀", "")
end
-- Template entry point: build the formatted IPA line.
--
-- Reads the arguments of the calling template:
--   1=, 2=, ...  one or more respellings
--   pos=         optional part of speech
--   ann=         optional annotation; "1" means "derive the annotation from
--                the respellings", anything else is shown verbatim in bold
-- For each respelling the phonemic transcription is listed in slashes,
-- followed by the phonetic transcription in square brackets when the two
-- differ.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = { required = true, default = "hlǣf-dīġe", list = true },
		["pos"] = {},
		["ann"] = {},
	})

	local prons = {}
	for _, respelling in ipairs(args[1]) do
		local phonemic = export.phonemic(respelling, args.pos)
		local phonetic = export.phonetic(respelling, args.pos)
		prons[#prons + 1] = {pron = "/" .. phonemic .. "/"}
		if phonetic ~= phonemic then
			prons[#prons + 1] = {pron = "[" .. phonetic .. "]"}
		end
	end

	local anntext = ""
	if args.ann == "1" then
		-- remove all spelling markup except ġ/ċ and macrons
		local markup = "[%-+._<>" .. com.ACUTE .. com.GRAVE .. com.CFLEX .. "]"
		local spellings = {}
		for _, respelling in ipairs(args[1]) do
			local spelling = rsub(com.decompose(respelling), markup, "")
			-- explicit allophones such as [z] are shown as the ordinary letter
			spelling = rsub(spelling, "%[(.)%]", char_to_spelling)
			m_table.insertIfNot(spellings, "'''" .. spelling .. "'''")
		end
		anntext = table.concat(spellings, ", ") .. ": "
	elseif args.ann then
		anntext = "'''" .. args.ann .. "''': "
	end

	return anntext .. m_IPA.format_IPA_full(lang, prons)
end
return export