Kontent qismiga oʻtish

Modul:ang-pron

Vikilug‘atdan olingan

Bu modul uchun Modul:ang-pron/doc nomli hujjat sahifasini yaratishingiz mumkin

local strutils = require("Module:string utilities")
local m_table = require("Module:table")
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("ang")
local com = require("Module:ang-common")

local u = mw.ustring.char
local rsubn = mw.ustring.gsub
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local rgsplit = mw.text.gsplit
local ulen = mw.ustring.len
local ulower = mw.ustring.lower

-- Unicode-aware substitution: same as rsubn() but yields only the resulting
-- string, dropping the replacement count.
local function rsub(term, foo, bar, n)
	-- The surrounding parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar, n))
end

-- Byte-oriented substitution: same as term:gsub() but yields only the
-- resulting string, dropping the replacement count.
local function gsub(term, foo, bar, n)
	-- The surrounding parentheses truncate the call to its first return value.
	return (term:gsub(foo, bar, n))
end

-- Table of the functions this module makes available to callers.
local export = {}

-- When auto-generating primary and secondary stress accents, we use these
-- special characters, and later convert to normal IPA accent marks, so
-- we can distinguish auto-generated stress from user-specified stress.
-- AUTOACUTE stands for an automatically assigned primary stress and AUTOGRAVE
-- for an automatically assigned secondary stress.
local AUTOACUTE = u(0xFFF0)
local AUTOGRAVE = u(0xFFF1)

-- When the user uses the "explicit allophone" notation such as [z] or [ç] to
-- force a particular allophone, we internally convert that notation into a
-- single special character. Each constant below stands for one such forced
-- allophone; the character it corresponds to is given by the tables that
-- follow.
local EXPLICIT_TH = u(0xFFF2)
local EXPLICIT_DH = u(0xFFF3)
local EXPLICIT_S = u(0xFFF4)
local EXPLICIT_Z = u(0xFFF5)
local EXPLICIT_F = u(0xFFF6)
local EXPLICIT_V = u(0xFFF7)
local EXPLICIT_G = u(0xFFF8)
local EXPLICIT_GH = u(0xFFF9)
local EXPLICIT_H = u(0xFFFA)
local EXPLICIT_X = u(0xFFFB)
local EXPLICIT_C = u(0xFFFC)
-- NOTE(review): U+FFFD is the Unicode replacement character; confirm that
-- input can never already contain it, or it would be read as an explicit [i].
local EXPLICIT_I = u(0xFFFD)

-- All explicit-allophone characters that stand for consonants, concatenated
-- for use inside a character class. EXPLICIT_I is deliberately absent because
-- it stands for a vowel.
local explicit_cons = EXPLICIT_TH .. EXPLICIT_DH .. EXPLICIT_S .. EXPLICIT_Z ..
	EXPLICIT_F .. EXPLICIT_V .. EXPLICIT_G .. EXPLICIT_GH .. EXPLICIT_H ..
	EXPLICIT_X .. EXPLICIT_C

-- Map "explicit allophone" notation into special char. See above.
-- The key is the single character the user wrote between square brackets
-- (e.g. the z of [z]); the value is the internal special character that
-- replaces the whole bracketed sequence. Because this table is used as a
-- substitution table, a bracketed character that is not a key here leaves the
-- bracketed text unchanged.
local char_to_explicit_char = {
	["þ"] = EXPLICIT_TH,
	["ð"] = EXPLICIT_DH,
	["s"] = EXPLICIT_S,
	["z"] = EXPLICIT_Z,
	["f"] = EXPLICIT_F,
	["v"] = EXPLICIT_V,
	["g"] = EXPLICIT_G,
	["ɣ"] = EXPLICIT_GH,
	["h"] = EXPLICIT_H,
	["x"] = EXPLICIT_X,
	["ç"] = EXPLICIT_C,
	["i"] = EXPLICIT_I,
}

-- Map "explicit allophone" notation into normal spelling, for supporting ann=.
-- The key is the character the user wrote between square brackets; the value
-- is the ordinary letter shown in the annotation in place of the bracketed
-- sequence (e.g. [z] is displayed as s, and [ð] as þ).
local char_to_spelling = {
	["þ"] = "þ",
	["ð"] = "þ",
	["s"] = "s",
	["z"] = "s",
	["f"] = "f",
	["v"] = "f",
	["g"] = "g",
	["ɣ"] = "g",
	["h"] = "h",
	["x"] = "h",
	["ç"] = "h",
	["i"] = "i",
}

-- Map "explicit allophone" notation into phonemes, for phonemic output.
-- Keys are the internal special characters; values are the phoneme symbols
-- that replace them. Voiced and voiceless members of a pair map to the same
-- phoneme (e.g. both EXPLICIT_S and EXPLICIT_Z give "s").
local explicit_char_to_phonemic = {
	[EXPLICIT_TH] = "θ",
	[EXPLICIT_DH] = "θ",
	[EXPLICIT_S] = "s",
	[EXPLICIT_Z] = "s",
	[EXPLICIT_F] = "f",
	[EXPLICIT_V] = "f",
	[EXPLICIT_G] = "ɡ", -- IPA ɡ!
	[EXPLICIT_GH] = "ɡ", -- IPA ɡ!
	[EXPLICIT_H] = "x",
	[EXPLICIT_X] = "x",
	[EXPLICIT_C] = "x",
	[EXPLICIT_I] = "i",
}

-- Map "explicit allophone" notation into IPA phones, for phonetic output.
-- Keys are the internal special characters; values are the exact phones the
-- user asked for. Unlike the phonemic mapping, each special character keeps
-- its own distinct symbol here.
local explicit_char_to_phonetic = {
	[EXPLICIT_TH] = "θ",
	[EXPLICIT_DH] = "ð",
	[EXPLICIT_S] = "s",
	[EXPLICIT_Z] = "z",
	[EXPLICIT_F] = "f",
	[EXPLICIT_V] = "v",
	[EXPLICIT_G] = "ɡ", -- IPA ɡ!
	[EXPLICIT_GH] = "ɣ",
	[EXPLICIT_H] = "h",
	[EXPLICIT_X] = "x",
	[EXPLICIT_C] = "ç",
	[EXPLICIT_I] = "i",
}

-- Building blocks for the patterns used throughout this module. A plain name
-- (e.g. `vowel`) is a bare run of characters meant to be embedded in a
-- character class; the matching `_c` name is that run already wrapped in
-- square brackets so it can be dropped directly into a pattern.

-- Every diacritic that can follow a vowel: the length mark (macron), the
-- user-specified stress marks and the auto-generated stress placeholders.
local accent = com.MACRON .. com.ACUTE .. com.GRAVE .. com.CFLEX .. AUTOACUTE .. AUTOGRAVE
local accent_c = "[" .. accent .. "]"
-- Same as `accent` but without the macron, i.e. only the stress-related marks.
local stress_accent = com.ACUTE .. com.GRAVE .. com.CFLEX .. AUTOACUTE .. AUTOGRAVE
local stress_accent_c = "[" .. stress_accent .. "]"
local back_vowel = "aɑou"
-- The explicit [i] placeholder counts as a front vowel.
local front_vowel = "eiyæœø" .. EXPLICIT_I
local vowel = back_vowel .. front_vowel
local vowel_or_accent = vowel .. accent
local vowel_c = "[" .. vowel .. "]"
local vowel_or_accent_c = "[" .. vowel_or_accent .. "]"
-- Matches any single character that is not a vowel (this includes accents,
-- punctuation and boundary markers, not just consonants).
local non_vowel_c = "[^" .. vowel .. "]"
local front_vowel_c = "[" .. front_vowel .. "]"
-- The following include both IPA symbols and letters (including regular g and IPA ɡ)
-- so it can be used at any step of the process.
local obstruent = "bcċçdfgɡɣhkpqstvxzþðθʃʒ" .. explicit_cons
local resonant = "lmnŋrɫ"
local glide = "ġjwƿ"
local cons = obstruent .. resonant .. glide
local cons_c = "[" .. cons .. "]"
-- Sounds that trigger voicing of an adjacent fricative in the phonetic rules.
local voiced_sound = vowel .. "lrmnwjbdɡ" -- WARNING, IPA ɡ used here

-- These rules operate in order, and apply to the actual spelling,
-- after (1) macron decomposition, (2) syllable and prefix splitting,
-- (3) placement of primary and secondary stresses at the beginning
-- of the syllable. Each syllable will be separated either by ˈ
-- (if the following syllable is stressed), by ˌ (if the following
-- syllable has secondary stress), or by . (otherwise). In addition,
-- morpheme boundaries where the consonants on either side should be
-- treated as at the beginning/end of word (i.e. between prefix and
-- word, or between words in a compound word) will be marked with ⁀
-- before the syllable separator, and the beginning and end of text
-- will be marked by ⁀⁀. The output of this is fed into phonetic_rules,
-- and then is used to generate the displayed phonemic pronunciation
-- by removing ⁀ symbols.
--
-- Each rule is a list {pattern, replacement, allowed_pos}. The pattern and
-- replacement are handed to a Unicode-aware gsub, so the replacement may be a
-- string (with %1-style back-references) or a lookup table; with a lookup
-- table, a match that has no entry in the table is left unchanged. The
-- optional third element is a list of parts of speech; when present, the rule
-- is applied only if the word's part of speech is in that list.
local phonemic_rules = {
	-- The (decomposed) macron becomes the IPA length mark.
	{com.MACRON, "ː"},
	-- e before a long o or long a is dropped.
	{"eoː", "oː"}, -- e.g. ġeōmor
	{"eaː", "aː"},
	-- Digraphs spelling diphthongs; sequences matched by the pattern but not
	-- listed in the table (e.g. "ee") are left as they are.
	{"[ei]ː?[aeo]", {
		-- Alternative notation for short diphthongs: iu̯, eo̯, æɑ̯
		-- Alternative notation for long diphthongs: iːu̯, eːo̯, æːɑ̯
		["ea"] = "æ͜ɑ",
		["eːa"] = "æ͜ɑː",
		["eo"] = "e͜o",
		["eːo"] = "e͜oː",
		["io"] = "i͜u",
		["iːo"] = "i͜uː",
		["ie"] = "i͜y",
		["iːe"] = "i͜yː",
	}},
	-- sċ between vowels when at the beginning of a syllable should be ʃ.ʃ
	{"(" .. vowel_c .. "ː?)([.ˈˌ]?)sċ(" .. vowel_c .. ")", "%1ʃ%2ʃ%3"},
	-- other sċ should be ʃ; note that sċ divided between syllables becomes s.t͡ʃ
	{"sċ", "ʃ"},
	-- x between vowels when at the beginning of a syllable should be k.s;
	-- remaining x handled below
	{"(" .. vowel_c .. "ː?)([.ˈˌ]?)x(" .. vowel_c .. ")", "%1k%2s%3"},
	-- z between vowels when at the beginning of a syllable should be t.s;
	-- remaining z handled below
	{"(" .. vowel_c .. "ː?)([.ˈˌ]?)z(" .. vowel_c .. ")", "%1t%2s%3"},
	-- short front vowel + -rian, -riend, -rienne, -riende in verb or verbal is
	-- rendered with /j/; we need to carefully change the syllable structure
	-- when doing this
	{"(" .. front_vowel_c .. ")%.ri%.(an⁀)", "%1r.ġ%2", {"verb"}},
	{"(" .. front_vowel_c .. ")%.ri%.(end⁀)", "%1r.ġ%2", {"verb", "verbal"}},
	{"(" .. front_vowel_c .. ")%.ri%.(en%.[nd]e⁀)", "%1r.ġ%2", {"verb", "verbal"}},
	-- Two-consonant sequences (possibly split by a syllable separator) that
	-- need to be resolved before the single-character mapping below.
	{"nċ([.ˈˌ]?)ġ", "n%1j"},
	{"ċ([.ˈˌ]?)ġ", "j%1j"},
	{"c([.ˈˌ]?)g", "g%1g"},
	{"ċ([.ˈˌ]?)ċ", "t%1t͡ʃ"},
	-- Finally map every remaining single letter to its phoneme; characters
	-- without an entry are left unchanged.
	{".", {
		["ċ"] = "t͡ʃ",
		["c"] = "k",
		["ġ"] = "j",
		["h"] = "x",
		["þ"] = "θ",
		["ð"] = "θ",
		["ƿ"] = "w",
		["x"] = "ks",
		["z"] = "ts",
		["g"] = "ɡ", -- map to IPA ɡ
		["a"] = "ɑ",
		["œ"] = "ø",
	}},
}

-- Voiceless fricative -> its voiced counterpart; used by the fricative
-- voicing rule in phonetic_rules.
local fricative_to_voiced = {
	["f"] = "v",
	["s"] = "z",
	["θ"] = "ð",
}

-- Voiced fricative -> its voiceless counterpart; used by the rule in
-- phonetic_rules that devoices a fricative between unstressed vowels.
local fricative_to_unvoiced = {
	["v"] = "f",
	["z"] = "s",
	["ð"] = "θ",
}

-- These rules operate in order, on the output of phonemic_rules.
-- The output of this is used to generate the displayed phonetic
-- pronunciation by removing ⁀ symbols.
--
-- Each rule is a list {pattern, replacement, allowed_pos}. The pattern and
-- replacement are handed to a Unicode-aware gsub, so the replacement may be a
-- string (with %1-style back-references), a lookup table or a function
-- receiving the captures. The optional third element is a list of parts of
-- speech; when present, the rule is applied only if the word's part of speech
-- is in that list.
local phonetic_rules = {
	-- Fricative voicing between voiced sounds. Note, the following operates
	-- across a ⁀ boundary for a fricative before the boundary but not after.
	{"([" .. voiced_sound .. "][ː.ˈˌ]*)([fsθ])([ː.ˈˌ⁀]*[" .. voiced_sound .. "])",
		function(s1, c, s2)
			return s1 .. fricative_to_voiced[c] .. s2
		end
	},
	-- Fricative between unstressed vowels should be devoiced.
	-- Note that unstressed syllables are preceded by . while stressed
	-- syllables are preceded by a stress mark.
	{"(%.[^.⁀][" .. vowel .. com.DOUBLE_BREVE_BELOW .. "ː]*%.)([vzð])",
		function(s1, c)
			return s1 .. fricative_to_unvoiced[c]
		end
	},
	-- Final -sian, -siend, -sienne, -siende (and variants such as -siġan,
	-- -siġend, etc.) in verb or verbal is rendered with [s]; clǣnsian will
	-- have to be special-cased with ''[z]''
	{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?ɑn⁀)", "%1s%2", {"verb"}},
	{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?end⁀)", "%1s%2", {"verb", "verbal"}},
	{"(" .. cons_c .. "ː?" .. "%.)z(i%.j?en%.[nd]e⁀)", "%1s%2", {"verb", "verbal"}},
	-- Final unstressed -þu/-þo after a consonant should be devoiced.
	{"(" .. cons_c .. "ː?" .. "%.)ð([uo]⁀)",
		function(s1, s2)
			return s1 .. "θ" .. s2
		end
	},
	-- x (i.e. spelled h) + resonant or w becomes a single voiceless sound.
	{"x[wnlr]", {
		["xw"] = "ʍ",
		["xl"] = "l̥",
		["xn"] = "n̥",
		["xr"] = "r̥",
	}},
	-- Note, the following will not operate across a ⁀ boundary.
	{"n([.ˈˌ]?[ɡk])", "ŋ%1"}, -- WARNING, IPA ɡ used here
	{"n([.ˈˌ]?)j", "n%1d͡ʒ"},
	{"j([.ˈˌ]?)j", "d%1d͡ʒ"},
	{"([^x][⁀.ˈˌ])x", "%1h"},      -- [h] occurs as a syllable-initial allophone
	{"(" .. front_vowel_c .. ")x", "%1ç"}, -- [ç] occurs after front vowels
	-- An IPA ɡ after a word/prefix boundary, after another ɡ or after n
	-- (previously converted to ŋ in this circumstance) should remain as ɡ,
	-- while all other ɡ's should be converted to ɣ except that word-final ɡ
	-- becomes x. We do this by converting the ɡ's that should remain to regular
	-- g (which should never occur otherwise), convert the remaining IPA ɡ's to ɣ
	-- or x, and then convert the regular g's back to IPA ɡ.
	-- NOTE(review): none of the four rules below actually turns a word-final ɡ
	-- into x; every remaining ɡ becomes ɣ. Confirm which behavior is intended.
	{"ɡ([.ˈˌ]?)ɡ", "g%1g"}, -- WARNING, IPA ɡ on the left, regular g on the right
	{"([ŋ⁀])([.ˈˌ]?)ɡ", "%1%2g"}, -- WARNING, IPA ɡ on the left, regular g on the right 
	{"ɡ", "ɣ"},
	{"g", "ɡ"}, -- WARNING, regular g on the left, IPA ɡ on the right
	-- Geminate l/r, and l/r before another consonant, are velarized.
	{"l([.ˈˌ]?)l", "ɫ%1ɫ"},
	{"r([.ˈˌ]?)r", "rˠ%1rˠ"},
	{"l([.ˈˌ]?" .. cons_c .. ")", "ɫ%1"},
	{"r([.ˈˌ]?" .. cons_c .. ")", "rˠ%1"},
	-- Geminate consonants within a single syllable are pronounced singly.
	-- Does not apply e.g. to ''ǣttren'', which will be divided as ''ǣt.tren''.
	{"(" .. cons_c .. ")%1", "%1"},
	{"rˠrˠ", "rˠ"},
	-- [In the sequence vowel + obstruent + resonant in a single syllable,
	-- the resonant should become syllabic, e.g. ādl [ˈɑːdl̩], blōstm [bloːstm̩],
	-- fæþm [fæðm̩], bēacn [ˈbæːɑ̯kn̩]. We allow anything but a syllable or word
	-- boundary between the vowel and the obstruent.] [BASED ON INPUT FROM
	-- [[User:Urszag]], I'VE DECIDED AGAINST THIS]
	-- {"(" .. vowel_c .. "[^.ˈˌ⁀]*[" .. obstruent .. "]ː?[" .. resonant .. "])", "%1" .. com.SYLLABIC},
	-- also -mn e.g stemn /ˈstemn̩/; same for m + other resonants except m
	-- {"(" .. vowel_c .. "[^.ˈˌ⁀]*mː?[lnŋrɫ])", "%1" .. com.SYLLABIC},
	-- Finally replace the explicit-allophone placeholders with the phones the
	-- user asked for; all other characters are left unchanged.
	{".", explicit_char_to_phonetic},
}

-- Run an ordered rule list over `word` and return the transformed string.
-- Each rule is {pattern, replacement, allowed_pos}; a rule carrying an
-- allowed_pos list is skipped unless `pos` is one of its members.
local function apply_rules(word, rules, pos)
	for _, rule in ipairs(rules) do
		local pattern, replacement, pos_filter = rule[1], rule[2], rule[3]
		-- Rules without a part-of-speech filter always apply.
		local applies = true
		if pos_filter then
			applies = m_table.contains(pos_filter, pos)
		end
		if applies then
			word = rsub(word, pattern, replacement)
		end
	end
	return word
end

-- Return the stress setting that `stress_spec` holds for part of speech `pos`.
-- A "verbal" word falls back to the "verb" entry when it has no entry of its
-- own. Returns nil when no setting applies.
local function lookup_stress_spec(stress_spec, pos)
	local own = stress_spec[pos]
	if own then
		return own
	end
	if pos == "verbal" then
		local inherited = stress_spec["verb"]
		if inherited then
			return inherited
		end
	end
	return nil
end

-- Split a (decomposed) word into its component parts and mark each part's
-- stress.
--
-- The word is first split on the explicit boundary characters "<", ">" and
-- "-". Each resulting part, unless it is immediately followed by "<" or
-- immediately preceded by ">" (presumably explicitly marked prefixes and
-- suffixes), is then further broken up by repeatedly splitting off any prefix
-- listed in com.prefixes and any suffix listed in com.suffixes that is valid
-- for `pos` and passes a series of sanity checks. Split-off affixes and the
-- remaining parts receive the AUTOACUTE/AUTOGRAVE placeholders on their first
-- vowel according to their stress setting; the first part that gets primary
-- stress makes every later auto-stressed part secondary. Any "+" (explicit
-- "do not split here" marker) is removed from the result.
--
-- Parameters:
--   word: the word to split, after decomposition.
--   pos:  part of speech ("noun", "verb" or "verbal") used to look up the
--         stress setting of prefixes and suffixes.
-- Returns: a list of part strings, in order.
-- Raises: an error if a prefix/suffix has an unrecognized stress setting, or
--   if a suffix is specified as carrying primary stress.
local function split_on_word_boundaries(word, pos)
	local retparts = {}
	-- Odd entries of `parts` are word pieces, even entries are the separators.
	local parts = strutils.capturing_split(word, "([<>%-])")
	local i = 1
	local saw_primary_stress = false
	while i <= #parts do
		local split_part = false
		-- Prefixes are inserted at this position (which then advances); the
		-- remaining stem is inserted last at the same position, so that it ends
		-- up after the prefixes and before any suffixes.
		local insert_position = #retparts + 1
		if parts[i + 1] ~= "<" and parts[i - 1] ~= ">" then
			-- Split off any prefixes.
			while true do
				local broke_prefix = false
				for _, prefixspec in ipairs(com.prefixes) do
					local prefix_pattern = prefixspec[1]
					local stress_spec = prefixspec[2]
					local pos_stress = lookup_stress_spec(stress_spec, pos)
					local prefix, rest = rmatch(parts[i], "^(" .. prefix_pattern .. ")(.*)$")
					if prefix then
						if not pos_stress then
							-- prefix not recognized for this POS, don't split here
						elseif stress_spec.restriction and not rfind(rest, stress_spec.restriction) then
							-- restriction not met, don't split here
						elseif rfind(rest, "^%+") then
							-- explicit non-boundary here, so don't split here
						elseif not rfind(rest, vowel_c) then
							-- no vowels, don't split here
						elseif rfind(rest, "^..?$") then
							-- only two letters, unlikely to be a word, probably an ending, so don't split
							-- here
						else
							local initial_cluster, after_cluster = rmatch(rest, "^(" .. non_vowel_c .. "*)(.-)$")
							if rfind(initial_cluster, "..") and (
								not (com.onsets_2[initial_cluster] or com.secondary_onsets_2[initial_cluster] or
									com.onsets_3[initial_cluster])) then
								-- initial cluster isn't a possible onset, don't split here
							elseif rfind(initial_cluster, "^x") then
								-- initial cluster isn't a possible onset, don't split here
							elseif rfind(after_cluster, "^" .. vowel_c .. "$") then
								-- remainder is a cluster + short vowel,
								-- unlikely to be a word so don't split here
							else
								-- break the word in two; next iteration we process
								-- the rest, which may need breaking again
								parts[i] = rest
								if pos_stress == "unstressed" then
									-- don't do anything
								elseif pos_stress == "secstressed" or (saw_primary_stress and pos_stress == "stressed") then
									prefix = rsub(prefix, "(" .. vowel_c .. ")", "%1" .. AUTOGRAVE, 1)
								elseif pos_stress == "stressed" then
									prefix = rsub(prefix, "(" .. vowel_c .. ")", "%1" .. AUTOACUTE, 1)
									saw_primary_stress = true
								else
									error("Unrecognized stress spec for pos=" .. pos .. ", prefix=" .. prefix .. ": " .. pos_stress)
								end
								table.insert(retparts, insert_position, prefix)
								insert_position = insert_position + 1
								broke_prefix = true
								break
							end
						end
					end
				end
				if not broke_prefix then
					break
				end
			end

			-- Now do the same for suffixes.
			while true do
				local broke_suffix = false
				for _, suffixspec in ipairs(com.suffixes) do
					local suffix_pattern = suffixspec[1]
					local stress_spec = suffixspec[2]
					local pos_stress = lookup_stress_spec(stress_spec, pos)
					local rest, suffix = rmatch(parts[i], "^(.-)(" .. suffix_pattern .. ")$")
					if suffix then
						if not pos_stress then
							-- suffix not recognized for this POS, don't split here
						elseif stress_spec.restriction and not rfind(rest, stress_spec.restriction) then
							-- restriction not met, don't split here
						elseif rfind(rest, "%+$") then
							-- explicit non-boundary here, so don't split here
						elseif not rfind(rest, vowel_c) then
							-- no vowels, don't split here
						else
							local before_cluster, final_cluster = rmatch(rest, "^(.-)(" .. non_vowel_c .. "*)$")
							if rfind(final_cluster, "%..") then
								-- syllable division within or before final
								-- cluster, don't split here
							else
								-- break the word in two; next iteration we process
								-- the rest, which may need breaking again
								parts[i] = rest
								if pos_stress == "unstressed" then
									-- don't do anything
								elseif pos_stress == "secstressed" then
									suffix = rsub(suffix, "(" .. vowel_c .. ")", "%1" .. AUTOGRAVE, 1)
								elseif pos_stress == "stressed" then
									error("Primary stress not allowed for suffixes (suffix=" .. suffix .. ")")
								else
									error("Unrecognized stress spec for pos=" .. pos .. ", suffix=" .. suffix .. ": " .. pos_stress)
								end
								-- Suffixes are split off from the outside in, so
								-- inserting each one at the same position keeps
								-- them in their original order.
								table.insert(retparts, insert_position, suffix)
								broke_suffix = true
								break
							end
						end
					end
				end
				if not broke_suffix then
					break
				end
			end
		end

		-- Fetch the first stress-related accent in the part, if any. This must be
		-- rmatch() rather than rfind(): rfind() returns the match offsets before
		-- the capture, so its first result would be a number and could never
		-- equal any of the accent characters compared against below.
		local acc = rmatch(parts[i], "(" .. stress_accent_c .. ")")
		if acc == com.CFLEX then
			-- remove circumflex but don't accent
			parts[i] = gsub(parts[i], com.CFLEX, "")
		elseif acc == com.ACUTE or acc == AUTOACUTE then
			saw_primary_stress = true
		elseif not acc and parts[i + 1] ~= "<" and parts[i - 1] ~= ">" then
			-- Add primary or secondary stress on the part; primary stress if no primary
			-- stress yet, otherwise secondary stress.
			acc = saw_primary_stress and AUTOGRAVE or AUTOACUTE
			saw_primary_stress = true
			parts[i] = rsub(parts[i], "(" .. vowel_c .. ")", "%1" .. acc, 1)
		end
		table.insert(retparts, insert_position, parts[i])
		-- Skip over the separator to the next word piece.
		i = i + 2
	end

	-- remove any +, which has served its purpose
	for i, part in ipairs(retparts) do
		retparts[i] = gsub(part, "%+", "")
	end
	return retparts
end

-- Break a run of vowels (each optionally followed by accents) into a list of
-- nuclei. Two adjacent vowels are kept together as one entry when, with their
-- stress accents stripped, they form a key of com.diphthongs; otherwise each
-- vowel is its own entry. Raises an error if anything other than vowels and
-- accents is found in the run.
local function break_vowels(vowelseq)
	-- Everything between the captured vowels must be empty.
	local function assert_empty(piece)
		if piece == "" then
			return
		end
		error("Something wrong, non-vowel '" .. piece .. "' seen in vowel sequence '" .. vowelseq .. "'")
	end

	-- Odd slots hold the text between vowels, even slots hold vowel+accents.
	local pieces = strutils.capturing_split(vowelseq, "(" .. vowel_c .. accent_c .. "*)")
	local count = #pieces
	local nuclei = {}
	local idx = 1
	while idx <= count do
		if idx % 2 == 1 then
			assert_empty(pieces[idx])
			idx = idx + 1
		else
			local current = pieces[idx]
			local is_diphthong = false
			-- Only look ahead when another vowel slot actually follows.
			if idx < count - 1 then
				local key = rsub(current, stress_accent_c, "") .. rsub(pieces[idx + 2], stress_accent_c, "")
				is_diphthong = com.diphthongs[key]
			end
			if is_diphthong then
				assert_empty(pieces[idx + 1])
				nuclei[#nuclei + 1] = current .. pieces[idx + 2]
				idx = idx + 3
			else
				nuclei[#nuclei + 1] = current
				idx = idx + 1
			end
		end
	end
	return nuclei
end

-- Decompose a word into alternating C and V components. A C component is a
-- run of zero or more consonants; a V component is a single vowel or
-- diphthong (with its accents). The result always has an odd number of
-- entries: odd-numbered entries (counting from 1) are C components and
-- even-numbered entries are V components. Adjacent vowels that do not form a
-- diphthong are separated by an empty C component.
local function break_into_c_and_v_components(word)
	local pieces = strutils.capturing_split(word, "(" .. vowel_or_accent_c .. "+)")
	local components = {}
	for idx, piece in ipairs(pieces) do
		if idx % 2 == 0 then
			-- A vowel run: emit each nucleus, padding with "" between them so
			-- that the C/V alternation is preserved.
			for vidx, nucleus in ipairs(break_vowels(piece)) do
				if vidx > 1 then
					components[#components + 1] = ""
				end
				components[#components + 1] = nucleus
			end
		else
			components[#components + 1] = piece
		end
	end
	return components
end

-- Split a word part into a list of syllables.
--
-- The word is first broken into alternating consonant-cluster and vowel
-- components. Every consonant cluster is then distributed over the vowel
-- components on either side of it, and the (augmented) vowel components are
-- returned as the syllables. If the word contains no vowel at all, the
-- single-element component list is returned unchanged.
local function split_into_syllables(word)
	local cons_vowel = break_into_c_and_v_components(word)
	if #cons_vowel == 1 then
		-- No vowel found; nothing to syllabify.
		return cons_vowel
	end
	for i = 1, #cons_vowel do
		if i % 2 == 1 then
			-- consonant
			local cluster = cons_vowel[i]
			-- Length in characters, not bytes.
			local len = ulen(cluster)
			if i == 1 then
				-- Word-initial cluster belongs to the first syllable.
				cons_vowel[i + 1] = cluster .. cons_vowel[i + 1]
			elseif i == #cons_vowel then
				-- Word-final cluster belongs to the last syllable.
				cons_vowel[i - 1] = cons_vowel[i - 1] .. cluster
			elseif rfind(cluster, "%.") then
				-- An explicit syllable break (the first "." in the cluster)
				-- overrides the automatic division.
				local before_break, after_break = rmatch(cluster, "^(.-)%.(.*)$")
				cons_vowel[i - 1] = cons_vowel[i - 1] .. before_break
				cons_vowel[i + 1] = after_break .. cons_vowel[i + 1]
			elseif len == 0 then
				-- do nothing
			elseif len == 1 then
				-- A single consonant starts the following syllable.
				cons_vowel[i + 1] = cluster .. cons_vowel[i + 1]
			elseif len == 2 then
				-- Two consonants are divided between the syllables, except that
				-- sċ goes to the following syllable as a unit.
				local c1, c2 = rmatch(cluster, "^(.)(.)$")
				if c1 == "s" and c2 == "ċ" then
					cons_vowel[i + 1] = "sċ" .. cons_vowel[i + 1]
				else
					cons_vowel[i - 1] = cons_vowel[i - 1] .. c1
					cons_vowel[i + 1] = c2 .. cons_vowel[i + 1]
				end
			else
				-- Three or more consonants: give the following syllable the
				-- longest recognized onset taken from the end of the cluster.
				-- check for onset_3 preceded by consonant(s).
				local first, last3 = rmatch(cluster, "^(.-)(...)$")
				if #first > 0 and com.onsets_3[last3] then
					cons_vowel[i - 1] = cons_vowel[i - 1] .. first
					cons_vowel[i + 1] = last3 .. cons_vowel[i + 1]
				else
					local first, last2 = rmatch(cluster, "^(.-)(..)$")
					-- A secondary two-consonant onset is not used when the
					-- consonant before it is l or r.
					if com.onsets_2[last2] or (com.secondary_onsets_2[last2] and not first:find("[lr]$")) then
						cons_vowel[i - 1] = cons_vowel[i - 1] .. first
						cons_vowel[i + 1] = last2 .. cons_vowel[i + 1]
					else
						-- Fall back to a single-consonant onset.
						local first, last = rmatch(cluster, "^(.-)(.)$")
						cons_vowel[i - 1] = cons_vowel[i - 1] .. first
						cons_vowel[i + 1] = last .. cons_vowel[i + 1]
					end
				end
			end
		end
	end

	-- The even-numbered (vowel) components now hold the complete syllables.
	local retval = {}
	for i = 1, #cons_vowel do
		if i % 2 == 0 then
			-- remove any stray periods.
			table.insert(retval, rsub(cons_vowel[i], "%.", ""))
		end
	end
	return retval
end

-- Join syllables into a single string, replacing the stress accents inside
-- each syllable with a marker in front of it: ˈ for primary stress, ˌ for
-- secondary stress, and . for an unstressed non-initial syllable.
-- User-specified accents are always honored; auto-generated ones are ignored
-- when `no_auto_stress` is set.
local function combine_syllables_moving_stress(syllables, no_auto_stress)
	local allow_auto = not no_auto_stress
	local marked = {}
	for idx, syl in ipairs(syllables) do
		local marker = ""
		if syl:find(com.ACUTE) or (allow_auto and syl:find(AUTOACUTE)) then
			marker = "ˈ"
		elseif syl:find(com.GRAVE) or (allow_auto and syl:find(AUTOGRAVE)) then
			marker = "ˌ"
		elseif idx > 1 then
			marker = "."
		end
		-- Strip all stress accents (including circumflex) from the syllable.
		marked[idx] = marker .. rsub(syl, stress_accent_c, "")
	end
	return table.concat(marked)
end

-- Join word parts (split-off prefixes, suffixes or members of a compound)
-- into one word. Parts are separated by ⁀ and the whole word is wrapped in
-- ⁀⁀ on both sides.
local function combine_parts(parts)
	local pieces = {}
	for idx, part in ipairs(parts) do
		-- A non-initial part that doesn't begin with a stress marker still
		-- needs a syllable boundary in front of it.
		local needs_dot = idx > 1 and not rfind(part, "^[ˈˌ]")
		pieces[idx] = needs_dot and ("." .. part) or part
	end
	return "⁀⁀" .. table.concat(pieces, "⁀") .. "⁀⁀"
end

-- Turn a single word into its boundary-annotated form: decompose it, split it
-- into parts, syllabify each part, move stress marks to the front of their
-- syllables and finally join the parts. Auto-generated stress is suppressed
-- when `no_auto_stress` is set, and also for a word consisting of a single
-- one-syllable part.
local function transform_word(word, pos, no_auto_stress)
	local parts = split_on_word_boundaries(com.decompose(word), pos)
	local single_part = #parts == 1
	for idx = 1, #parts do
		local syllables = split_into_syllables(parts[idx])
		local suppress_auto = no_auto_stress or (single_part and #syllables == 1)
		parts[idx] = combine_syllables_moving_stress(syllables, suppress_auto)
	end
	return combine_parts(parts)
end

-- Normalize or guess the part of speech of `word`.
-- When `pos` is given, "adj"/"adjective" are treated as "noun", the values
-- "noun", "verb" and "verbal" are returned as-is, and anything else raises an
-- error. When `pos` is absent, it is guessed from the word's ending.
local function default_pos(word, pos)
	if pos then
		if pos == "adj" or pos == "adjective" then
			return "noun"
		end
		if pos == "noun" or pos == "verb" or pos == "verbal" then
			return pos
		end
		error("Unrecognized part of speech: " .. pos)
	end

	-- verbs in -an/-ōn/-ēon, inflected infinitives in -enne
	if rfind(word, "[aāō]n$") or rfind(word, "ēon$") or rfind(word, "enne$") then
		return "verb"
	end

	-- adjectives in -līċ, adverbs in -līċe and nouns in -nes can follow
	-- nouns or participles (which are "verbal"); truncate the ending
	-- and check what precedes
	local stem = rsub(word, "^(.*" .. vowel_c .. ".*)l[iī][cċ]e?$", "%1")
	stem = rsub(stem, "^(.*" .. vowel_c .. ".*)n[eiy]ss?$", "%1")

	-- participles in -end(e)/-en/-ed/-od, verbal nouns in -ing/-ung
	local verbal_endings = {"ende?$", "[eo]d$", "en$", "[iu]ng$"}
	for _, ending in ipairs(verbal_endings) do
		if rfind(stem, ending) then
			return "verbal"
		end
	end
	return "noun"
end

-- Produce the boundary-annotated phonemic form of a single word.
-- Returns two values: the phonemic string (still containing ⁀ markers and any
-- explicit-allophone placeholders) and the part of speech actually used.
local function generate_phonemic_word(word, pos)
	-- Drop one trailing punctuation mark, then turn [x]-style explicit
	-- allophone notation into the internal placeholder characters.
	word = rsub(gsub(word, "[.!?]$", ""), "%[(.)%]", char_to_explicit_char)
	pos = default_pos(word, pos)

	-- A leading or trailing hyphen marks a prefix/suffix entry; such entries
	-- get no auto-generated stress.
	local is_affix
	if word:find("^%-") or word:find("%-$") then
		is_affix = true
		word = gsub(word, "^%-?(.-)%-?$", "%1")
	end

	local transformed = transform_word(word, pos, is_affix)
	return apply_rules(transformed, phonemic_rules, pos), pos
end

-- Generate the phonemic transcription of `text`.
--
-- Parameters:
--   text: the respelling to transcribe; may contain several words separated
--         by single spaces. It may also be a frame-like table, in which case
--         the respelling is read from its args[1] and the part of speech from
--         its args["pos"].
--   pos:  optional part of speech ("noun", "verb", "verbal", "adj" or
--         "adjective"); guessed per word when absent.
-- Returns: the transcription as a string, without the enclosing slashes and
--   with all ⁀ boundary markers removed.
function export.phonemic(text, pos)
	if type(text) == "table" then
		local args = text.args
		pos = args["pos"]
		-- A frame keeps its positional parameters in `args`, not on the frame
		-- itself; fall back to text[1] for callers that pass a plain table.
		text = args[1] or text[1]
	end
	local words = {}
	text = ulower(text)
	for word in rgsplit(text, " ") do
		-- Only the first return value (the transcription) is needed here.
		local phonemic = generate_phonemic_word(word, pos)
		table.insert(words, phonemic)
	end
	local result = table.concat(words, " ")
	-- Replace explicit-allophone placeholders with their phonemes.
	result = rsub(result, ".", explicit_char_to_phonemic)
	return gsub(result, "⁀", "")
end

-- Generate the phonetic transcription of `text`.
--
-- Parameters:
--   text: the respelling to transcribe; may contain several words separated
--         by single spaces. It may also be a frame-like table, in which case
--         the respelling is read from its args[1] and the part of speech from
--         its args["pos"].
--   pos:  optional part of speech ("noun", "verb", "verbal", "adj" or
--         "adjective"); guessed per word when absent.
-- Returns: the transcription as a string, without the enclosing brackets and
--   with all ⁀ boundary markers removed.
function export.phonetic(text, pos)
	if type(text) == "table" then
		local args = text.args
		pos = args["pos"]
		-- A frame keeps its positional parameters in `args`, not on the frame
		-- itself; fall back to text[1] for callers that pass a plain table.
		text = args[1] or text[1]
	end
	local result = {}
	text = ulower(text)
	for word in rgsplit(text, " ") do
		-- The phonetic rules run on top of the phonemic form, using the part of
		-- speech that was determined for this particular word.
		local phonemic, respos = generate_phonemic_word(word, pos)
		word = apply_rules(phonemic, phonetic_rules, respos)
		table.insert(result, word)
	end
	return gsub(table.concat(result, " "), "⁀", "")
end

-- Template entry point. Reads the respellings (positional parameters), pos=
-- and ann= from the parent frame and returns the formatted IPA, optionally
-- preceded by an annotation. For every respelling the phonemic form is shown,
-- followed by the phonetic form when it differs.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = { required = true, default = "hlǣf-dīġe", list = true },
		["pos"] = {},
		["ann"] = {},
	})

	local pronunciations = {}
	for _, respelling in ipairs(args[1]) do
		local phonemic = export.phonemic(respelling, args.pos)
		local phonetic = export.phonetic(respelling, args.pos)
		pronunciations[#pronunciations + 1] = {pron = '/' .. phonemic .. '/'}
		if phonetic ~= phonemic then
			pronunciations[#pronunciations + 1] = {pron = '[' .. phonetic .. ']'}
		end
	end

	local annotation = ""
	if args.ann == "1" then
		-- ann=1: derive the annotation from the respellings themselves.
		-- remove all spelling markup except ġ/ċ and macrons
		local markup_c = "[%-+._<>" .. com.ACUTE .. com.GRAVE .. com.CFLEX .. "]"
		local spellings = {}
		for _, respelling in ipairs(args[1]) do
			local spelling = rsub(com.decompose(respelling), markup_c, "")
			spelling = rsub(spelling, "%[(.)%]", char_to_spelling)
			m_table.insertIfNot(spellings, "'''" .. spelling .. "'''")
		end
		annotation = table.concat(spellings, ", ") .. ":&#32;"
	elseif args.ann then
		-- Any other value of ann= is used verbatim as the annotation.
		annotation = "'''" .. args.ann .. "''':&#32;"
	end

	return annotation .. m_IPA.format_IPA_full(lang, pronunciations)
end

return export