Modul:ar-headword

Vikilug‘atdan olingan

Bu modul uchun Modul:ar-headword/doc nomli hujjat sahifasini yaratishingiz mumkin

-- Language object for code "ar", fetched from Module:tili. It is used below
-- for transliteration checks, canonical-name lookup and as the language
-- argument of the headword call.
local tili = require("Module:tili").getByCode("ar")

-- EXPORT is the table returned by this module; POS_FUNCTIONS maps a
-- part-of-speech name to the handler that export.show() runs for it.
local export = {}
local pos_functions = {}

-- diacritics
local u = mw.ustring.char
-- plain short vowels
local A  = u(0x064E) -- fatḥa
local U  = u(0x064F) -- ḍamma
local I  = u(0x0650) -- kasra
-- tanwīn (nunated) counterparts of the short vowels
local AN = u(0x064B) -- fatḥatān (fatḥa tanwīn)
local UN = u(0x064C) -- ḍammatān (ḍamma tanwīn)
local IN = u(0x064D) -- kasratān (kasra tanwīn)
-- remaining marks
local SK = u(0x0652) -- sukūn = no vowel
local SH = u(0x0651) -- šadda = gemination of consonants
local DAGGER_ALIF = u(0x0670)
-- character class matching any one of the diacritics above except šadda
local DIACRITIC_ANY_BUT_SH = "[" ..
	table.concat({A, I, U, AN, IN, UN, SK, DAGGER_ALIF}) .. "]"

-- various letters and signs
local TAM    = u(0x0629) -- tāʾ marbūṭa = ة

-- common combinations
-- character class matching either ḍammatān or ḍamma
local UNU   = "[" .. table.concat({UN, U}) .. "]"

-----------------------
-- Utility functions --
-----------------------

-- "If Not Empty": turn the empty string into nil and hand every other
-- value (including nil itself) back unchanged.
local function ine(arg)
	if arg ~= "" then
		return arg
	end
	return nil
end

-- Build a lookup table from the sequence LIST: every element becomes a key
-- whose value is true, so membership can be tested with lookup[x].
local function list_to_set(list)
	local lookup = {}
	for _, member in ipairs(list) do
		lookup[member] = true
	end
	return lookup
end

-- Version of mw.ustring.gsub() that discards all but the first return value
-- (i.e. the substitution count is dropped and only the resulting string is
-- returned).
-- TERM is the subject string, FOO the pattern and BAR the replacement.
-- Declared local so the helper does not leak into the global environment.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first result.
	return (mw.ustring.gsub(term, foo, bar))
end

local rfind = mw.ustring.find

-- Strip wiki-link markup from TEXT and return the result: the "[[target|"
-- prefix of a piped link is removed first, then any remaining "[[" and "]]"
-- brackets, leaving only the displayed text.
-- Declared local so the helper does not leak into the global environment.
local function remove_links(text)
	text = rsub(text, "%[%[[^|%]]*|", "")
	text = rsub(text, "%[%[", "")
	text = rsub(text, "%]%]", "")
	return text
end

-- Return TEXT with every "diacritic + shadda" pair swapped to
-- "shadda + diacritic".
-- Declared local so the helper does not leak into the global environment.
local function reorder_shadda(text)
	-- shadda+short-vowel (including tanwīn vowels, i.e. -an -in -un) gets
	-- replaced with short-vowel+shadda during NFC normalisation, which
	-- MediaWiki does for all Unicode strings; however, it makes the
	-- detection process inconvenient, so undo it. (For example, the tracking
	-- code below would fail to detect the -un in سِتٌّ because the shadda
	-- would come after the -un.)
	text = rsub(text, "(" .. DIACRITIC_ANY_BUT_SH .. ")" .. SH, SH .. "%1")
	return text
end

-- Tracking functions

local trackfn = require("Module:debug").track

-- Register the tracking page "ar-headword/PAGE" through Module:debug's
-- track function. Always returns true.
-- Declared local so the helper does not leak into the global environment.
local function track(page)
	trackfn("ar-headword/" .. page)
	return true
end

-- Emit tracking pages describing the vocalization and i3rab ending of FORM.
--
-- ARGNAME is the name of the argument the form came from (e.g. "bosh", "pl").
-- FORM is the Arabic text; links are stripped and shadda is reordered before
-- it is examined. TRANSLIT is the manual transliteration, or nil if none was
-- given. POS is the part of speech, or nil to skip the per-POS tracking pages.
--
-- The function and its two helpers are declared local; the helpers used to
-- be re-assigned as globals on every call.
local function track_form(argname, form, translit, pos)
	form = reorder_shadda(remove_links(form))

	-- Examples of what you can find by looking at what links to the given
	-- pages:
	--
	-- Template:tracking/ar-headword/unvocalized (all unvocalized pages)
	-- Template:tracking/ar-headword/unvocalized/pl (all unvocalized pages
	--   where the plural is unvocalized, whether specified using pl=,
	--   pl2=, etc.)
	-- Template:tracking/ar-headword/unvocalized/head (all unvocalized pages
	--   where the head is unvocalized)
	-- Template:tracking/ar-headword/unvocalized/head/nouns (all nouns --
	--   excluding proper nouns, collective nouns, singulative nouns --
	--   where the head is unvocalized)
	-- Template:tracking/ar-headword/unvocalized/head/proper nouns (all
	--   proper nouns where the head is unvocalized)
	-- Template:tracking/ar-headword/unvocalized/head/not proper nouns (all
	--   words that are not proper nouns where the head is unvocalized)
	-- Template:tracking/ar-headword/unvocalized/adjectives (all
	--   adjectives where any parameter is unvocalized; currently only works
	--   for heads, so equivalent to .../unvocalized/head/adjectives)
	-- Template:tracking/ar-headword/unvocalized-empty-head (all pages
	--   with an empty head)
	-- Template:tracking/ar-headword/unvocalized-manual-translit (all
	--   unvocalized pages with manual translit)
	-- Template:tracking/ar-headword/unvocalized-manual-translit/head/nouns
	--   (all nouns where the head is unvocalized but has manual translit)
	-- Template:tracking/ar-headword/unvocalized-no-translit (all unvocalized
	--   pages without manual translit)
	-- Template:tracking/ar-headword/i3rab (all pages with any parameter
	--   containing i3rab of either -un, -u, -a or -i)
	-- Template:tracking/ar-headword/i3rab-un (all pages with any parameter
	--   containing an -un i3rab ending)
	-- Template:tracking/ar-headword/i3rab-un/pl (all pages where a form
	--   specified using pl=, pl2=, etc. contains an -un i3rab ending)
	-- Template:tracking/ar-headword/i3rab-u/head (all pages with a head
	--   containing an -u i3rab ending)
	-- Template:tracking/ar-headword/i3rab/head/proper nouns (all proper nouns
	--   with a head containing i3rab of either -un, -u, -a or -i)
	--
	-- In general, the format is one of the following:
	--
	-- Template:tracking/ar-headword/FIRSTLEVEL
	-- Template:tracking/ar-headword/FIRSTLEVEL/ARGNAME
	-- Template:tracking/ar-headword/FIRSTLEVEL/POS
	-- Template:tracking/ar-headword/FIRSTLEVEL/ARGNAME/POS
	--
	-- FIRSTLEVEL can be one of "unvocalized", "unvocalized-empty-head" or its
	-- opposite "unvocalized-specified", "unvocalized-manual-translit" or its
	-- opposite "unvocalized-no-translit", "i3rab", "i3rab-un", "i3rab-u",
	-- "i3rab-a", or "i3rab-i".
	--
	-- ARGNAME is either "head" or an argument such as "pl", "f", "cons", etc.
	-- This automatically includes arguments specified as head2=, pl3=, etc.
	--
	-- POS is a part of speech, lowercase and pluralized, e.g. "nouns",
	-- "adjectives", "proper nouns", "collective nouns", etc. or
	-- "not proper nouns", which includes all parts of speech but proper nouns.

	-- Track PAGE on its own and qualified by ARGNAME and, when POS is
	-- given, by POS (plus the "not proper nouns" variants where applicable).
	local function dotrack(page)
		track(page)
		track(page .. "/" .. argname)
		if pos then
			track(page .. "/" .. pos)
			track(page .. "/" .. argname .. "/" .. pos)
			if pos ~= "proper nouns" then
				track(page .. "/not proper nouns")
				track(page .. "/" .. argname .. "/not proper nouns")
			end
		end
	end

	-- If the form ends in the diacritic ARABIC, track both the generic
	-- "i3rab" page and the specific "i3rab-TR" page.
	local function track_i3rab(arabic, tr)
		if rfind(form, arabic .. "$") then
			dotrack("i3rab")
			dotrack("i3rab-" .. tr)
		end
	end

	track_i3rab(UN, "un")
	track_i3rab(U, "u")
	track_i3rab(A, "a")
	track_i3rab(I, "i")

	-- A form counts as unvocalized when it is empty or when the language
	-- object's transliterate() yields nothing for it.
	if form == "" or not tili:transliterate(form) then
		dotrack("unvocalized")
		if form == "" then
			dotrack("unvocalized-empty-head")
		else
			dotrack("unvocalized-specified")
		end
		if translit then
			dotrack("unvocalized-manual-translit")
		else
			dotrack("unvocalized-no-translit")
		end
	end
end

-- The main entry point.
--
-- FRAME is the invocation frame. frame.args[1] is the part of speech (an
-- error is raised if it is missing); the remaining parameters are read from
-- the parent frame: the headword in bosh= or 1=, its manual transliteration
-- in tr=, and further headwords and transliterations in bosh2=/tr2=,
-- bosh3=/tr3=, etc. If a handler is registered in pos_functions for the part
-- of speech, it is run to fill in genders, inflections and categories.
-- Returns the result of Module:headword's full_headword().
function export.show(frame)
	local args = frame:getParent().args
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	-- Gather parameters
	local boshlar = {}
	local translits = {}
	local genders = {}
	local infls = {}
	local cats = {"Arabchada " .. poscat}

	-- An empty bosh= must not mask a headword given in 1=; this is the same
	-- fallback handle_gender() applies when it inspects the headword. With
	-- neither present, the first headword is the empty string.
	local bosh = ine(args["bosh"]) or ine(args[1]) or ""
	local translit = ine(args["tr"])
	local i = 1

	while bosh do
		table.insert(boshlar, bosh)
		-- Indexed by headword position so that a missing transliteration
		-- does not shift the later ones.
		translits[#boshlar] = translit
		track_form("bosh", bosh, translit, poscat)

		i = i + 1
		bosh = ine(args["bosh" .. i])
		translit = ine(args["tr" .. i])
	end

	if pos_functions[poscat] then
		pos_functions[poscat](args, genders, infls, cats)
	end

	-- Track use of positional parameters 3 through 9.
	if args[3] or args[4] or args[5] or args[6] or args[7] or args[8] or args[9] then
		track("num")
	end

	if args["bosh"] then
		track("bosh")
	end

	if args["g"] then
		track("g")
	end

	return require("Module:headword").full_headword(tili, nil, boshlar, translits, genders, infls, cats, nil)
end

-- Get a list of inflections. See handle_infl() for meaning of ARGS, ARGPREF
-- and DEFGENDER.
--
-- The first form is read from ARGPREF, ARGPREF.."tr", ARGPREF.."g" and
-- ARGPREF.."g2"; later forms from ARGPREF..N, ARGPREF..N.."tr",
-- ARGPREF..N.."g" and ARGPREF..N.."g2" for N = 2, 3, ... until a form is
-- missing or empty. Each form is also passed to track_form().
--
-- Returns a sequence of tables {term = ..., translit = ..., gender = ...},
-- where gender is a list of the genders given, or {DEFGENDER} if none was
-- given and DEFGENDER is set, or nil otherwise.
local function params(args, argpref, defgender)
	-- Gather parameters
	local forms = {}

	local form = ine(args[argpref])
	local translit = ine(args[argpref .. "tr"])
	local gender = ine(args[argpref .. "g"])
	local gender2 = ine(args[argpref .. "g2"])
	local i = 1

	while form do
		-- Build the gender list element by element: writing
		-- {gender, gender2} would leave a nil hole at index 1 when only
		-- the second gender is given, hiding it from ipairs().
		local genderlist
		if gender or gender2 then
			genderlist = {}
			if gender then
				table.insert(genderlist, gender)
			end
			if gender2 then
				table.insert(genderlist, gender2)
			end
		elseif defgender then
			genderlist = {defgender}
		end

		track_form(argpref, form, translit)
		table.insert(forms, {term = form, translit = translit, gender = genderlist})

		i = i + 1
		form = ine(args[argpref .. i])
		translit = ine(args[argpref .. i .. "tr"])
		gender = ine(args[argpref .. i .. "g"])
		gender2 = ine(args[argpref .. i .. "g2"])
	end

	return forms
end

-- Collect the inflections named by argument prefix ARGPREF from ARGS
-- (e.g. "pl" picks up "pl", "pl2", etc., along with "pltr", "pl2tr", etc.
-- and the optional genders "plg", "plg2", "pl2g", "pl2g2", etc.), label the
-- resulting list with LABEL (e.g. "plural", shown in the headword) and
-- append it to the inflection list INFLS. Nothing is appended when no form
-- was found. DEFGENDER, if given, is the gender attached to forms that
-- specify none; without it such forms carry no gender. (This is used for
-- singulative forms of collective nouns, and collective forms of singulative
-- nouns, which have different gender from the base form(s).)
local function handle_infl(args, infls, argpref, label, defgender)
	local collected = params(args, argpref, defgender)
	collected.label = label

	if #collected == 0 then
		return
	end
	table.insert(infls, collected)
end

-- Handle a basic inflection (e.g. plural, feminine) along with the
-- construct, definite, oblique and informal variants of this inflection.
-- Can also handle the variants of the base form itself if both ARGPREF and
-- LABEL are given as blank strings. The base inflection is skipped when
-- NOBASE is set or when ARGPREF is blank.
local function handle_all_infl(args, infls, argpref, label, nobase)
	local wants_base = not nobase and argpref ~= ""
	if wants_base then
		handle_infl(args, infls, argpref, label)
	end

	-- A non-empty label is prefixed to each variant label, separated by
	-- a space.
	local prefix = ""
	if label ~= "" then
		prefix = label .. " "
	end

	-- {argument suffix, label suffix}, in display order
	local variants = {
		{"cons", "construct state"},
		{"def", "definite state"},
		{"obl", "oblique"},
		{"inf", "informal"},
	}
	for _, variant in ipairs(variants) do
		handle_infl(args, infls, argpref .. variant[1], prefix .. variant[2])
	end
end

-- Put the category "<canonical language name> POS" at the front of CATS.
local function prepend_cat(cats, pos)
	local category = tili:getCanonicalName() .. " " .. pos
	table.insert(cats, 1, category)
end

-- Add the category "<canonical language name> POS" at the end of CATS.
local function append_cat(cats, pos)
	local category = tili:getCanonicalName() .. " " .. pos
	table.insert(cats, category)
end

-- Handle the plural of a noun. Normally the pl=, pl2=, ... forms are added
-- to INFLS under the label "plural"; the special value pl=- instead marks
-- the noun as usually uncountable and adds the matching category to CATS.
local function handle_noun_plural(args, infls, cats)
	if args["pl"] ~= "-" then
		handle_infl(args, infls, "pl", "plural")
		return
	end

	table.insert(infls, {label = "usually [[Appendix:Glossariy#uncountable|uncountable]]"})
	append_cat(cats, "uncountable nouns")
end

-- Gender codes accepted by handle_gender().
local valid_genders = list_to_set({
	"m", "f", "m-s", "f-s", "m-d", "f-d", "p", "m-p", "f-p",
})

-- Handle gender in unnamed param 2 and a second gender in param g2,
-- inserting into the list of genders in GENDERS. An unspecified first
-- gender falls back to DEFAULT (which may be omitted) and, failing that, is
-- recorded as "?". An unrecognized gender code raises an error.
-- Also insert categories into CATS if two genders are present, or if a
-- plain "m"/"f" gender disagrees with the presence of tāʾ marbūṭa at the
-- end of the first word of the headword (bosh= or 1=).
local function handle_gender(args, genders, cats, default)
	local g = ine(args[2]) or default
	local g2 = ine(args["g2"])

	-- Validate one gender code and record it; nil is recorded as "?".
	local function add_gender(code)
		if not code then
			table.insert(genders, "?")
			return
		end
		if not valid_genders[code] then
			error("Unrecognized gender: " .. code)
		end
		table.insert(genders, code)
	end

	add_gender(g)
	if g2 then
		add_gender(g2)
	end

	if g and g2 then
		append_cat(cats, "terms with multiple genders")
		return
	end
	-- The ending check only applies to a single plain masculine or
	-- feminine gender.
	if g ~= "m" and g ~= "f" then
		return
	end

	local bosh = ine(args["bosh"]) or ine(args[1])
	if not bosh then
		return
	end

	-- Drop links, normalise shadda order and strip a final -u/-un before
	-- looking at the ending.
	bosh = rsub(reorder_shadda(remove_links(bosh)), UNU .. "?$", "")
	local ends_with_tam = rfind(bosh, "^[^ ]*" .. TAM .. "$") or
		rfind(bosh, "^[^ ]*" .. TAM .. " ")

	if ends_with_tam then
		if g == "m" then
			append_cat(cats, "masculine terms with feminine ending")
		end
	elseif g == "f" then
		append_cat(cats, "feminine terms lacking feminine ending")
	end
end

-- Part-of-speech functions

-- Handler for the "sifat" part of speech: collects the base-form variants,
-- then the feminine, dual and the three plural series (each with its
-- construct/definite/oblique/informal variants), and finally the elative.
-- GENDERS and CATS are not touched.
pos_functions["sifat"] = function(args, genders, infls, cats)
	-- {argument prefix, label}, in display order
	local series = {
		{"", ""}, -- handle cons, def, obl, inf
		{"f", "feminine"},
		{"d", "dual"},
		{"cpl", "common plural"},
		{"pl", "masculine plural"},
		{"fpl", "feminine plural"},
	}
	for _, entry in ipairs(series) do
		handle_all_infl(args, infls, entry[1], entry[2])
	end
	handle_infl(args, infls, "el", "elative")
end

-- Collect the inflections shared by collective and singulative nouns from
-- ARGS into INFLS: the base-form variants, dual, paucal and plural, each
-- with its construct/definite/oblique/informal variants. CATS receives the
-- uncountable category when pl=- is given.
-- Declared local so the helper does not leak into the global environment.
local function handle_sing_coll_noun_infls(args, infls, cats)
	handle_all_infl(args, infls, "", "") -- handle cons, def, obl, inf
	handle_all_infl(args, infls, "d", "dual")
	handle_all_infl(args, infls, "pauc", "paucal")
	-- The base plural goes through handle_noun_plural() so that pl=- is
	-- recognised; only its variants are handled by handle_all_infl().
	handle_noun_plural(args, infls, cats)
	handle_all_infl(args, infls, "pl", "plural", "nobase")
end

-- Collective and singulative tracking code. FIXME: This is old and may not
-- be needed anymore. ARGS are the template arguments. COLLSING is either
-- "coll" or "sing" according to whether we're dealing with collective or
-- singulative nouns. OTHER is the other of the two possible values of
-- COLLSING. DEFGENDER is the default gender for nouns of this type --
-- "m" for collectives, "f" for singulatives.
--
-- Tracks "COLLSING nDEFGENDER" when the gender in param 2 differs from the
-- default, and, when OTHER.."g" is given, tracks OTHER.."g" plus
-- OTHER.."g/m", OTHER.."g/f" or OTHER.."g/-" according to its value.
-- Declared local so the helper does not leak into the global environment.
local function track_coll_sing(args, collsing, other, defgender)
	local g = ine(args[2]) or defgender
	if g ~= defgender then
		track(collsing .. " n" .. defgender)
	end

	local otherg = ine(args[other .. "g"])
	if otherg then
		track(other .. "g")

		if otherg == "m" or otherg == "f" then
			track(other .. "g/" .. otherg)
		else
			track(other .. "g/-")
		end
	end
end

-- Handler for "collective nouns": adds the "nouns" category in front, a
-- bare "collective" label, the gender (default "m"), the corresponding
-- singulative forms and the inflections common to collective and
-- singulative nouns.
pos_functions["collective nouns"] = function(args, genders, infls, cats)
	local default_gender = "m"

	prepend_cat(cats, "nouns")
	table.insert(infls, {label = "collective"})

	track_coll_sing(args, "coll", "sing", default_gender)
	handle_gender(args, genders, cats, default_gender)
	-- sing= is the corresponding singulative noun and singg= its gender,
	-- which defaults to feminine.
	handle_infl(args, infls, "sing", "singulative", "f")
	handle_sing_coll_noun_infls(args, infls, cats)
end

-- Handler for "singulative nouns": adds the "nouns" category in front, a
-- bare "singulative" label, the gender (default "f"), the corresponding
-- collective forms and the inflections common to collective and
-- singulative nouns.
pos_functions["singulative nouns"] = function(args, genders, infls, cats)
	local default_gender = "f"

	prepend_cat(cats, "nouns")
	table.insert(infls, {label = "singulative"})

	track_coll_sing(args, "sing", "coll", default_gender)
	handle_gender(args, genders, cats, default_gender)
	-- coll= is the corresponding collective noun and collg= its gender,
	-- which defaults to masculine.
	handle_infl(args, infls, "coll", "collective", "m")
	handle_sing_coll_noun_infls(args, infls, cats)
end

-- Collect the inflections of an ordinary noun from ARGS into INFLS: the
-- base-form variants, then (unless SINGONLY is set) dual and plural, then
-- feminine and masculine counterparts, each with its
-- construct/definite/oblique/informal variants. CATS receives the
-- uncountable category when pl=- is given.
-- Declared local so the helper does not leak into the global environment.
local function handle_noun_infls(args, infls, cats, singonly)
	handle_all_infl(args, infls, "", "") -- handle cons, def, obl, inf
	if not singonly then
		handle_all_infl(args, infls, "d", "dual")
		-- The base plural goes through handle_noun_plural() so that pl=-
		-- is recognised; only its variants are handled here.
		handle_noun_plural(args, infls, cats)
		handle_all_infl(args, infls, "pl", "plural", "nobase")
	end
	handle_all_infl(args, infls, "f", "feminine")
	handle_all_infl(args, infls, "m", "masculine")
end

-- Handler for "nouns": records the gender (no default) and collects the
-- full set of noun inflections.
pos_functions["nouns"] = function(args, genders, infls, cats)
	local default_gender = nil -- an unspecified gender is shown as "?"
	handle_gender(args, genders, cats, default_gender)
	handle_noun_infls(args, infls, cats)
end

-- Handler for "numerals": adds the "cardinal numbers" category, then
-- treats the term like a noun (gender without default, full inflections).
pos_functions["numerals"] = function(args, genders, infls, cats)
	local extra_category = "cardinal numbers"
	append_cat(cats, extra_category)

	local default_gender = nil -- an unspecified gender is shown as "?"
	handle_gender(args, genders, cats, default_gender)
	handle_noun_infls(args, infls, cats)
end


-- Handler for "proper nouns": records the gender (no default) and collects
-- noun inflections with dual and plural skipped.
pos_functions["proper nouns"] = function(args, genders, infls, cats)
	-- Any true value switches handle_noun_infls() to singular-only mode.
	local singonly = "singular only"
	handle_gender(args, genders, cats)
	handle_noun_infls(args, infls, cats, singonly)
end


-- Handler for "verbal nouns": adds the "nouns" category in front, then
-- treats the term like a noun (gender without default, full inflections).
pos_functions["verbal nouns"] = function(args, genders, infls, cats)
	local parent_category = "nouns"
	prepend_cat(cats, parent_category)

	local default_gender = nil -- an unspecified gender is shown as "?"
	handle_gender(args, genders, cats, default_gender)
	handle_noun_infls(args, infls, cats)
end

-- Handler for "pronouns": records the gender (no default) and collects the
-- feminine forms together with their construct/definite/oblique/informal
-- variants.
pos_functions["pronouns"] = function(args, genders, infls, cats)
	handle_gender(args, genders, cats)
	handle_all_infl(args, infls, "f", "feminine")
end

-- Handler for "noun plural forms": adds the "plurals" category in front,
-- records the gender (default "p") and collects the
-- construct/definite/oblique/informal variants of the form itself.
pos_functions["noun plural forms"] = function(args, genders, infls, cats)
	local default_gender = "p"

	prepend_cat(cats, "plurals")
	-- (a "noun forms" category is deliberately not added)
	--prepend_cat(cats, "noun forms")
	handle_gender(args, genders, cats, default_gender)
	handle_all_infl(args, infls, "", "") -- handle cons, def, obl, inf
end

-- Handler for "noun dual forms": adds the "duals" category in front and
-- records the gender, defaulting to "m-d".
pos_functions["noun dual forms"] = function(args, genders, infls, cats)
	local default_gender = "m-d"

	prepend_cat(cats, "duals")
	-- (a "noun forms" category is deliberately not added)
	--prepend_cat(cats, "noun forms")
	handle_gender(args, genders, cats, default_gender)
end

-- Handler for "adjective plural forms": adds the "plurals" category in
-- front and records the gender, defaulting to "m-p".
pos_functions["adjective plural forms"] = function(args, genders, infls, cats)
	local default_gender = "m-p"

	prepend_cat(cats, "plurals")
	-- (an "adjective forms" category is deliberately not added)
	--prepend_cat(cats, "adjective forms")
	handle_gender(args, genders, cats, default_gender)
end

-- Handler for "adjective dual forms": adds the "duals" category in front
-- and records the gender, defaulting to "m-d".
pos_functions["adjective dual forms"] = function(args, genders, infls, cats)
	local default_gender = "m-d"

	prepend_cat(cats, "duals")
	-- (an "adjective forms" category is deliberately not added)
	--prepend_cat(cats, "adjective forms")
	handle_gender(args, genders, cats, default_gender)
end

-- Handler for "plurals": records the gender, defaulting to "p".
pos_functions["plurals"] = function(args, genders, infls, cats)
	local default_gender = "p"
	handle_gender(args, genders, cats, default_gender)
end

-- Handler for "noun forms": records the gender; there is no default, so an
-- unspecified gender is shown as "?".
pos_functions["noun forms"] = function(args, genders, infls, cats)
	local default_gender = nil
	handle_gender(args, genders, cats, default_gender)
end

-- Verb conjugation forms accepted by handle_conj_form(): I through XV plus
-- the "q"-suffixed forms Iq through IVq.
local valid_forms = list_to_set({
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII",
	"XIII", "XIV", "XV", "Iq", "IIq", "IIIq", "IVq",
})

-- If param 2 of ARGS names a verb conjugation form, append to INFLS a label
-- linking to that form's section of the verb appendix. An unrecognized form
-- raises an error; a missing or empty param 2 does nothing.
local function handle_conj_form(args, infls)
	local form = ine(args[2])
	if not form then
		return
	end
	if not valid_forms[form] then
		error("Invalid verb conjugation form " .. form)
	end

	local link = '[[Appendix:Arabchada feʼli#Form ' .. form .. '|form ' .. form .. ']]'
	table.insert(infls, {label = link})
end

-- Handler for "verb forms": the only extra information is the conjugation
-- form given in param 2, if any. GENDERS and CATS are not touched.
pos_functions["verb forms"] = function(args, genders, infls, cats)
	return handle_conj_form(args, infls)
end

-- Handler for "active participles": adds the "participles" category in
-- front and shows the conjugation form given in param 2, if any.
pos_functions["active participles"] = function(args, genders, infls, cats)
	local parent_category = "participles"
	prepend_cat(cats, parent_category)
	handle_conj_form(args, infls)
end

-- Handler for "passive participles": adds the "participles" category in
-- front and shows the conjugation form given in param 2, if any.
pos_functions["passive participles"] = function(args, genders, infls, cats)
	local parent_category = "participles"
	prepend_cat(cats, parent_category)
	handle_conj_form(args, infls)
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: