Kontent qismiga oʻtish

Modul:tili/data: Versiyalar orasidagi farq

Vikilug‘atdan olingan
Kontent oʻchirildi Kontent qoʻshildi
Tahrir izohi yoʻq
Tahrir izohi yoʻq
Teg: Qaytarildi
Qator 1: Qator 1:
local langs = {
local m_scripts = require("Module:scripts")

["abq"] = { "abq", "Abazacha" },
local table = table
["ab"] = { "ab", "Abxazcha" },
local insert = table.insert
["av"] = { "av", "Avarcha" },
local u = require("Module:string/char")
["ave"] = { "ave", "Avestocha" },

["agx"] = { "agx", "Agulcha" },
local export = {}
["aja"] = { "aja", "Adja", "", "(Sudan)" },

["ajg"] = { "ajg", "Adja", "", "(Benin)" },
-- UTF-8 encoded strings for some commonly-used diacritics.
["ady"] = { "ady", "Adigeycha" },
local c = {
["az"] = { "az", "Ozarbayjoncha" },
grave = u(0x0300),
["az.cyr"] = { "az", "Ozarbayjoncha", "", "(kir.)" },
acute = u(0x0301),
["az.lat"] = { "az", "Ozarbayjoncha", "", "(lat.)" },
circ = u(0x0302),
["az.arab"] = { "az", "Ozarbayjoncha", "", "(arabsk.)" },
tilde = u(0x0303),
["ay"] = { "ay", "Aymarcha" },
macron = u(0x0304),
["ain"] = { "ain", "Ayncha" },
overline = u(0x0305),
["ain.lat"] = { "ain", "Ayncha", "", "(lat)" },
breve = u(0x0306),
["ain.kana"] = { "ain", "Ayncha", "", "(kana)" },
dotabove = u(0x0307),
["akk"] = { "akk", "Akkadcha", "†" },
diaer = u(0x0308),
["akz"] = { "akz", "Alabamcha" },
ringabove = u(0x030A),
["sq"] = { "sq", "Albancha" },
dacute = u(0x030B),
["als"] = { "als", "Alemancha" },
caron = u(0x030C),
["ale"] = { "ale", "Aleutcha" },
lineabove = u(0x030D),
["alt"] = { "alt", "Oltoycha" },
dgrave = u(0x030F),
["alr"] = { "alr", "Alyutorcha" },
invbreve = u(0x0311),
["am"] = { "am", "Amxarcha" },
commaabove = u(0x0313),
["en"] = { "en", "Inglizcha" },
revcommaabove = u(0x0314),
["ar"] = { "ar", "Arabcha" },
dotbelow = u(0x0323),
["an"] = { "an", "Aragoncha" },
diaerbelow = u(0x0324),
["arc.syr"] = { "arc", "Arameycha", "", "(sir.)" },
ringbelow = u(0x0325),
["arc.jud"] = { "arc", "Arameycha", "", "(iud.)" },
cedilla = u(0x0327),
["arp"] = { "arp", "Arapaxo" },
ogonek = u(0x0328),
["arn"] = { "arn", "Araukancha" },
brevebelow = u(0x032E),
["hy"] = { "hy", "Armancha" },
macronbelow = u(0x0331),
["rup"] = { "rup", "Arumincha" },
perispomeni = u(0x0342),
["aqc"] = { "aqc", "Archincha" },
ypogegrammeni = u(0x0345),
["asm"] = { "asm", "Assamcha" },
CGJ = u(0x034F), -- combining grapheme joiner
["aii"] = { "aii", "Assiriycha" },
zigzag = u(0x035B),
["ast"] = { "ast", "Asturiycha" },
dbrevebelow = u(0x035C),
["auj"] = { "auj", "Audjila" },
dmacron = u(0x035E),
["aar"] = { "aar", "Afarcha" },
dtilde = u(0x0360),
["af"] = { "af", "Afrikaans" },
dinvbreve = u(0x0361),
["ace"] = { "ace", "Achexcha" },
small_a = u(0x0363),
["bar"] = { "bar", "Bavarcha" },
small_e = u(0x0364),
["ban"] = { "ban", "Baliycha" },
small_i = u(0x0365),
["bm"] = { "bm", "Bambara" },
small_o = u(0x0366),
["bjn"] = { "bjn", "Bandjarcha" },
small_u = u(0x0367),
["eu"] = { "eu", "Baskcha" },
kamora = u(0x0484),
["ba"] = { "ba", "Boshqirdcha" },
dasiapneumata = u(0x0485),
["be"] = { "be", "Beloruscha" },
psilipneumata = u(0x0486),
["bal"] = { "bal", "Belujcha" },
kashida = u(0x0640),
["bem"] = { "bem", "Bemba" },
fathatan = u(0x064B),
["bn"] = { "bn", "Bengalcha" },
dammatan = u(0x064C),
["bcl"] = { "bcl", "Bikolcha", "", "(markaziy)" },
kasratan = u(0x064D),
["byn"] = { "byn", "Bilin" },
fatha = u(0x064E),
["my"] = { "my", "Birmancha" },
damma = u(0x064F),
["bib"] = { "bib", "Bisa" },
kasra = u(0x0650),
["bis"] = { "bis", "Bislama" },
shadda = u(0x0651),
["bpy"] = { "bpy", "Bishnupriya-manipuri" },
sukun = u(0x0652),
["akm"] = { "akm", "Bo" },
hamzaabove = u(0x0654),
["bg"] = { "bg", "Bolgarcha" },
nunghunna = u(0x0658),
["bs"] = { "bs", "Bosniycha" },
zwarakay = u(0x0659),
["bph"] = { "bph", "Botlixcha" },
smallv = u(0x065A),
["br"] = { "br", "Bretoncha" },
superalef = u(0x0670),
["bdk"] = { "bdk", "Buduxcha" },
udatta = u(0x0951),
["bua"] = { "bua", "Buryatcha" },
anudatta = u(0x0952),
["bug"] = { "bug", "Bugiycha" },
psili = u(0x1FBD),
["vai"] = { "vai", "Vai" },
coronis = u(0x1FBF),
["cy"] = { "cy", "Valliycha" },
ZWNJ = u(0x200C), -- zero width non-joiner
["wa"] = { "wa", "Valloncha" },
ZWJ = u(0x200D), -- zero width joiner
["war"] = { "war", "Varaycha" },
RSQuo = u(0x2019), -- right single quote
["hu"] = { "hu", "Mojarcha" },
VS01 = u(0xFE00), -- variation selector 1
["ven"] = { "ven", "Venda" },
-- Punctuation for the standardChars field.
["vec"] = { "vec", "Venetcha" },
-- Note: characters are literal (i.e. no magic characters).
["vep"] = { "vep", "Vepscha" },
punc = " ',-‐‑‒–—…∅",
["hsb"] = { "hsb", "Verxnelujitcha" },
-- Range covering all diacritics.
["vot"] = { "vot", "Vodcha" },
diacritics = u(0x300) .. "-" .. u(0x34E) ..
["vo"] = { "vo", "Volapyuk", "i" },
u(0x350) .. "-" .. u(0x36F) ..
["wo"] = { "wo", "Volof" },
u(0x1AB0) .. "-" .. u(0x1ACE) ..
["stq"] = { "stq", "Vostochnofrizcha" },
u(0x1DC0) .. "-" .. u(0x1DFF) ..
["vro"] = { "vro", "Virucha" },
u(0x20D0) .. "-" .. u(0x20F0) ..
["vi"] = { "vi", "Vetnamcha" },
u(0xFE20) .. "-" .. u(0xFE2F),
["haw"] = { "haw", "Gavayаcha" },
}
["gag"] = { "gag", "Gagauzcha" },
-- Braille characters for the standardChars field.
["ht"] = { "ht", "Gaityancha" },
local braille = {}
["gl"] = { "gl", "Galisiyаcha" },
["gan"] = { "gan", "Gan" },
for i = 0x2800, 0x28FF do
insert(braille, u(i))
["ze"] = { "ze", "Genuezcha" },
end
["gez"] = { "gez", "Geez" },
c.braille = table.concat(braille)
["goe"] = { "goe", "Gongdu" },
export.chars = c
["mrj"] = { "mrj", "Gornomariycha" },

["got"] = { "got", "Gotcha", "†" },
-- PUA characters, generally used in sortkeys.
["xcl"] = { "xcl", "Grabar" },
-- Note: if the limit needs to be increased, do so in powers of 2 (due to the way memory is allocated for tables).
["kl"] = { "kl", "Grenlandcha" },
local p = {}
["el"] = { "el", "Grechecha" },
for i = 1, 32 do
["el.dhi"] = { "el", "Grechecha", "", "(demot.)" },
p[i] = u(0xF000+i-1)
["el.kat"] = { "el", "Grechecha", "", "(kafar.)" },
end
["ka"] = { "ka", "Gruzincha" },
export.puaChars = p
["gn"] = { "gn", "Guarani" },

["gu"] = { "gu", "Gujarati" },
local s = {}
["gd"] = { "gd", "Gelcha" },
-- These values are placed here to make it possible to synchronise a group of languages without the need for a dedicated function module.
["dar"] = { "dar", "Dargincha" },

["prs"] = { "prs", "Dari" },
s["cau-Cyrl-displaytext"] = {
["da"] = { "da", "Datcha" },
["dz"] = { "dz", "Dzong-ke" },
from = {"[IlΙІӀ]", ""},
["dv"] = { "dv", "Divexi" },
to = {"ӏ", ""}
}
["dlg"] = { "dlg", "Dolgancha" },

["dgo"] = { "dgo", "Dogri" },
s["cau-Cyrl-entryname"] = {
["ang"] = { "ang", "Qadimgi inglizcha", "†" },
remove_diacritics = c.grave .. c.acute .. c.macron,
["obt"] = { "obt", "Qadimgi bretoncha", "†" },
from = s["cau-Cyrl-displaytext"].from,
["goh"] = { "goh", "Qadimgi yuqori nemischa", "†" },
to = s["cau-Cyrl-displaytext"].to
["grc"] = { "grc", "Qadim grekcha", "†" },
}
["hbo"] = { "hbo", "Qadimgi yevreycha", "†" },

["non"] = { "non", "Qadimgi islandcha", "†" },
s["cau-Latn-entryname"] = {remove_diacritics = c.grave .. c.acute .. c.macron}
["peo"] = { "peo", "Qadimgi forscha", "†" },

["orv"] = { "orv", "Qadimgi ruscha", "†" },
s["Cyrs-entryname"] = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.kamora .. c.dasiapneumata .. c.psilipneumata}
["fic-drw"] = { "", "Drou", "f", "" },

["dng"] = { "dng", "Dungancha" },
s["Cyrs-sortkey"] = {
["egy"] = { "egy", "Misrcha", "†" },
from = {
["arz"] = { "arz", "Misrcha arabcha" },
"ї", "оу", -- 2 chars
["sgs"] = { "sgs", "Jemaytcha" },
"ґ", "ꙣ", "є", "[ѕꙃꙅ]", "ꙁ", "[іꙇ]", "[ђꙉ]", "[ѻꙩꙫꙭꙮꚙꚛ]", "ꙋ", "[ѡѿꙍѽ]", "ꙑ", "ѣ", "ꙗ", "ѥ", "ꙕ", "[ѧꙙ]", "[ѩꙝ]", "ꙛ", "ѫ", "ѭ", "ѯ", "ѱ", "ѳ", "ѵ", "ҁ" -- 1 char
["vls"] = { "vls", "Gʻarbiy flamandcha" },
},
["zza"] = { "zza", "Zazaki" },
to = {
["zu"] = { "zu", "Zulu" },
["he"] = { "he", "Ivrit" },
"и" .. p[1], "у",
"г" .. p[1], "д" .. p[1], "е", "ж" .. p[1], "з", "и" .. p[1], "и" .. p[2], "о", "у", "х" .. p[1], "ы", "ь" .. p[1], "ь" .. p[2], "ь" .. p[3], "ю", "я", "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4], "я" .. p[5], "я" .. p[6], "я" .. p[7], "я" .. p[8], "я" .. p[9]
["ibo"] = { "ibo", "Igbo" },
},
["yi"] = { "yi", "Idish" },
}
["io"] = { "io", "Ido", "i" },

["izh"] = { "izh", "Ijorcha" },
s["Grek-sortkey"] = {
["ilo"] = { "ilo", "Ilokancha" },
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
["smn"] = { "smn", "Inari-saamcha" },
["inh"] = { "inh", "Ingushcha" },
from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"},
to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]}
["id"] = { "id", "Indonezcha" },
}
["ia"] = { "ia", "Interlingva" , "i" },

["ie"] = { "ie", "Interlingve", "i" },
s["Jpan-standardchars"] = -- exclude ぢづヂヅ
["iu"] = { "iu", "Inuktitut" },
"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちっつてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろん" ..
["ik"] = { "ik", "Inupiak" },
"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチッツテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロン"
["ga"] = { "ga", "Irlandcha" },

["is"] = { "is", "Irlandcha" },
local jpx_displaytext = {
["es"] = { "es", "Ispancha" },
["it"] = { "it", "Italyancha" },
from = {"", ""},
["itl"] = { "itl", "Itelmencha" },
to = {"", ""}
}
["ith.lat"] = { "ith", "Ifkuil", "i", "(lat.)" },

["yo"] = { "yo", "Yoruba" },
s["jpx-displaytext"] = {
["kbd"] = { "kbd", "Kabardin-cherkescha" },
Jpan = jpx_displaytext,
["kab"] = { "kab", "Qobilcha" },
Hani = jpx_displaytext,
["kea"] = { "kea", "Kabuverdyanu" },
Hrkt = jpx_displaytext,
["kk"] = { "kk", "Qozoqcha" },
Hira = jpx_displaytext,
["kk.cyr"] = { "kk", "Qozoqcha", "", "(kir.)" },
Kana = jpx_displaytext
["kk.lat"] = { "kk", "Qozoqcha", "", "(lat.)" },
-- not Latn or Brai
["kk.arab"] = { "kk", "Qozoqcha", "", "(arab.)" },
}
["xal"] = { "xal", "Kalmicha" },

["rmq"] = { "rmq", "Kalo" },
s["jpx-entryname"] = s["jpx-displaytext"]
["kn"] = { "kn", "Kannada" },

["pam"] = { "pam", "Kapampangancha" },
s["jpx-sortkey"] = {
["kdr"] = { "kdr", "Qaraimcha" },
Jpan = "Jpan-sortkey",
["kaa"] = { "kaa", "Qoraqalpoqcha" },
Hani = "Hani-sortkey",
["krc"] = { "krc", "Qorachoy-bolqorcha" },
Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira
["krl"] = { "krl", "Karelcha" },
Hira = "Hira-sortkey",
["ca"] = { "ca", "Katalancha" },
Kana = "Kana-sortkey",
["kas"] = { "kas", "Kashmiriy" },
Latn = {remove_diacritics = c.tilde .. c.macron .. c.diaer}
["csb"] = { "csb", "Kashubcha" },
}
["qya"] = { "qya", "Kvenya", "f" },

["qu"] = { "qu", "Kechua" },
s["jpx-translit"] = {
["kg"] = { "kg", "Kikongo" },
Hrkt = "Hrkt-translit",
["kik"] = { "kik", "Kikuyyu" },
Hira = "Hrkt-translit",
["rw"] = { "rw", "Kinyaruanda" },
Kana = "Hrkt-translit"
["ky"] = { "ky", "Qirgʻizcha" },
}
["gil"] = { "gil", "Kiribati" },

["run"] = { "run", "Kirundi" },
local HaniChars = m_scripts.getByCode("Hani"):getCharacters()
["zh"] = { "zh", "Xitoycha" },
-- `漢字(한자)`→`漢字`
["zh-tw"] = { "", "Xitoycha", "", "(anʼana.)" },
-- `가-나-다`→`가나다`, `가--나--다`→`가-나-다`
["wuu"] = { "", "Xitoycha", "", "(u)" },
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]])
["zh-cn"] = { "", "Xitoycha", "", "(sodda.)" },
s["Kore-entryname"] = {
["nan"] = { "", "Xitoycha", "", "(janubiy mincha)" },
remove_diacritics = u(0x302E) .. u(0x302F),
["sms"] = { "sms", "Koltta-saamcha" },
from = {"([" .. HaniChars .. "])%(.-%)", "^%-", "%-$", "%-(%-?)", "\1", "%([" .. HaniChars .. "/]+%)"},
["kom"] = { "kom", "Komi-ziryancha" },
["koi"] = { "koi", "Komi-permyatcha" },
to = {"%1", "\1", "\1", "%1", "-"}
}
["kok"] = { "kok", "Konkani" },

["cop"] = { "cop", "Koptcha" },
s["Lisu-sortkey"] = {
["ko"] = { "ko", "Koreyscha" },
["kw"] = { "kw", "Korncha" },
from = {"𑾰"},
["co"] = { "co", "Korsikancha" },
to = {"" .. p[1]}
}
["kpy"] = { "kpy", "Koryakcha" },

["xh"] = { "xh", "Kosa" },
s["Mong-displaytext"] = {
["cr"] = { "cr", "Kri" },
from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^᠋ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
["crh"] = { "crh", "Qrim-tatarcha" },
["jct"] = { "jct", "Qrimchoqcha" },
to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"}
}
["kum"] = { "kum", "Qumiqcha" },

["ku"] = { "ku", "Qurdcha" },
s["Mong-entryname"] = s["Mong-displaytext"]
["ku.cyr"] = { "ku", "Qurdcha", "", "(kir.)" },

["kmr"] = { "kmr", "Qurdcha (kurmandji)", "", "(lat.)" },
s["Polyt-entryname"] = {
["ckb"] = { "ckb", "Qurdcha (sorani)" },
remove_diacritics = c.macron .. c.breve .. c.dbrevebelow,
["km"] = { "km", "Kxmercha" },
from = {"[" .. c.RSQuo .. c.psili .. c.coronis .. "]"},
["lad"] = { "lad", "Ladino" },
["lld"] = { "lld", "Ladincha" },
to = {"'"}
}
["lzz"] = { "lzz", "Lazcha" },

["lkt"] = { "lkt", "Lakota" },
s["roa-oil-sortkey"] = {
["lbe"] = { "lbe", "Lakcha" },
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'",
["lo"] = { "lo", "Laoscha" },
["ltg"] = { "ltg", "Latgalcha" },
from = {"æ", "œ"},
["la"] = { "la", "Latincha" },
to = {"ae", "oe"}
}
["lv"] = { "lv", "Latishcha" },

["lez"] = { "lez", "Lezgincha" },
s["Tibt-displaytext"] = {
["liv"] = { "liv", "Livcha" },
from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
["li"] = { "li", "Limburgcha" },
to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"}
["ln"] = { "ln", "Lingala" },
}
["lt"] = { "lt", "Litovcha" },

["jbo"] = { "jbo", "Lojban", "i" },
s["Tibt-entryname"] = s["Tibt-displaytext"]
["lmo"] = { "lmo", "Lombardcha" },

["lug"] = { "lug", "Luganda" },
s["wen-sortkey"] = {
["lb"] = { "lb", "Lyuksemburgcha" },
from = {
["mad"] = { "mad", "Madurcha" },
"l", -- Ensure "l" comes after "ł".
["mzn"] = { "mzn", "Mazenderancha" },
"b́", "č", "ć", "dź", "ě", "f́", "ch", "ł", "ḿ", "ń", "ó", "ṕ", "ř", "ŕ", "š", "ś", "ẃ", "ž", "ż", "ź"
["mak"] = { "mak", "Makasarcha" },
},
["mk"] = { "mk", "Makedoncha" },
to = {
["mg"] = { "mg", "Malagasiyacha" },
"l" .. p[1],
["ms"] = { "ms", "Malaycha" },
"b" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e" .. p[1], "f" .. p[1], "h" .. p[1], "l", "m" .. p[1], "n" .. p[1], "o" .. p[1], "p" .. p[1], "r" .. p[1], "r" .. p[2], "s" .. p[1], "s" .. p[2], "w" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]
["ml"] = { "ml", "Malayalam" },
}
["mt"] = { "mt", "Maltiycha" },
}
["man.arab"] = { "man", "Mandingo", "", "(arabsk.)" },

["man.lat"] = { "man", "Mandingo", "", "(lat.)" },
export.shared = s
["mns"] = { "mns", "Mansiycha" },

["mnc"] = { "mnc", "Manchjurcha" },
-- Short-term solution to override the standard substitution process, by forcing the module to substitute the entire text in one pass. This results in any PUA characters that are used as stand-ins for formatting being handled by the language-specific substitution process, which is usually undesirable.
["mi"] = { "mi", "Maori" },
-- This override is provided for languages which use formatting between strings of text which might need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.)
["mr"] = { "mr", "Maratxi" },
export.contiguous_substitution = {
["chm"] = { "chm", "Mariycha" },
["mas"] = { "mas", "Masaycha" },
["ja"] = "tr",
["xmf"] = { "xmf", "Megrelcha" },
["jje"] = "tr",
["ulk"] = { "ulk", "Meriam" },
["ko"] = "tr",
["gmy"] = { "gmy", "Mikencha", "†" },
["ko-ear"] = "tr",
["omn"] = { "omn", "Minoycha", "†" },
["ru"] = "tr",
["cdo"] = { "cdo", "Min-dun" },
["th-new"] = "tr",
["mwl"] = { "mwl", "Mirandcha" },
["sa"] = "tr",
["moh"] = { "moh", "Mogaukcha" },
["zkt"] = "tr",
}
["mdf"] = { "mdf", "Mokshancha" },

["mo"] = { "mo", "Moldavcha" },
-- Code aliases. The left side is the alias and the right side is the canonical code. NOTE: These are gradually
["mn"] = { "mn", "Mongolcha" },
-- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable
["mos"] = { "mos", "More" },
-- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed.
["gv"] = { "gv", "Mencha" },
-- Aliases in this table are tracked at [[Wiktionary:Tracking/languages/LANG]]; see e.g.
["hmn"] = { "hmn", "Myao" },
-- [[Special:WhatLinksHere/Wiktionary:Tracking/languages/RL.]] for the `RL.` alias.
["nv"] = { "nv", "Navaxo" },
export.aliases = {
["naq"] = { "naq", "Nama" },
["gld"] = { "gld", "Nanaycha" },
["CL."] = "la-cla",
["nah"] = { "nah", "Nauatl" },
["EL."] = "la-ecc",
["na"] = { "na", "Nauru" },
["LL."] = "la-lat",
["nio"] = { "nio", "Nganasancha" },
["ML."] = "la-med",
["nap"] = { "nap", "Neapolitano-kalabriycha" },
["NL."] = "la-new",
["new"] = { "new", "Nevarcha" },
["RL."] = "la-ren",
["de"] = { "de", "Nemischa" },
["VL."] = "la-vul",
["yrk"] = { "yrk", "Nenecha" },
["prv"] = "oc-pro",
}
["ne"] = { "ne", "Nepalcha" },

["niv"] = { "niv", "Nivxcha" },
-- Codes which are tracked. Note that all aliases listed above are also tracked, so should not be duplicated here.
["nl"] = { "nl", "Niderlandcha" },
-- Tracking uses the same mechanism described above in the comment above `export.aliases`.
["dsb"] = { "dsb", "Nijnelujitcha" },
export.track = {
["nds-nl"] = { "nds-nl", "Nijnenemecha (Niderlandi)" },
-- Codes duplicated between full and etymology-only languages.
["nds"] = { "nds", "Nijnesaksoncha" },
["nov"] = { "nov", "Novial" },
["lzh-lit"] = true,
-- Languages actively being converted to families.
["nog"] = { "nog", "Nogaycha" },
["no"] = { "no", "Norvejcha" },
["bh"] = true, -- inc-bih
["nb"] = { "nb", "Norvejcha (bukmol)" },
["nan"] = true, -- zhx-nan
["nn"] = { "nn", "Norvejcha (nyunorsk)" },
["roa-nor"] = { "roa-nor", "Normandcha" },
["pih"] = { "pih", "Norfolkcha" },
["ii"] = { "ii", "Nosu" },
["oj"] = { "oj", "Odjibva" },
["oc"] = { "oc", "Oksitancha" },
["art-oou"] = { "", "Oou", "i" },
["or"] = { "or", "Oriya" },
["om"] = { "om", "Oromo" },
["os"] = { "os", "Osetincha" },
["ota"] = { "ota", "Osmancha", "†" },
["pau"] = { "pau", "Palau" },
["pi"] = { "pi", "Pali" },
["pag"] = { "pag", "Pangasinancha" },
["pa"] = { "pa", "Pandjabi" },
["pap"] = { "pap", "Papyamentu" },
["nso"] = { "nso", "Pedi" },
["fa"] = { "fa", "Forscha" },
["pcd"] = { "pcd", "Pikardcha" },
["pox"] = { "pox", "Polabcha", "†" },
["pl"] = { "pl", "Polcha" },
["pt"] = { "pt", "Portugalcha" },
["psl"] = { "", "Praslavyancha", "r" },
["ppol"] = { "", "Protopolineziycha" },
["prg"] = { "prg", "Pruscha", "†" },
["ps"] = { "ps", "Pushtu" },
["pms"] = { "pms", "Pyemontcha" },
["rap"] = { "rap", "Rapanuycha" },
["rm"] = { "rm", "Retoromancha" },
["ksh"] = { "ksh", "Ripuarcha" },
["rmy"] = { "rmy", "Romani" },
["roh"] = { "roh", "Romanshcha" },
["ro"] = { "ro", "Rumincha" },
["ru"] = { "ru", "Ruscha" },
["rue"] = { "rue", "Rusincha" },
["rut"] = { "rut", "Rutulcha" },
["sjd"] = { "sjd", "Saamcha (kildincha)" },
["sm"] = { "sm", "Samoa:" },
["sg"] = { "sag", "Sango" },
["sa"] = { "sa", "Sanskrit" },
["sat"] = { "sat", "Santali" },
["sc"] = { "sc", "Sardincha" },
["sva"] = { "sva", "Svancha" },
["ss"] = { "ss", "Svati" },
["ceb"] = { "ceb", "Sebuano" },
["se"] = { "se", "Severnosaamcha" },
["ykg"] = { "ykg", "Severnoyukagircha" },
["sel"] = { "sel", "Selkupcha" },
["sr"] = { "sr", "Serbcha", "", "(kir.)" },
["sr-l"] = { "sr", "Serbcha", "", "(lat.)" },
["sh"] = { "sh", "Serbskoxorvatcha" },
["st"] = { "st", "Sesoto" },
["szl"] = { "szl", "Silezcha" },
["si"] = { "si", "Singalcha" },
["sjn"] = { "", "Sindarin", "f" },
["sd"] = { "sd", "Sindxi" },
["syc"] = { "syc", "Siriycha" },
["scn"] = { "scn", "Sitsiliycha" },
["sk"] = { "sk", "Slovatcha" },
["sl"] = { "sl", "Slovencha" },
["slovio-c"] = { "slovio", "Slovio", "i", "(kir.)" },
["slovio-l"] = { "slovio", "Slovio", "i", "(lat.)" },
["sob"] = { "sob", "Sobey" },
["xog"] = { "xog", "Soga" },
["sol"] = { "sol", "Solresol", "i" },
["so"] = { "so", "Somaliycha" },
["snk"] = { "snk", "Soninke" },
["srn"] = { "srn", "Sranan-tongo" },
["enm"] = { "enm", "Oʻrta inglizcha" },
["xbm"] = { "xbm", "Oʻrta bretoncha" },
["gmh"] = { "gmh", "Oʻrta yuqori nemischa", "†" },
["frm"] = { "frm", "Oʻrta fransuzcha" },
["oen"] = { "oen", "Eski inglizcha" },
["cu"] = { "cu", "Eski slavyancha", "†" },
["cu-Cyrl"] = { "cu", "Eski slavyancha", "†", "(kirillitsa.)" },
["cu-Glag"] = { "cu", "Eski slavyancha", "†", "(glagolitsa)" },
["fro"] = { "fro", "Eski fransuzcha" },
["sw"] = { "sw", "Suaxili" },
["suk"] = { "suk", "Sukuma" },
["su"] = { "su", "Sundancha" },
["tab"] = { "tab", "Tabasarancha" },
["tl"] = { "tl", "Tagalcha" },
["tg"] = { "tg", "Tojikcha" },
["ty"] = { "ty", "Taityancha" },
["th"] = { "th", "Taycha" },
["tly"] = { "tly", "Talishcha" },
["tmh"] = { "tmh", "Tamashek" },
["ta"] = { "ta", "Tamilcha" },
["tt"] = { "tt", "Tatarcha" },
["tt.cyr"] = { "tt", "Tatarcha", "", "(kir.)" },
["tt.lat"] = { "tt", "Tatarcha", "", "(lot.)" },
["ttt"] = { "ttt", "Tatcha" },
["te"] = { "te", "Telugu" },
["tet"] = { "tet", "Tetum" },
["bo"] = { "bo", "Tibetcha" },
["tig"] = { "tig", "Tigre" },
["tir"] = { "tir", "Tigrinya" },
["art"] = { "art", "Tokipona", "i" },
["tpi"] = { "tpi", "Tok-pisin" },
["ksd"] = { "ksd", "Tolai" },
["to"] = { "to", "Tonga" },
["kim"] = { "kim", "Tofalarcha" },
["tn"] = { "tn", "Tsvana" },
["tso"] = { "tso", "Tsonga" },
["tvl"] = { "tvl", "Tuvalu" },
["tyv"] = { "tyv", "Tuvacha" },
["tcy"] = { "tcy", "Tulu" },
["tr"] = { "tr", "Turkcha" , "", "", "Turkey" },
["tk"] = { "tk", "Turkmancha" },
["uby"] = { "uby", "Ubixcha", "†" },
["uga"] = { "uga", "Ugaritcha", "†" },
["udi"] = { "udi", "Udincha" },
["udm"] = { "udm", "Udmurtcha" },
["ug"] = { "ug", "Uygʻurcha" },
["uk"] = { "uk", "Ukraincha" },
["uz"] = { "uz", "Oʻzbekcha" },
["ulc"] = { "ulc", "Ulchcha" },
["ur"] = { "ur", "Urdu" },
["fo"] = { "fo", "Farercha" },
["fj"] = { "fj", "Fidji" },
["fi"] = { "fi", "Fincha" },
["fon"] = { "fon", "Fon" },
["frk"] = { "frk", "Frankcha" },
["fr"] = { "fr", "Fransuzcha" },
["fy"] = { "fy", "Frizcha" },
["fur"] = { "fur", "Friulcha" },
["ff"] = { "", "Fula" },
["kjh"] = { "kjh", "Xakascha" },
["hak"] = { "hak", "Xakka" },
["kca"] = { "kca", "Xantiycha" },
["ha"] = { "ha", "Xausa" },
["ha.lat"] = { "ha", "Xausa", "", "(lat.)" },
["ha.arab"] = { "ha", "Xausa", "", "(arab.)" },
["hit"] = { "hit", "Xettcha", "†" },
["hi"] = { "hi", "Hindi" },
["hr"] = { "hr", "Xorvatcha" },
["tkr"] = { "tkr", "Saxurcha" },
["cel"] = { "", "Selincha", "f" },
["chu-ru"] = { "chu-ru", "Cherkov-slavyancha" },
["rom"] = { "rom", "Sigancha" },
["ch"] = { "ch", "Chamorro" },
["twi"] = { "twi", "Chvi" },
["chr"] = { "chr", "Cheroki" },
["ce"] = { "ce", "Chechencha" },
["cs"] = { "cs", "Cheshcha" },
["cv"] = { "cv", "Chuvashcha" },
["ckt"] = { "ckt", "Chukotcha" },
["chy"] = { "chy", "Shayencha" },
["sv"] = { "sv", "Shvedcha" },
["xsr"] = { "xsr", "Sherpcha" },
["shp"] = { "shp", "Shipibo" },
["sn"] = { "sn", "Shona" },
["cjs"] = { "cjs", "Shorcha" },
["sco"] = { "sco", "Shotlandcha" },
["sux"] = { "sux", "Shumercha", "†" },
["ewe"] = { "ewe", "Eve" },
["evn"] = { "evn", "Evenkiycha" },
["eve"] = { "eve", "Evencha" },
["eml"] = { "eml", "Emiliano-romanolcha" },
["myv"] = { "myv", "Erzyancha" },
["eo"] = { "eo", "Esperanto", "i" },
["et"] = { "et", "Estoncha" },
["ext"] = { "ext", "Estremadurcha" },
["yux"] = { "yux", "Janubiy yukagircha" },
["yua"] = { "yua", "Yukatekcha" },
["yue"] = { "yue", "Yue" },
["jv"] = { "jv", "Yavancha" },
["sah"] = { "sah", "Yoqutcha" },
["ium"] = { "ium", "Yao" },
["ja"] = { "ja", "Yaponcha" }
}
}


return langs;
return export

2024-yil 6-noyabr, 01:23 dagi koʻrinishi

Bu modul uchun Modul:tili/data/doc nomli hujjat sahifasini yaratishingiz mumkin

local m_scripts = require("Module:scripts")

local table = table
local insert = table.insert
local u = require("Module:string/char")

local export = {}

-- UTF-8 encoded strings for some commonly-used diacritics.
-- Every named entry is the result of calling `u` (Module:string/char) on a single code point.
-- The table gains a `braille` field and is published as `export.chars` further down the file.
local c = {
	-- U+0300..U+036F: Combining Diacritical Marks block.
	grave			= u(0x0300),
	acute			= u(0x0301),
	circ			= u(0x0302),
	tilde			= u(0x0303),
	macron			= u(0x0304),
	overline		= u(0x0305),
	breve			= u(0x0306),
	dotabove		= u(0x0307),
	diaer			= u(0x0308),
	ringabove		= u(0x030A),
	dacute			= u(0x030B),
	caron			= u(0x030C),
	lineabove		= u(0x030D),
	dgrave			= u(0x030F),
	invbreve		= u(0x0311),
	commaabove		= u(0x0313),
	revcommaabove	= u(0x0314),
	dotbelow		= u(0x0323),
	diaerbelow		= u(0x0324),
	ringbelow		= u(0x0325),
	cedilla			= u(0x0327),
	ogonek			= u(0x0328),
	brevebelow		= u(0x032E),
	macronbelow		= u(0x0331),
	perispomeni		= u(0x0342),
	ypogegrammeni	= u(0x0345),
	CGJ				= u(0x034F), -- combining grapheme joiner
	zigzag			= u(0x035B),
	dbrevebelow		= u(0x035C),
	dmacron			= u(0x035E),
	dtilde			= u(0x0360),
	dinvbreve		= u(0x0361),
	small_a			= u(0x0363),
	small_e			= u(0x0364),
	small_i			= u(0x0365),
	small_o			= u(0x0366),
	small_u			= u(0x0367),
	-- U+0484..U+0486: combining marks from the Cyrillic block.
	kamora          = u(0x0484),
	dasiapneumata   = u(0x0485),
	psilipneumata   = u(0x0486),
	-- U+0640..U+0670: Arabic block.
	kashida			= u(0x0640),
	fathatan		= u(0x064B),
	dammatan		= u(0x064C),
	kasratan		= u(0x064D),
	fatha			= u(0x064E),
	damma			= u(0x064F),
	kasra			= u(0x0650),
	shadda			= u(0x0651),
	sukun			= u(0x0652),
	hamzaabove		= u(0x0654),
	nunghunna		= u(0x0658),
	zwarakay        = u(0x0659),
	smallv			= u(0x065A),
	superalef		= u(0x0670),
	-- U+0951..U+0952: Devanagari block.
	udatta			= u(0x0951),
	anudatta		= u(0x0952),
	-- U+1FBD, U+1FBF: Greek Extended block.
	psili			= u(0x1FBD),
	coronis			= u(0x1FBF),
	-- Format controls, punctuation and variation selector.
	ZWNJ			= u(0x200C), -- zero width non-joiner
	ZWJ				= u(0x200D), -- zero width joiner
	RSQuo			= u(0x2019), -- right single quote
	VS01			= u(0xFE00), -- variation selector 1
	-- Punctuation for the standardChars field.
	-- Note: characters are literal (i.e. no magic characters).
	punc			= " ',-‐‑‒–—…∅",
	-- Range covering all diacritics.
	-- Built as "<first>-<last>" pairs concatenated together, i.e. presumably meant to be spliced
	-- inside a pattern character class by the consumer (confirm against callers).
	-- The first two ranges stop at U+034E and resume at U+0350, so U+034F (CGJ above) is left out.
	diacritics		= u(0x300) .. "-" .. u(0x34E) ..
						u(0x350) .. "-" .. u(0x36F) ..
						u(0x1AB0) .. "-" .. u(0x1ACE) ..
						u(0x1DC0) .. "-" .. u(0x1DFF) ..
						u(0x20D0) .. "-" .. u(0x20F0) ..
						u(0xFE20) .. "-" .. u(0xFE2F),
}
-- All 256 code points U+2800..U+28FF joined into one string, stored as
-- `c.braille` for use in the standardChars field; the finished `c` table is
-- then published as `export.chars`.
local braille = {}
for codepoint = 0x2800, 0x28FF do
	braille[#braille + 1] = u(codepoint)
end
c.braille = table.concat(braille)
export.chars = c

-- Private Use Area characters U+F000..U+F01F, generally used in sortkeys.
-- p[1] is U+F000, p[2] is U+F001, and so on.
-- Note: should more be needed, grow the count in powers of 2 (because of the way memory is allocated for tables).
local p = {}
for offset = 0, 31 do
	p[offset + 1] = u(0xF000 + offset)
end
export.puaChars = p

local s = {}
-- Shared data is kept in this module so that a group of languages can be kept in sync without requiring a dedicated function module.

do
	-- Diacritics stripped from entry names; identical for the Cyrl and Latn variants.
	local stripped = c.grave .. c.acute .. c.macron
	-- Replacement lists; the very same table objects serve both display text and entry name.
	local from_list = {"[IlΙІӀ]", "ᴴ"}
	local to_list = {"ӏ", "ᵸ"}

	s["cau-Cyrl-displaytext"] = {from = from_list, to = to_list}

	s["cau-Cyrl-entryname"] = {
		remove_diacritics = stripped,
		from = from_list,
		to = to_list,
	}

	s["cau-Latn-entryname"] = {remove_diacritics = stripped}
end

-- Entry-name data for the "Cyrs" key: only a list of diacritics (taken from `c`) to remove.
s["Cyrs-entryname"] = {remove_diacritics = c.grave .. c.acute ..  c.diaer .. c.kamora .. c.dasiapneumata .. c.psilipneumata}

-- Sort-key data for the "Cyrs" key.
-- `from` and `to` are parallel lists of 27 entries each; presumably the consumer pairs them by
-- index (from[i] -> to[i]) — confirm in the module that reads `export.shared`.
-- Replacement values are a base letter, optionally followed by a PUA character from `p`
-- (p[1], p[2], ...), so several source letters can share one base letter and still be distinct.
s["Cyrs-sortkey"] = {
	from = {
		-- The longer sequences come first, ahead of the single characters.
		"ї", "оу", -- 2 chars
		"ґ", "ꙣ", "є", "[ѕꙃꙅ]", "ꙁ", "[іꙇ]", "[ђꙉ]", "[ѻꙩꙫꙭꙮꚙꚛ]", "ꙋ", "[ѡѿꙍѽ]", "ꙑ", "ѣ", "ꙗ", "ѥ", "ꙕ", "[ѧꙙ]", "[ѩꙝ]", "ꙛ", "ѫ", "ѭ", "ѯ", "ѱ", "ѳ", "ѵ", "ҁ" -- 1 char
	},
	to = {
		-- Same layout as `from`: first row answers the "2 chars" row, second row the "1 char" row.
		"и" .. p[1], "у",
		"г" .. p[1], "д" .. p[1], "е", "ж" .. p[1], "з", "и" .. p[1], "и" .. p[2], "о", "у", "х" .. p[1], "ы", "ь" .. p[1], "ь" .. p[2], "ь" .. p[3], "ю", "я", "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4], "я" .. p[5], "я" .. p[6], "я" .. p[7], "я" .. p[8], "я" .. p[9]
	},
}

-- Sort-key data for the "Grek" key.
-- `remove_diacritics` concatenates the marks (from `c`) to be stripped.
-- `from`/`to` are parallel lists of 10 entries each; replacements are a base letter plus an
-- optional PUA character from `p`.
-- Note that two pairs of source characters deliberately share a target: the 6th and 7th entries
-- both map to "π" .. p[2], and the 9th and 10th both map to "ω" .. p[1].
s["Grek-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
	from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"},
	to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]}
}

-- A plain string (not a from/to table) listing kana characters: the first literal holds the
-- hiragana row, the second the katakana row, concatenated into one value.
s["Jpan-standardchars"] = -- exclude ぢづヂヅ
	"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちっつてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろん" ..
	"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチッツテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロン"

-- One replacement table, referenced by every script key below.
local jpx_displaytext = {
	from = {"~", "="},
	to = {"〜", "゠"},
}

do
	-- Per-script lookup; Latn and Brai are intentionally absent.
	local per_script = {
		Jpan = jpx_displaytext,
		Hani = jpx_displaytext,
		Hrkt = jpx_displaytext,
		Hira = jpx_displaytext,
		Kana = jpx_displaytext,
	}

	-- Display text and entry name share the same table object.
	s["jpx-displaytext"] = per_script
	s["jpx-entryname"] = per_script
end

do
	-- String values that more than one script key points at.
	local hira_sort = "Hira-sortkey"
	local kana_translit = "Hrkt-translit"

	s["jpx-sortkey"] = {
		Jpan = "Jpan-sortkey",
		Hani = "Hani-sortkey",
		Hrkt = hira_sort, -- general kana is sorted by normalizing to Hira
		Hira = hira_sort,
		Kana = "Kana-sortkey",
		Latn = {remove_diacritics = c.tilde .. c.macron .. c.diaer},
	}

	-- All three kana script keys use the same transliteration value.
	s["jpx-translit"] = {
		Hrkt = kana_translit,
		Hira = kana_translit,
		Kana = kana_translit,
	}
end

-- Character list for the "Hani" script object, fetched from Module:scripts; it is spliced into
-- the character classes of the patterns below.
local HaniChars = m_scripts.getByCode("Hani"):getCharacters()
-- `漢字(한자)`→`漢字`
-- `가-나-다`→`가나다`, `가--나--다`→`가-나-다`
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]])
-- Reading the patterns in list order (assuming the consumer applies them sequentially with
-- gsub-style semantics — confirm):
--   1. a Hani character followed by a parenthesised group keeps only the Hani character;
--   2. a leading hyphen becomes the placeholder byte "\1";
--   3. a trailing hyphen becomes the placeholder byte "\1";
--   4. a hyphen plus an optional second hyphen is reduced to the captured second hyphen
--      (so "-" disappears and "--" becomes "-");
--   5. the placeholder "\1" is turned back into "-";
--   6. a parenthesised run of Hani characters and "/" is matched.
-- NOTE(review): `from` has six entries but `to` has five, so pattern 6 has no explicit
-- replacement; presumably the consumer treats a missing `to[i]` as deletion — verify.
s["Kore-entryname"] = {
	remove_diacritics = u(0x302E) .. u(0x302F),
	from = {"([" .. HaniChars .. "])%(.-%)", "^%-", "%-$", "%-(%-?)", "\1", "%([" .. HaniChars .. "/]+%)"},
	to = {"%1", "\1", "\1", "%1", "-"}
}

-- Sort-key data for the "Lisu" key: a single source character is replaced by another
-- character followed by the first PUA character (p[1]).
s["Lisu-sortkey"] = {
	from = {"𑾰"},
	to = {"ꓬ" .. p[1]}
}

-- Display-text data for the "Mong" key: three pattern/replacement pairs.
-- Each pattern captures the neighbouring character(s) and the replacement re-emits them
-- (%1, %2) around a different middle letter; the third pattern is the end-of-string ($)
-- variant of the second.
s["Mong-displaytext"] = {
	from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^᠋ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
	to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"}
}

-- Entry names reuse the very same table object as display text.
s["Mong-entryname"] = s["Mong-displaytext"]

do
	-- Character class matching any of three apostrophe-like characters taken from `c`.
	local quote_class = "[" .. c.RSQuo .. c.psili .. c.coronis .. "]"

	-- Entry-name data for the "Polyt" key: strip three diacritics and replace
	-- anything matching the class above with a plain ASCII apostrophe.
	s["Polyt-entryname"] = {
		remove_diacritics = c.macron .. c.breve .. c.dbrevebelow,
		from = {quote_class},
		to = {"'"},
	}
end

-- Sort-key data for the "roa-oil" key.
-- `remove_diacritics` lists six combining marks from `c` and, as its last character, the
-- plain ASCII apostrophe.
-- `from`/`to` expand the two ligatures into their two-letter spellings.
s["roa-oil-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'",
	from = {"æ", "œ"},
	to = {"ae", "oe"}
}

-- Display-text data for the "Tibt" key: parallel lists of 11 entries each, presumably paired
-- by index by the consumer (confirm). Several `from` entries are two-character sequences
-- whose counterpart in `to` is a single character, and vice versa.
s["Tibt-displaytext"] = {
	from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
	to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"}
}

-- Entry names reuse the very same table object as display text.
s["Tibt-entryname"] = s["Tibt-displaytext"]

-- Sort-key data for the "wen" key: parallel lists of 21 entries each.
-- Replacements are a base letter plus an optional PUA character from `p`, so that letters
-- with diacritics (and the digraphs "dź" and "ch") get their own slot after the base letter.
s["wen-sortkey"] = {
	from = {
		"l", -- Ensure "l" comes after "ł".
		"b́", "č", "ć", "dź", "ě", "f́", "ch", "ł", "ḿ", "ń", "ó", "ṕ", "ř", "ŕ", "š", "ś", "ẃ", "ž", "ż", "ź"
	},
	to = {
		-- Plain "l" is handled first and gets the PUA suffix; "ł" (8th entry of the row below)
		-- then becomes bare "l", which is what puts "l" after "ł".
		"l" .. p[1],
		"b" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e" .. p[1], "f" .. p[1], "h" .. p[1], "l", "m" .. p[1], "n" .. p[1], "o" .. p[1], "p" .. p[1], "r" .. p[1], "r" .. p[2], "s" .. p[1], "s" .. p[2], "w" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]
	}
}

-- Publish every entry collected in `s` under `export.shared`.
export.shared = s

-- Stop-gap override of the standard substitution process: the module is forced to substitute the whole text in a single pass. A side effect is that PUA characters standing in for formatting are also handled by the language-specific substitution process, which is usually undesirable.
-- The override exists for languages where formatting sits between strings of text that may need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.)
-- Keys are language codes; every listed code currently carries the value "tr".
export.contiguous_substitution = {
	ja = "tr",
	jje = "tr",
	ko = "tr",
	["ko-ear"] = "tr",
	ru = "tr",
	["th-new"] = "tr",
	sa = "tr",
	zkt = "tr",
}

-- Code aliases. The left side is the alias and the right side is the canonical code. NOTE: These are gradually
-- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable
-- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed.
-- Aliases in this table are tracked at [[Wiktionary:Tracking/languages/LANG]]; see e.g.
-- [[Special:WhatLinksHere/Wiktionary:Tracking/languages/RL.]] for the `RL.` alias.
export.aliases = {
	-- Dotted two-letter abbreviations, each resolving to a code with the "la-" prefix.
	["CL."] = "la-cla",
	["EL."] = "la-ecc",
	["LL."] = "la-lat",
	["ML."] = "la-med",
	["NL."] = "la-new",
	["RL."] = "la-ren",
	["VL."] = "la-vul",
	-- The one alias that is not a dotted abbreviation.
	["prv"] = "oc-pro",
}

-- Set of tracked codes (code -> true). Every alias in `export.aliases` is tracked already, so none of them should be repeated here.
-- The tracking mechanism is the one described in the comment above `export.aliases`.
export.track = {
	-- Codes that exist both as a full language and as an etymology-only language.
	["lzh-lit"] = true,
	-- Languages that are in the process of being converted to families.
	bh = true, -- inc-bih
	nan = true, -- zhx-nan
}

return export