Difference between revisions of "Module:Lang/data"

From Wonkpedia
Jump to navigation Jump to search
 
m (1 revision imported)
 
(2 intermediate revisions by 2 users not shown)
Line 1: Line 1:
  +
local lang_obj = mw.language.getContentLanguage();
  +
local this_wiki_lang_tag = lang_obj.code; -- get this wiki's language tag
  +
  +
 
--[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------
 
--[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------
   
Line 13: Line 17:
 
have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches
 
have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches
 
the first name.
 
the first name.
 
TODO: instead of returning:
 
["key"] = {"name"}
 
where each table has only one name, return
 
["key"] = "name"
 
requires changes in Module:Lang.
 
   
 
]]
 
]]
Line 27: Line 25:
 
if 'var_sup' == src_type then
 
if 'var_sup' == src_type then
 
for k, v in pairs (source) do
 
for k, v in pairs (source) do
out[k:lower()] = v; -- for variant, everything is needed
+
out[k:lower()] = v; -- for variant and suppressed everything is needed
 
end
 
end
   
 
elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active)
 
elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active)
 
for k, v in pairs (source.active) do
 
for k, v in pairs (source.active) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
+
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
 
end
 
end
   
 
elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated)
 
elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated)
 
for k, v in pairs (source.deprecated) do
 
for k, v in pairs (source.deprecated) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
+
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
 
end
 
end
   
 
else -- here for all other sources
 
else -- here for all other sources
 
for k, v in pairs (source) do
 
for k, v in pairs (source) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
+
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
 
end
 
end
 
end
 
end
Line 48: Line 46:
 
end
 
end
   
local lang_name_table = {
+
local lang_name_table_t = {
 
lang = key_to_lower ('Module:Language/data/iana languages', 'lang'),
 
lang = key_to_lower ('Module:Language/data/iana languages', 'lang'),
 
lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'),
 
lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'),
Line 56: Line 54:
 
suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
 
suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
 
}
 
}
  +
  +
  +
--[[--------------------------< I 1 8 N M E D I A W I K I O V E R R I D E >--------------------------------
  +
  +
For internationalization; not used at en.wiki
  +
  +
The language names taken from the IANA language-subtag-registry file are given in English. That may not be ideal.
  +
Translating ~8,000 language names is also not ideal. MediaWiki maintains (much) shorter lists of language names
  +
in most languages for which there is a Wikipedia edition. When desired, Module:Lang can use the MediaWiki
  +
language list for the local language.
  +
  +
Caveat lector: the list of MediaWiki language names for your language may not be complete or may not exist at all.
  +
When incomplete, MediaWiki's list will 'fall back' to another language (typically English). When that happens
  +
add an appropriate entry to the override table below.
  +
  +
Caveat lector: the list of MediaWiki language names for your language may not be correct. At en.wiki, the
  +
MediaWiki language names do not agree with the IANA language names for these ISO 639-1 tags. Often it is simply
  +
spelling differences:
  +
bh: IANA: Bihari languages MW: Bhojpuri – the ISO 639-3 tag for Bhojpuri is bho
  +
bn: IANA: Bengali MW: Bangla – Bengali is the exonym, Bangla is the endonym
  +
dv: IANA: Dhivehi MW: Divehi
  +
el: IANA: Modern Greek MW: Greek
  +
ht: IANA: Haitian MW: Haitian Creole
  +
ky: IANA: Kirghiz MW: Kyrgyz
  +
li: IANA: Limburgan MW: Limburgish
  +
or: IANA: Oriya MW: Odia
  +
os: IANA: Ossetian MW: Ossetic
  +
"pa: IANA: Panjabi MW: Punjabi
  +
"ps: IANA: Pushto MW: Pashto
  +
"to: IANA: Tonga MW: Tongan
  +
"ug: IANA: Uighur MW: Uyghur
  +
use the override table to override language names that are incorrect for your project
  +
  +
To see the list of names that MediaWiki has for your language, enter this in the Debug console:
  +
=mw.dumpObject (mw.language.fetchLanguageNames ('<tag>', 'all'))
  +
(replacing <tag> with the language tag for your language)
  +
  +
Use of the MediaWiki language names lists is enabled when media_wiki_override_enable is set to boolean true.
  +
  +
]]
  +
  +
local media_wiki_override_enable = false; -- set to true to override IANA names with MediaWiki names; always false at en.wiki
  +
-- caveat lector: the list of MediaWiki language names for your language may not be complete or may not exist at all
  +
if true == media_wiki_override_enable then
  +
local mw_languages_by_tag_t = mw.language.fetchLanguageNames (this_wiki_lang_tag, 'all'); -- get a table of language tag/name pairs known to MediaWiki
  +
for tag, name in pairs (mw_languages_by_tag_t) do -- loop through each tag/name pair in the MediaWiki list
  +
if lang_name_table_t.lang[tag] then -- if the tag is in the main list
  +
lang_name_table_t.lang[tag] = name; -- overwrite exisiting name with the name from MediaWiki
  +
end
  +
end
  +
end
   
   
Line 69: Line 118:
 
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------
 
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------
   
["ca-valencia"] = {"Valencian"},
+
["ca-valencia"] = "Valencian",
["cu"] = {"Church Slavonic"}, -- 2nd IANA name;
+
["cu"] = "Church Slavonic", -- 2nd IANA name;
["de-at"] = {"Austrian German"}, -- these code-region and code-variant tags to match en.wiki article names
+
["de-at"] = "Austrian German", -- these code-region and code-variant tags to match en.wiki article names
["de-ch"] = {"Swiss Standard German"},
+
["de-ch"] = "Swiss Standard German",
["en-au"] = {"Australian English"},
+
["en-au"] = "Australian English",
["en-ca"] = {"Canadian English"},
+
["en-ca"] = "Canadian English",
["en-emodeng"] = {"Early Modern English"},
+
["en-emodeng"] = "Early Modern English",
["en-gb"] = {"British English"},
+
["en-gb"] = "British English",
["en-ie"] = {"Irish English"},
+
["en-ie"] = "Irish English",
["en-in"] = {"Indian English"},
+
["en-in"] = "Indian English",
["en-nz"] = {"New Zealand English"},
+
["en-nz"] = "New Zealand English",
["en-us"] = {"American English"},
+
["en-us"] = "American English",
["en-za"] = {"South African English"},
+
["en-za"] = "South African English",
["fy"] = {"West Frisian"}, -- Western Frisian
+
["fy"] = "West Frisian", -- Western Frisian
["mo"] = {"Moldovan"}, -- Moldavian (deprecated code); to match en.wiki article title
+
["mo"] = "Moldovan", -- Moldavian (deprecated code); to match en.wiki article title
["nl-be"] = {"Flemish"}, -- match MediaWiki
+
["nl-be"] = "Flemish", -- match MediaWiki
["oc-provenc"] = {"Provençal"},
+
["oc-provenc"] = "Provençal",
["ps"] = {"Pashto"}, -- Pushto
+
["ps"] = "Pashto", -- Pushto
["pt-br"] = {"Brazilian Portuguese"}, -- match MediaWiki
+
["pt-br"] = "Brazilian Portuguese", -- match MediaWiki
["tw-asante"] = {"Asante Twi"},
+
["tw-asante"] = "Asante Twi",
   
 
-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
 
-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
 
--<begin do-not-edit except to comment out>--
 
--<begin do-not-edit except to comment out>--
["av"] = {"Avar"}, -- Avaric
+
["av"] = "Avar", -- Avaric
["bo"] = {"Standard Tibetan"}, -- Tibetan
+
["bo"] = "Standard Tibetan", -- Tibetan
["el"] = {"Greek"}, -- Modern Greek
+
["el"] = "Greek", -- Modern Greek
-- ["en-SA"] = {"South African English"}, -- English; no; SA is not South Africa it Saudi Arabia; ZA is South Africa
+
-- ["en-SA"] = "South African English", -- English; no; SA is not South Africa it Saudi Arabia; ZA is South Africa
["ff"] = {"Fula"}, -- Fulah
+
["ff"] = "Fula", -- Fulah
["ht"] = {"Haitian Creole"}, -- Haitian
+
["ht"] = "Haitian Creole", -- Haitian
["hz"] = {"Otjiherero"}, -- Herero
+
["hz"] = "Otjiherero", -- Herero
["ii"] = {"Yi"}, -- Sichuan Yi
+
["ii"] = "Yi", -- Sichuan Yi
["ki"] = {"Gikuyu"}, -- Kikuyu
+
["ki"] = "Gikuyu", -- Kikuyu
["kl"] = {"Greenlandic"}, -- Kalaallisut
+
["kl"] = "Greenlandic", -- Kalaallisut
["ky"] = {"Kyrgyz"}, -- Kirghiz
+
["ky"] = "Kyrgyz", -- Kirghiz
["lg"] = {"Luganda"}, -- Ganda
+
["lg"] = "Luganda", -- Ganda
["li"] = {"Limburgish"}, -- Limburgan
+
["li"] = "Limburgish", -- Limburgan
["mi"] = {"Māori"}, -- Maori
+
["mi"] = "Māori", -- Maori
["na"] = {"Nauruan"}, -- Nauru
+
["na"] = "Nauruan", -- Nauru
["nb"] = {"Bokmål"}, -- Norwegian Bokmål
+
["nb"] = "Bokmål", -- Norwegian Bokmål
["nd"] = {"Northern Ndebele"}, -- North Ndebele
+
["nd"] = "Northern Ndebele", -- North Ndebele
["nn"] = {"Nynorsk"}, -- Norwegian Nynorsk
+
["nn"] = "Nynorsk", -- Norwegian Nynorsk
["nr"] = {"Southern Ndebele"}, -- South Ndebele
+
["nr"] = "Southern Ndebele", -- South Ndebele
["ny"] = {"Chichewa"}, -- Nyanja
+
["ny"] = "Chichewa", -- Nyanja
["oj"] = {"Ojibwe"}, -- Ojibwa
+
["oj"] = "Ojibwe", -- Ojibwa
["or"] = {"Odia"}, -- Oriya
+
["or"] = "Odia", -- Oriya
["pa"] = {"Punjabi"}, -- Panjabi
+
["pa"] = "Punjabi", -- Panjabi
["rn"] = {"Kirundi"}, -- Rundi
+
["rn"] = "Kirundi", -- Rundi
["sl"] = {"Slovene"}, -- Slovenian
+
["sl"] = "Slovene", -- Slovenian
["ss"] = {"Swazi"}, -- Swati
+
["ss"] = "Swazi", -- Swati
["st"] = {"Sotho"}, -- Southern Sotho
+
["st"] = "Sotho", -- Southern Sotho
["to"] = {"Tongan"}, -- Tonga
+
["to"] = "Tongan", -- Tonga
 
--<end do-not-edit except to comment out>--
 
--<end do-not-edit except to comment out>--
   
Line 125: Line 174:
 
------------------------------< I S O _ 6 3 9 - 2, - 3, - 5 >----------------------------------------------
 
------------------------------< I S O _ 6 3 9 - 2, - 3, - 5 >----------------------------------------------
   
["alv"] = {"Atlantic–Congo languages"}, -- to match en.wiki article title (endash)
+
["alv"] = "Atlantic–Congo languages", -- to match en.wiki article title (endash)
["arc"] = {"Aramaic"}, -- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE);
+
["arc"] = "Aramaic", -- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE);
["art"] = {"constructed"}, -- to match en.wiki article; lowercase for category name
+
["art"] = "constructed", -- to match en.wiki article; lowercase for category name
["bhd"] = {"Bhadarwahi"}, -- Bhadrawahi; to match en.wiki article title
+
["bhd"] = "Bhadarwahi", -- Bhadrawahi; to match en.wiki article title
["bla"] = {"Blackfoot"}, -- Siksika; to match en.wiki article title
+
["bla"] = "Blackfoot", -- Siksika; to match en.wiki article title
["bua"] = {"Buryat"}, -- Buriat; this is a macro language; these four use wp preferred transliteration;
+
["bua"] = "Buryat", -- Buriat; this is a macro language; these four use wp preferred transliteration;
["bxm"] = {"Mongolian Buryat"}, -- Mongolia Buriat; these three all redirect to Buryat
+
["bxm"] = "Mongolian Buryat", -- Mongolia Buriat; these three all redirect to Buryat
["bxr"] = {"Russian Buryat"}, -- Russia Buriat;
+
["bxr"] = "Russian Buryat", -- Russia Buriat;
["bxu"] = {"Chinese Buryat"}, -- China Buriat;
+
["bxu"] = "Chinese Buryat", -- China Buriat;
["byr"] = {"Yipma"}, -- Baruya, Yipma
+
["byr"] = "Yipma", -- Baruya, Yipma
["egy"] = {"Ancient Egyptian"}, -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic
+
["egy"] = "Ancient Egyptian", -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic
["ems"] = {"Alutiiq"}, -- Pacific Gulf Yupik; to match en.wiki article title
+
["ems"] = "Alutiiq", -- Pacific Gulf Yupik; to match en.wiki article title
["esx"] = {"Eskimo–Aleut languages"}, -- to match en.wiki article title (endash)
+
["esx"] = "Eskimo–Aleut languages", -- to match en.wiki article title (endash)
["frr"] = {"North Frisian"}, -- Northern Frisian
+
["frr"] = "North Frisian", -- Northern Frisian
["frs"] = {"East Frisian Low Saxon"}, -- Eastern Frisian
+
["frs"] = "East Frisian Low Saxon", -- Eastern Frisian
["gsw-fr"] = {"Alsatian"}, -- match MediaWiki
+
["gsw-fr"] = "Alsatian", -- match MediaWiki
["hmx"] = {"Hmong–Mien languages"}, -- to match en.wiki article title (endash)
+
["haa"] = "Hän", -- Han; to match en.wiki article title
["ilo"] = {"Ilocano"}, -- Iloko; to match en.wiki article title
+
["hmx"] = "Hmong–Mien languages", -- to match en.wiki article title (endash)
["jam"] = {"Jamaican Patois"}, -- Jamaican Creole English
+
["ilo"] = "Ilocano", -- Iloko; to match en.wiki article title
["luo"] = {"Dholuo"}, -- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary): Dholuo
+
["jam"] = "Jamaican Patois", -- Jamaican Creole English
["mhr"] = {"Meadow Mari"}, -- Eastern Mari
+
["luo"] = "Dholuo", -- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary): Dholuo
["mid"] = {"Modern Mandaic"}, -- Mandaic
+
["mhr"] = "Meadow Mari", -- Eastern Mari
["mkh"] = {"Mon–Khmer languages"}, -- to match en.wiki article title (endash)
+
["mid"] = "Modern Mandaic", -- Mandaic
["mla"] = {"Tamambo"}, -- Malo
+
['mis'] = "uncoded", -- Uncoded languages; capitalization; special scope, not collective scope;
['mte'] = {"Mono-Alu"}, -- Mono (Solomon Islands)
+
["mkh"] = "Mon–Khmer languages", -- to match en.wiki article title (endash)
 
["mla"] = "Tamambo", -- Malo
["nan-tw"] = {"Taiwanese Hokkien"}, -- make room for IANA / 639-3 nan Min Nan Chinese; match en.wiki article title
 
["new"] = {"Newar"}, -- Newari, Nepal Bhasa; to match en,wiki article title
+
['mte'] = "Mono-Alu", -- Mono (Solomon Islands)
  +
['mul'] = "multiple", -- Multiple languages; capitalization; special scope, not collective scope;
["ngf"] = {"Trans–New Guinea languages"}, -- to match en.wiki article title (endash)
 
["nic"] = {"Niger–Congo languages"}, -- Niger-Kordofanian languages; to match en,wiki article title
+
["nan-tw"] = "Taiwanese Hokkien", -- make room for IANA / 639-3 nan Min Nan Chinese; match en.wiki article title
["nrf"] = {"Norman"}, -- not quite a collective - IANA name: Jèrriais + Guernésiais; categorizes to Norman-language text
+
["new"] = "Newar", -- Newari, Nepal Bhasa; to match en,wiki article title
["nrf-gg"] = {"Guernésiais"}, -- match MediaWiki
+
["ngf"] = "Trans–New Guinea languages", -- to match en.wiki article title (endash)
["nrf-je"] = {"Jèrriais"}, -- match MediaWiki
+
["nic"] = "Niger–Congo languages", -- Niger-Kordofanian languages; to match en,wiki article title
["nzi"] = {"Nzema"}, -- Nzima; to match en.wiki article title
+
["nrf"] = "Norman", -- not quite a collective - IANA name: Jèrriais + Guernésiais; categorizes to Norman-language text
["oma"] = {"Omaha–Ponca"}, -- to match en.wiki article title (endash)
+
["nrf-gg"] = "Guernésiais", -- match MediaWiki
["orv"] = {"Old East Slavic"}, -- Old Russian
+
["nrf-je"] = "Jèrriais", -- match MediaWiki
["pfl"] = {"Palatine German"}, -- Pfaelzisch; to match en.wiki article
+
["nzi"] = "Nzema", -- Nzima; to match en.wiki article title
["pms"] = {"Piedmontese"}, -- Piemontese; to match en.wiki article title
+
["oma"] = "Omaha–Ponca", -- to match en.wiki article title (endash)
  +
["orv"] = "Old East Slavic", -- Old Russian
["pnb"] = {"Punjabi (Western)"}, -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
 
["sdo"] = {"Bukar–Sadong"}, -- Bukar-Sadung Bidayuh; to match en.wiki article title
+
["pfl"] = "Palatine German", -- Pfaelzisch; to match en.wiki article
["stq"] = {"Saterland Frisian"}, -- Saterfriesisch
+
["pie"] = "Piro Pueblo", -- Piro; to match en.wiki article
["und"] = {"undetermined"}, -- capitalization to match existing category
+
["pms"] = "Piedmontese", -- Piemontese; to match en.wiki article title
 
["pnb"] = "Punjabi (Western)", -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
["wrg"] = {"Warrongo"}, -- Warungu
 
["xal-ru"] = {"Kalmyk"}, -- to match en.wiki article title
+
["rop"] = "Australian Kriol", -- Kriol; en.wiki article is a dab; point to correct en.wiki article
["xgf"] = {"Tongva"}, -- ISO 639-3 is Gabrielino-Fernandeño
+
["sdo"] = "Bukar–Sadong", -- Bukar-Sadung Bidayuh; to match en.wiki article title
["yuf"] = {"Havasupai–Hualapai"}, -- Havasupai-Walapai-Yavapai; to match en.wiki article title
+
["stq"] = "Saterland Frisian", -- Saterfriesisch
  +
["und"] = "undetermined", -- capitalization to match existing category
  +
["wrg"] = "Warrongo", -- Warungu
 
["xal-ru"] = "Kalmyk", -- to match en.wiki article title
  +
["xgf"] = "Tongva", -- ISO 639-3 is Gabrielino-Fernandeño
 
["yuf"] = "Havasupai–Hualapai", -- Havasupai-Walapai-Yavapai; to match en.wiki article title
  +
["zxx"] = "no linguistic content", -- capitalization
   
 
-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
 
-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
 
--<begin do-not-edit except to comment out>--
 
--<begin do-not-edit except to comment out>--
["ace"] = {"Acehnese"}, -- Achinese
+
["ace"] = "Acehnese", -- Achinese
["aec"] = {"Sa'idi Arabic"}, -- Saidi Arabic
+
["aec"] = "Sa'idi Arabic", -- Saidi Arabic
["akl"] = {"Aklan"}, -- Aklanon
+
["akl"] = "Aklan", -- Aklanon
["alt"] = {"Altay"}, -- Southern Altai
+
["alt"] = "Altay", -- Southern Altai
["apm"] = {"Mescalero-Chiricahua"}, -- Mescalero-Chiricahua Apache
+
["apm"] = "Mescalero-Chiricahua", -- Mescalero-Chiricahua Apache
["bal"] = {"Balochi"}, -- Baluchi
+
["bal"] = "Balochi", -- Baluchi
-- ["bcl"] = {"Central Bicolano"}, -- Central Bikol
+
-- ["bcl"] = "Central Bicolano", -- Central Bikol
["bin"] = {"Edo"}, -- Bini
+
["bin"] = "Edo", -- Bini
["bpy"] = {"Bishnupriya Manipuri"}, -- Bishnupriya
+
["bpy"] = "Bishnupriya Manipuri", -- Bishnupriya
["chg"] = {"Chagatay"}, -- Chagatai
+
["chg"] = "Chagatay", -- Chagatai
["ckb"] = {"Sorani Kurdish"}, -- Central Kurdish
+
["ckb"] = "Sorani Kurdish", -- Central Kurdish
["cnu"] = {"Shenwa"}, -- Chenoua
+
["cnu"] = "Shenwa", -- Chenoua
["coc"] = {"Cocopah"}, -- Cocopa
+
["coc"] = "Cocopah", -- Cocopa
["diq"] = {"Zazaki"}, -- Dimli
+
["diq"] = "Zazaki", -- Dimli
["fit"] = {"Meänkieli"}, -- Tornedalen Finnish
+
["fit"] = "Meänkieli", -- Tornedalen Finnish
["fkv"] = {"Kven"}, -- Kven Finnish
+
["fkv"] = "Kven", -- Kven Finnish
["frk"] = {"Old Frankish"}, -- Frankish
+
["frk"] = "Old Frankish", -- Frankish
["gez"] = {"Ge'ez"}, -- Geez
+
["gez"] = "Ge'ez", -- Geez
["gju"] = {"Gujari"}, -- Gujari
+
["gju"] = "Gujari", -- Gujari
["gsw"] = {"Alemannic German"}, -- Swiss German
+
["gsw"] = "Alemannic German", -- Swiss German
["gul"] = {"Gullah"}, -- Sea Island Creole English
+
["gul"] = "Gullah", -- Sea Island Creole English
["hak"] = {"Hakka"}, -- Hakka Chinese
+
["hak"] = "Hakka", -- Hakka Chinese
["hbo"] = {"Biblical Hebrew"}, -- Ancient Hebrew
+
["hbo"] = "Biblical Hebrew", -- Ancient Hebrew
["hnd"] = {"Hindko"}, -- Southern Hindko
+
["hnd"] = "Hindko", -- Southern Hindko
-- ["ikt"] = {"Inuvialuk"}, -- Inuinnaqtun
+
-- ["ikt"] = "Inuvialuk", -- Inuinnaqtun
["kaa"] = {"Karakalpak"}, -- Kara-Kalpak
+
["kaa"] = "Karakalpak", -- Kara-Kalpak
["khb"] = {"Tai Lü"}, -- Lü
+
["khb"] = "Tai Lü", -- Lü
["kmr"] = {"Kurmanji Kurdish"}, -- Northern Kurdish
+
["kmr"] = "Kurmanji Kurdish", -- Northern Kurdish
["kpo"] = {"Kposo"}, -- Ikposo
+
["kpo"] = "Kposo", -- Ikposo
["krj"] = {"Kinaray-a"}, -- Kinaray-A
+
["krj"] = "Kinaray-a", -- Kinaray-A
["ktz"] = {"Juǀ'hoan"}, -- Juǀʼhoan
+
["ktz"] = "Juǀ'hoan", -- Juǀʼhoan
["lez"] = {"Lezgian"}, -- Lezghian
+
["lez"] = "Lezgian", -- Lezghian
["liv"] = {"Livonian"}, -- Liv
+
["liv"] = "Livonian", -- Liv
["lng"] = {"Lombardic"}, -- Langobardic
+
["lng"] = "Lombardic", -- Langobardic
["mia"] = {"Miami-Illinois"}, -- Miami
+
["mia"] = "Miami-Illinois", -- Miami
["miq"] = {"Miskito"}, -- Mískito
+
["miq"] = "Miskito", -- Mískito
["mix"] = {"Mixtec"}, -- Mixtepec Mixtec
+
["mix"] = "Mixtec", -- Mixtepec Mixtec
["mni"] = {"Meitei"}, -- Manipuri
+
["mni"] = "Meitei", -- Manipuri
["mrj"] = {"Hill Mari"}, -- Western Mari
+
["mrj"] = "Hill Mari", -- Western Mari
["mww"] = {"White Hmong"}, -- Hmong Daw
+
["mww"] = "White Hmong", -- Hmong Daw
["nds-nl"] = {"Dutch Low Saxon"}, -- Low German
+
["nds-nl"] = "Dutch Low Saxon", -- Low German
-- ["new"] = {"Nepal Bhasa"}, -- Newari
+
-- ["new"] = "Nepal Bhasa", -- Newari
["nso"] = {"Northern Sotho"}, -- Pedi
+
["nso"] = "Northern Sotho", -- Pedi
-- ["nwc"] = {"Classical Nepal Bhasa"}, -- Classical Newari, Classical Nepal Bhasa, Old Newari
+
-- ["nwc"] = "Classical Nepal Bhasa", -- Classical Newari, Classical Nepal Bhasa, Old Newari
["ood"] = {"O'odham"}, -- Tohono O'odham
+
["ood"] = "O'odham", -- Tohono O'odham
["otk"] = {"Old Turkic"}, -- Old Turkish
+
["otk"] = "Old Turkic", -- Old Turkish
["pal"] = {"Middle Persian"}, -- Pahlavi
+
["pal"] = "Middle Persian", -- Pahlavi
["pam"] = {"Kapampangan"}, -- Pampanga
+
["pam"] = "Kapampangan", -- Pampanga
["phr"] = {"Potwari"}, -- Pahari-Potwari
+
["phr"] = "Potwari", -- Pahari-Potwari
["pka"] = {"Jain Prakrit"}, -- Ardhamāgadhī Prākrit
+
["pka"] = "Jain Prakrit", -- Ardhamāgadhī Prākrit
-- ["pnb"] = {"Punjabi"}, -- Western Panjabi
+
-- ["pnb"] = "Punjabi", -- Western Panjabi
["psu"] = {"Shauraseni"}, -- Sauraseni Prākrit
+
["psu"] = "Shauraseni", -- Sauraseni Prākrit
["rap"] = {"Rapa Nui"}, -- Rapanui
+
["rap"] = "Rapa Nui", -- Rapanui
["rar"] = {"Cook Islands Māori"}, -- Rarotongan
+
["rar"] = "Cook Islands Māori", -- Rarotongan
["rmu"] = {"Scandoromani"}, -- Tavringer Romani
+
["rmu"] = "Scandoromani", -- Tavringer Romani
["rom"] = {"Romani"}, -- Romany
+
["rom"] = "Romani", -- Romany
["rup"] = {"Aromanian"}, -- Macedo-Romanian
+
["rup"] = "Aromanian", -- Macedo-Romanian
["ryu"] = {"Okinawan"}, -- Central Okinawan
+
["ryu"] = "Okinawan", -- Central Okinawan
["sdc"] = {"Sassarese"}, -- Sassarese Sardinian
+
["sdc"] = "Sassarese", -- Sassarese Sardinian
["sdn"] = {"Gallurese"}, -- Gallurese Sardinian
+
["sdn"] = "Gallurese", -- Gallurese Sardinian
["shp"] = {"Shipibo"}, -- Shipibo-Conibo
+
["shp"] = "Shipibo", -- Shipibo-Conibo
["src"] = {"Logudorese"}, -- Logudorese Sardinian
+
["src"] = "Logudorese", -- Logudorese Sardinian
["sro"] = {"Campidanese"}, -- Campidanese Sardinian
+
["sro"] = "Campidanese", -- Campidanese Sardinian
["tkl"] = {"Tokelauan"}, -- Tokelau
+
["tkl"] = "Tokelauan", -- Tokelau
["tvl"] = {"Tuvaluan"}, -- Tuvalu
+
["tvl"] = "Tuvaluan", -- Tuvalu
["tyv"] = {"Tuvan"}, -- Tuvinian
+
["tyv"] = "Tuvan", -- Tuvinian
["vls"] = {"West Flemish"}, -- Vlaams
+
["vls"] = "West Flemish", -- Vlaams
["wep"] = {"Westphalian"}, -- Westphalien
+
["wep"] = "Westphalian", -- Westphalien
["xal"] = {"Oirat"}, -- Kalmyk
+
["xal"] = "Oirat", -- Kalmyk
["xcl"] = {"Old Armenian"}, -- Classical Armenian
+
["xcl"] = "Old Armenian", -- Classical Armenian
["yua"] = {"Yucatec Maya"}, -- Yucateco
+
["yua"] = "Yucatec Maya", -- Yucateco
 
--<end do-not-edit except to comment out>--
 
--<end do-not-edit except to comment out>--
   
Line 249: Line 304:
 
------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------
 
------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------
   
["cel-x-proto"] = {"Proto-Celtic"}, -- cel in IANA is Celtic languages
+
["alg-x-proto"] = "Proto-Algonquian", -- alg in IANA is Algonquian languages
["gem-x-proto"] = {"Proto-Germanic"}, -- gem in IANA is Germanic languages
+
["cel-x-proto"] = "Proto-Celtic", -- cel in IANA is Celtic languages
  +
["gem-x-proto"] = "Proto-Germanic", -- gem in IANA is Germanic languages
["gmw-x-ecg"] = {"East Central German"},
+
["gmw-x-ecg"] = "East Central German",
["grc-x-aeolic"] = {"Aeolic Greek"}, -- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
+
["grc-x-aeolic"] = "Aeolic Greek", -- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
["grc-x-attic"] = {"Attic Greek"},
 
["grc-x-biblical"] = {"Biblical Greek"},
+
["grc-x-attic"] = "Attic Greek",
["grc-x-byzant"] = {"Byzantine Greek"},
+
["grc-x-biblical"] = "Biblical Greek",
["grc-x-classic"] = {"Classical Greek"},
+
["grc-x-byzant"] = "Byzantine Greek",
["grc-x-doric"] = {"Doric Greek"},
+
["grc-x-classic"] = "Classical Greek",
["grc-x-hellen"] = {"Hellenistic Greek"},
+
["grc-x-doric"] = "Doric Greek",
["grc-x-ionic"] = {"Ionic Greek"},
+
["grc-x-hellen"] = "Hellenistic Greek",
["grc-x-koine"] = {"Koinē Greek"},
+
["grc-x-ionic"] = "Ionic Greek",
["grc-x-medieval"] = {"Medieval Greek"},
+
["grc-x-koine"] = "Koinē Greek",
["grc-x-patris"] = {"Patristic Greek"},
+
["grc-x-medieval"] = "Medieval Greek",
["grk-x-proto"] = {"Proto-Greek"}, -- grk in IANA is Greek languages
+
["grc-x-patris"] = "Patristic Greek",
["iir-x-proto"] = {"Proto-Indo-Iranian"}, -- iir in IANA is Indo-Iranian Languages
+
["grk-x-proto"] = "Proto-Greek", -- grk in IANA is Greek languages
["ine-x-proto"] = {"Proto-Indo-European"},
+
["iir-x-proto"] = "Proto-Indo-Iranian", -- iir in IANA is Indo-Iranian Languages
["ira-x-proto"] = {"Proto-Iranian"}, -- ira in IANA is Iranian languages
+
["ine-x-proto"] = "Proto-Indo-European",
["itc-x-proto"] = {"Proto-Italic"}, -- itc in IANA is Italic languages
+
["ira-x-proto"] = "Proto-Iranian", -- ira in IANA is Iranian languages
["ksh-x-colog"] = {"Colognian"}, -- en.wiki article is Colognian; ksh (Kölsch) redirects there
+
["itc-x-proto"] = "Proto-Italic", -- itc in IANA is Italic languages
  +
["ksh-x-colog"] = "Colognian", -- en.wiki article is Colognian; ksh (Kölsch) redirects there
["la-x-medieval"] = {"Medieval Latin"},
+
["la-x-medieval"] = "Medieval Latin",
["mis-x-ripuar"] = {"Ripuarian"}, -- replaces improper use of ksh in wp_languages
+
["mis-x-ripuar"] = "Ripuarian", -- replaces improper use of ksh in wp_languages
["sem-x-proto"] = {"Proto-Semitic"},
 
["sla-x-proto"] = {"Proto-Slavic"}, -- sla in IANA is Slavic languages
+
["sem-x-proto"] = "Proto-Semitic",
["yuf-x-hav"] = {"Havasupai"}, -- IANA name for these three is Havasupai-Walapai-Yavapai
+
["sla-x-proto"] = "Proto-Slavic", -- sla in IANA is Slavic languages
  +
["yuf-x-hav"] = "Havasupai", -- IANA name for these three is Havasupai-Walapai-Yavapai
["yuf-x-wal"] = {"Walapai"},
 
["yuf-x-yav"] = {"Yavapai"},
+
["yuf-x-wal"] = "Walapai",
 
["yuf-x-yav"] = "Yavapai",
 
}
 
}
   
Line 288: Line 344:
   
 
local article_name = {
 
local article_name = {
["lij"] = {"Ligurian (Romance language)"}, -- Ligurian; see Template_talk:Lang#Ligurian_dab
+
["lij"] = "Ligurian (Romance language)", -- Ligurian; see Template_talk:Lang#Ligurian_dab
['mnh'] = {"Mono language (Congo)"}, -- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages
+
['mnh'] = "Mono language (Congo)", -- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages
['mnr'] = {"Mono language (California)"}, -- Mono (USA)
+
['mnr'] = "Mono language (California)", -- Mono (USA)
['mru'] = {"Mono language (Cameroon)"}, -- Mono (Cameroon)
+
['mru'] = "Mono language (Cameroon)", -- Mono (Cameroon)
["xlg"] = {"Ligurian (ancient language)"}, -- see Template_talk:Lang#Ligurian_dab
+
["xlg"] = "Ligurian (ancient language)", -- see Template_talk:Lang#Ligurian_dab
 
}
 
}
   
Line 476: Line 532:
 
['jyutping'] = {
 
['jyutping'] = {
 
['default'] = 'Jyutping transliteration',
 
['default'] = 'Jyutping transliteration',
  +
},
  +
  +
['mlcts'] = {
  +
['default'] = 'Myanmar Language Commission Transcription System',
 
},
 
},
   
Line 533: Line 593:
 
return
 
return
 
{
 
{
  +
this_wiki_lang_tag = this_wiki_lang_tag,
  +
this_wiki_lang_dir = lang_obj:getDir(), -- wiki's language direction
  +
 
article_name = article_name,
 
article_name = article_name,
lang_name_table = lang_name_table,
+
lang_name_table = lang_name_table_t,
 
override = override,
 
override = override,
 
rtl_scripts = rtl_scripts,
 
rtl_scripts = rtl_scripts,
  +
special_tags_table = special_tags_table,
 
translit_title_table = translit_title_table,
 
translit_title_table = translit_title_table,
 
};
 
};

Latest revision as of 23:47, 23 February 2022

Documentation for this module may be created at Module:Lang/data/doc

local lang_obj = mw.language.getContentLanguage();
local this_wiki_lang_tag = lang_obj.code;										-- get this wiki's language tag


--[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------

primary table of tables that decode:
	lang -> language tags and names
	script -> ISO 15924 script tags
	region -> ISO 3166 region tags
	variant -> iana registered variant tags
	suppressed -> map of scripts tags and their associated language tags
	
all of these data come from separate modules that are derived from the IANA language-subtag-registry file

key_to_lower() avoids the metatable trap and sets all keys in the subtables to lowercase. Many language codes
have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches
the first name.

]]

-- Load a data module and return a new table holding (part of) its content with every key lowercased.
--   module:   name of the data module to load
--   src_type: 'var_sup'  => module is fetched with require(); each value is copied whole
--             'lang'     => entries are read from source.active when that field exists; first name only
--             'lang_dep' => entries are read from source.deprecated when that field exists; first name only
--             otherwise (including nil) => entries are the module's top-level table; first name only
local function key_to_lower (module, src_type)
	local keep_whole_value = 'var_sup' == src_type;
	local source;

	if keep_whole_value then
		source = require (module);												-- require() avoids metatable trap for variant data
	end
	if not source then
		source = mw.loadData (module);											-- everything else (and the fallback) is fetched with mw.loadData()
	end

	local entries = source;														-- default: walk the module's top-level table
	if not keep_whole_value then
		if 'lang' == src_type and source.active then							-- for ~/iana_languages (active)
			entries = source.active;
		elseif 'lang_dep' == src_type and source.deprecated then				-- for ~/iana_languages (deprecated)
			entries = source.deprecated;
		end
	end

	local out = {};
	for key, value in pairs (entries) do
		if keep_whole_value then
			out[key:lower()] = value;											-- variant and suppressed: the complete value is needed
		else
			out[key:lower()] = value[1];										-- many tags have several names; only the first is kept
		end
	end
	return out;
end

-- Primary lookup table: one lowercase-keyed subtable per kind of subtag.
-- Each row below is { field name, data module, src_type handed to key_to_lower() };
-- rows without a third item call key_to_lower() with a nil src_type.
local lang_name_table_t = {};
for _, row in ipairs ({
	{'lang', 'Module:Language/data/iana languages', 'lang'},
	{'lang_dep', 'Module:Language/data/iana languages', 'lang_dep'},
	{'script', 'Module:Language/data/iana scripts'},							-- script keys are capitalized; set to lower
	{'region', 'Module:Language/data/iana regions'},							-- region keys are uppercase; set to lower
	{'variant', 'Module:Language/data/iana variants', 'var_sup'},
	{'suppressed', 'Module:Language/data/iana suppressed scripts', 'var_sup'},	-- script keys are capitalized; set to lower
	}) do
	lang_name_table_t[row[1]] = key_to_lower (row[2], row[3]);
end


--[[--------------------------< I 1 8 N   M E D I A W I K I   O V E R R I D E >--------------------------------

For internationalization; not used at en.wiki

The language names taken from the IANA language-subtag-registry file are given in English. That may not be ideal.
Translating ~8,000 language names is also not ideal.  MediaWiki maintains (much) shorter lists of language names
in most languages for which there is a Wikipedia edition.  When desired, Module:Lang can use the MediaWiki 
language list for the local language.

Caveat lector: the list of MediaWiki language names for your language may not be complete or may not exist at all.
When incomplete, MediaWiki's list will 'fall back' to another language (typically English).  When that happens
add an appropriate entry to the override table below.

Caveat lector: the list of MediaWiki language names for your language may not be correct.  At en.wiki, the
MediaWiki language names do not agree with the IANA language names for these ISO 639-1 tags.  Often it is simply
spelling differences:
	bh: IANA: Bihari languages MW: Bhojpuri – the ISO 639-3 tag for Bhojpuri is bho
	bn: IANA: Bengali MW: Bangla – Bengali is the exonym, Bangla is the endonym
	dv: IANA: Dhivehi MW: Divehi
	el: IANA: Modern Greek MW: Greek
	ht: IANA: Haitian MW: Haitian Creole
	ky: IANA: Kirghiz MW: Kyrgyz
	li: IANA: Limburgan MW: Limburgish
	or: IANA: Oriya MW: Odia
	os: IANA: Ossetian MW: Ossetic
	pa: IANA: Panjabi MW: Punjabi
	ps: IANA: Pushto MW: Pashto
	to: IANA: Tonga MW: Tongan
	ug: IANA: Uighur MW: Uyghur
use the override table to override language names that are incorrect for your project

To see the list of names that MediaWiki has for your language, enter this in the Debug console:
	=mw.dumpObject (mw.language.fetchLanguageNames ('<tag>', 'all'))
(replacing <tag> with the language tag for your language)

Use of the MediaWiki language names lists is enabled when media_wiki_override_enable is set to boolean true.
	
]]

-- Master switch for replacing IANA language names with MediaWiki's names for this wiki's language.
-- Must be boolean true to enable; always false at en.wiki.
-- Caveat lector: MediaWiki's list of language names for your language may be incomplete or may not exist at all.
local media_wiki_override_enable = false;

if media_wiki_override_enable == true then
	local mw_names_t = mw.language.fetchLanguageNames (this_wiki_lang_tag, 'all');	-- tag/name pairs known to MediaWiki

	for mw_tag, mw_name in pairs (mw_names_t) do
		if lang_name_table_t.lang[mw_tag] then									-- only tags already present in the main list are touched; no new tags are added
			lang_name_table_t.lang[mw_tag] = mw_name;							-- replace the existing name with MediaWiki's name
		end
	end
end


--[[--------------------------< O V E R R I D E >--------------------------------------------------------------

Language codes and names in this table override the BCP47 names in lang_name_table.

indexes in this table shall always be lower case

]]

-- Maps a lower-case language tag to the name that is to be used in place of the name in lang_name_table.
-- Trailing comments typically give the name being replaced and/or the reason for the override.
local override = {
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------

	["ca-valencia"] = "Valencian",
	["cu"] = "Church Slavonic",													-- 2nd IANA name;
	["de-at"] = "Austrian German",												-- these code-region and code-variant tags to match en.wiki article names
	["de-ch"] = "Swiss Standard German",
	["en-au"] = "Australian English",
	["en-ca"] = "Canadian English",
	["en-emodeng"] = "Early Modern English",
	["en-gb"] = "British English",
	["en-ie"] = "Irish English",
	["en-in"] = "Indian English",
	["en-nz"] = "New Zealand English",
	["en-us"] = "American English",
	["en-za"] = "South African English",
	["fy"] = "West Frisian",													-- Western Frisian
	["mo"] = "Moldovan",														-- Moldavian (deprecated code); to match en.wiki article title
	["nl-be"] = "Flemish",														-- match MediaWiki
	["oc-provenc"] = "Provençal",
	["ps"] = "Pashto",															-- Pushto
	["pt-br"] = "Brazilian Portuguese",											-- match MediaWiki
	["tw-asante"] = "Asante Twi",

-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
		["av"] = "Avar",														-- Avaric
		["bo"] = "Standard Tibetan",											-- Tibetan
		["el"] = "Greek",														-- Modern Greek
--		["en-SA"] = "South African English",									-- English; no; SA is not South Africa, it is Saudi Arabia; ZA is South Africa
		["ff"] = "Fula",														-- Fulah
		["ht"] = "Haitian Creole",												-- Haitian
		["hz"] = "Otjiherero",													-- Herero
		["ii"] = "Yi",															-- Sichuan Yi
		["ki"] = "Gikuyu",														-- Kikuyu
		["kl"] = "Greenlandic",													-- Kalaallisut
		["ky"] = "Kyrgyz",														-- Kirghiz
		["lg"] = "Luganda",														-- Ganda
		["li"] = "Limburgish",													-- Limburgan
		["mi"] = "Māori",														-- Maori
		["na"] = "Nauruan",														-- Nauru
		["nb"] = "Bokmål",														-- Norwegian Bokmål
		["nd"] = "Northern Ndebele",											-- North Ndebele
		["nn"] = "Nynorsk",														-- Norwegian Nynorsk
		["nr"] = "Southern Ndebele",											-- South Ndebele
		["ny"] = "Chichewa",													-- Nyanja
		["oj"] = "Ojibwe",														-- Ojibwa
		["or"] = "Odia",														-- Oriya
		["pa"] = "Punjabi",														-- Panjabi
		["rn"] = "Kirundi",														-- Rundi
		["sl"] = "Slovene",														-- Slovenian
		["ss"] = "Swazi",														-- Swati
		["st"] = "Sotho",														-- Southern Sotho
		["to"] = "Tongan",														-- Tonga
--<end do-not-edit except to comment out>--


------------------------------< I S O _ 6 3 9 - 2,   - 3,   - 5 >----------------------------------------------

	["alv"] = "Atlantic–Congo languages",										-- to match en.wiki article title (endash)
	["arc"] = "Aramaic",														-- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE);
	["art"] = "constructed",													-- to match en.wiki article; lowercase for category name
	["bhd"] = "Bhadarwahi",														-- Bhadrawahi; to match en.wiki article title
	["bla"] = "Blackfoot",														-- Siksika; to match en.wiki article title
	["bua"] = "Buryat",															-- Buriat; this is a macro language; these four use wp preferred transliteration;
	["bxm"] = "Mongolian Buryat",												-- Mongolia Buriat; these three all redirect to Buryat
	["bxr"] = "Russian Buryat",													-- Russia Buriat;
	["bxu"] = "Chinese Buryat",													-- China Buriat;
	["byr"] = "Yipma",															-- Baruya, Yipma
	["egy"] = "Ancient Egyptian",												-- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic
	["ems"] = "Alutiiq",														-- Pacific Gulf Yupik; to match en.wiki article title
	["esx"] = "Eskimo–Aleut languages",											-- to match en.wiki article title (endash)
	["frr"] = "North Frisian",													-- Northern Frisian
	["frs"] = "East Frisian Low Saxon",											-- Eastern Frisian
	["gsw-fr"] = "Alsatian",													-- match MediaWiki
	["haa"] = "Hän",															-- Han; to match en.wiki article title
	["hmx"] = "Hmong–Mien languages",											-- to match en.wiki article title (endash)
	["ilo"] = "Ilocano",														-- Iloko; to match en.wiki article title
	["jam"] = "Jamaican Patois",												-- Jamaican Creole English
	["luo"] = "Dholuo",															-- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary): Dholuo
	["mhr"] = "Meadow Mari",													-- Eastern Mari
	["mid"] = "Modern Mandaic",													-- Mandaic
	['mis'] = "uncoded",														-- Uncoded languages; capitalization; special scope, not collective scope;
	["mkh"] = "Mon–Khmer languages",											-- to match en.wiki article title (endash)
	["mla"] = "Tamambo",														-- Malo
	['mte'] = "Mono-Alu",														-- Mono (Solomon Islands)
	['mul'] = "multiple",														-- Multiple languages; capitalization; special scope, not collective scope;
	["nan-tw"] = "Taiwanese Hokkien",											-- make room for IANA / 639-3 nan Min Nan Chinese; match en.wiki article title
	["new"] = "Newar",															-- Newari, Nepal Bhasa; to match en.wiki article title
	["ngf"] = "Trans–New Guinea languages",										-- to match en.wiki article title (endash)
	["nic"] = "Niger–Congo languages",											-- Niger-Kordofanian languages; to match en.wiki article title
	["nrf"] = "Norman",															-- not quite a collective - IANA name: Jèrriais + Guernésiais; categorizes to Norman-language text
	["nrf-gg"] = "Guernésiais",													-- match MediaWiki
	["nrf-je"] = "Jèrriais",													-- match MediaWiki
	["nzi"] = "Nzema",															-- Nzima; to match en.wiki article title
	["oma"] = "Omaha–Ponca",													-- to match en.wiki article title (endash)
	["orv"] = "Old East Slavic",												-- Old Russian
	["pfl"] = "Palatine German",												-- Pfaelzisch; to match en.wiki article
	["pie"] = "Piro Pueblo",													-- Piro; to match en.wiki article
	["pms"] = "Piedmontese",													-- Piemontese; to match en.wiki article title
	["pnb"] = "Punjabi (Western)",												-- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
	["rop"] = "Australian Kriol",												-- Kriol; en.wiki article is a dab; point to correct en.wiki article
	["sdo"] = "Bukar–Sadong",													-- Bukar-Sadung Bidayuh; to match en.wiki article title
	["stq"] = "Saterland Frisian",												-- Saterfriesisch
	["und"] = "undetermined",													-- capitalization to match existing category
	["wrg"] = "Warrongo",														-- Warungu
	["xal-ru"] = "Kalmyk",														-- to match en.wiki article title
	["xgf"] = "Tongva",															-- ISO 639-3 is Gabrielino-Fernandeño
	["yuf"] = "Havasupai–Hualapai",												-- Havasupai-Walapai-Yavapai; to match en.wiki article title
	["zxx"] = "no linguistic content",											-- capitalization

-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
		["ace"] = "Acehnese",													-- Achinese
		["aec"] = "Sa'idi Arabic",												-- Saidi Arabic
		["akl"] = "Aklan",														-- Aklanon
		["alt"] = "Altay",														-- Southern Altai
		["apm"] = "Mescalero-Chiricahua",										-- Mescalero-Chiricahua Apache
		["bal"] = "Balochi",													-- Baluchi
--		["bcl"] = "Central Bicolano",											-- Central Bikol
		["bin"] = "Edo",														-- Bini
		["bpy"] = "Bishnupriya Manipuri",										-- Bishnupriya
		["chg"] = "Chagatay",													-- Chagatai
		["ckb"] = "Sorani Kurdish",												-- Central Kurdish
		["cnu"] = "Shenwa",														-- Chenoua
		["coc"] = "Cocopah",													-- Cocopa
		["diq"] = "Zazaki",														-- Dimli
		["fit"] = "Meänkieli",													-- Tornedalen Finnish
		["fkv"] = "Kven",														-- Kven Finnish
		["frk"] = "Old Frankish",												-- Frankish
		["gez"] = "Ge'ez",														-- Geez
		["gju"] = "Gujari",														-- Gujari; NOTE(review): override is identical to the name in this comment, so the entry may be redundant — confirm
		["gsw"] = "Alemannic German",											-- Swiss German
		["gul"] = "Gullah",														-- Sea Island Creole English
		["hak"] = "Hakka",														-- Hakka Chinese
		["hbo"] = "Biblical Hebrew",											-- Ancient Hebrew
		["hnd"] = "Hindko",														-- Southern Hindko
--		["ikt"] = "Inuvialuk",													-- Inuinnaqtun
		["kaa"] = "Karakalpak",													-- Kara-Kalpak
		["khb"] = "Tai Lü",														-- Lü
		["kmr"] = "Kurmanji Kurdish",											-- Northern Kurdish
		["kpo"] = "Kposo",														-- Ikposo
		["krj"] = "Kinaray-a",													-- Kinaray-A
		["ktz"] = "Juǀ'hoan",													-- Juǀʼhoan
		["lez"] = "Lezgian",													-- Lezghian
		["liv"] = "Livonian",													-- Liv
		["lng"] = "Lombardic",													-- Langobardic
		["mia"] = "Miami-Illinois",												-- Miami
		["miq"] = "Miskito",													-- Mískito
		["mix"] = "Mixtec",														-- Mixtepec Mixtec
		["mni"] = "Meitei",														-- Manipuri
		["mrj"] = "Hill Mari",													-- Western Mari
		["mww"] = "White Hmong",												-- Hmong Daw
		["nds-nl"] = "Dutch Low Saxon",											-- Low German
--		["new"] = "Nepal Bhasa",												-- Newari
		["nso"] = "Northern Sotho",												-- Pedi
--		["nwc"] = "Classical Nepal Bhasa",										-- Classical Newari, Classical Nepal Bhasa, Old Newari
		["ood"] = "O'odham",													-- Tohono O'odham
		["otk"] = "Old Turkic",													-- Old Turkish
		["pal"] = "Middle Persian",												-- Pahlavi
		["pam"] = "Kapampangan",												-- Pampanga
		["phr"] = "Potwari",													-- Pahari-Potwari
		["pka"] = "Jain Prakrit",												-- Ardhamāgadhī Prākrit
--		["pnb"] = "Punjabi",													-- Western Panjabi
		["psu"] = "Shauraseni",													-- Sauraseni Prākrit
		["rap"] = "Rapa Nui",													-- Rapanui
		["rar"] = "Cook Islands Māori",											-- Rarotongan
		["rmu"] = "Scandoromani",												-- Tavringer Romani
		["rom"] = "Romani",														-- Romany
		["rup"] = "Aromanian",													-- Macedo-Romanian
		["ryu"] = "Okinawan",													-- Central Okinawan
		["sdc"] = "Sassarese",													-- Sassarese Sardinian
		["sdn"] = "Gallurese",													-- Gallurese Sardinian
		["shp"] = "Shipibo",													-- Shipibo-Conibo
		["src"] = "Logudorese",													-- Logudorese Sardinian
		["sro"] = "Campidanese",												-- Campidanese Sardinian
		["tkl"] = "Tokelauan",													-- Tokelau
		["tvl"] = "Tuvaluan",													-- Tuvalu
		["tyv"] = "Tuvan",														-- Tuvinian
		["vls"] = "West Flemish",												-- Vlaams
		["wep"] = "Westphalian",												-- Westphalien
		["xal"] = "Oirat",														-- Kalmyk
		["xcl"] = "Old Armenian",												-- Classical Armenian
		["yua"] = "Yucatec Maya",												-- Yucateco
--<end do-not-edit except to comment out>--


------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------

	["alg-x-proto"] = "Proto-Algonquian",										-- alg in IANA is Algonquian languages
	["cel-x-proto"] = "Proto-Celtic",											-- cel in IANA is Celtic languages
	["gem-x-proto"] = "Proto-Germanic",											-- gem in IANA is Germanic languages
	["gmw-x-ecg"] = "East Central German",
	["grc-x-aeolic"] = "Aeolic Greek",											-- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
	["grc-x-attic"] = "Attic Greek",
	["grc-x-biblical"] = "Biblical Greek",
	["grc-x-byzant"] = "Byzantine Greek",
	["grc-x-classic"] = "Classical Greek",
	["grc-x-doric"] = "Doric Greek",
	["grc-x-hellen"] = "Hellenistic Greek",
	["grc-x-ionic"] = "Ionic Greek",
	["grc-x-koine"] = "Koinē Greek",
	["grc-x-medieval"] = "Medieval Greek",
	["grc-x-patris"] = "Patristic Greek",
	["grk-x-proto"] = "Proto-Greek",											-- grk in IANA is Greek languages
	["iir-x-proto"] = "Proto-Indo-Iranian",										-- iir in IANA is Indo-Iranian Languages
	["ine-x-proto"] = "Proto-Indo-European",
	["ira-x-proto"] = "Proto-Iranian",											-- ira in IANA is Iranian languages
	["itc-x-proto"] = "Proto-Italic",											-- itc in IANA is Italic languages
	["ksh-x-colog"] = "Colognian",												-- en.wiki article is Colognian; ksh (Kölsch) redirects there
	["la-x-medieval"] = "Medieval Latin",
	["mis-x-ripuar"] = "Ripuarian",												-- replaces improper use of ksh in wp_languages
	["sem-x-proto"] = "Proto-Semitic",
	["sla-x-proto"] = "Proto-Slavic",											-- sla in IANA is Slavic languages
	["yuf-x-hav"] = "Havasupai",												-- IANA name for these three is Havasupai-Walapai-Yavapai
	["yuf-x-wal"] = "Walapai",
	["yuf-x-yav"] = "Yavapai",
	}


--[[--------------------------< A R T I C L E _ L I N K >------------------------------------------------------

for those rare occasions when article titles don't fit with the normal '<language name>-language', this table
maps language code to article title. Use of this table should be avoided and the use of redirects preferred as
that is the long-standing method of handling article names that don't fit with the normal pattern

]]

local article_name = {
	-- Ligurian disambiguation; see Template_talk:Lang#Ligurian_dab
	["lij"] = "Ligurian (Romance language)",									-- Ligurian
	["xlg"] = "Ligurian (ancient language)",

	-- Mono disambiguation; see Template_talk:Lang#Mono_languages
	["mnh"] = "Mono language (Congo)",											-- Mono (Democratic Republic of Congo)
	["mnr"] = "Mono language (California)",										-- Mono (USA)
	["mru"] = "Mono language (Cameroon)",										-- Mono (Cameroon)
	}


--[=[-------------------------< R T L _ S C R I P T S >--------------------------------------------------------

ISO 15924 scripts that are written right-to-left. Data in this table taken from [[ISO 15924#List of codes]]

last update to this list: 2017-12-24

]=]

local rtl_scripts = {															-- lower-case script codes, kept in alphabetical order
	"adlm", "arab", "aran", "armi", "avst", "cprt", "egyd", "egyh",
	"hatr", "hebr", "hung", "inds", "khar", "lydi", "mand", "mani",
	"mend", "merc", "mero", "narb", "nbat", "nkoo", "orkh", "palm",
	"phli", "phlp", "phlv", "phnx", "prti", "rohg", "samr", "sarb",
	"sogd", "sogo", "syrc", "syre", "syrj", "syrn", "thaa", "wole",
	};


--[[--------------------------< T R A N S L I T _ T I T L E S >------------------------------------------------

This is a table of tables of transliteration standards and the language codes or language scripts that apply to
those standards. This table is used to create the tool-tip text associated with the transliterated text displayed
by some of the {{lang-??}} templates.

These tables are more-or-less copied directly from {{transl}}. The standard 'NO_STD' is a construct to allow for
the cases when no |std= parameter value is provided.

]]

-- Outer keys are transliteration-standard names in lower case.  Each inner table maps a lower-case
-- language tag or script code to the tool-tip text for that standard/key combination.
-- NOTE(review): ['default'] is presumably the fallback text when no language or script key matches — confirm in Module:Lang.
local translit_title_table = {
	['ahl'] = {
		['default'] = 'Academy of the Hebrew Language transliteration',
		},

	['ala'] = {
		['default'] = 'American Library Association – Library of Congress transliteration',
		},

	['ala-lc'] = {																-- same text as ['ala']
		['default'] = 'American Library Association – Library of Congress transliteration',
		},

	['batr'] = {
		['default'] = 'Bikdash Arabic Transliteration Rules',
		},

	['bgn/pcgn'] = {
		['default'] = 'Board on Geographic Names / Permanent Committee on Geographical Names transliteration',
		},

	['din'] = {
		['ar'] = 'DIN 31635 Arabic',
		['fa'] = 'DIN 31635 Arabic',
		['ku'] = 'DIN 31635 Arabic',
		['ps'] = 'DIN 31635 Arabic',
		['tg'] = 'DIN 31635 Arabic',
		['ug'] = 'DIN 31635 Arabic',
		['ur'] = 'DIN 31635 Arabic',
		['arab'] = 'DIN 31635 Arabic',

		['default'] = 'DIN transliteration',
		},

	['eae'] = {
		['default'] = 'Encyclopaedia Aethiopica transliteration',
		},

	['hepburn'] = {
		['default'] = 'Hepburn transliteration',
		},

	['hunterian'] = {
		['default'] = 'Hunterian transliteration',
		},

	['iast'] = {
		['default'] = 'International Alphabet of Sanskrit transliteration',
		},

	['iso'] = {																	-- when a transliteration standard is supplied
		['ab'] = 'ISO 9 Cyrillic',
		['ba'] = 'ISO 9 Cyrillic',
		['be'] = 'ISO 9 Cyrillic',
		['bg'] = 'ISO 9 Cyrillic',
		['kk'] = 'ISO 9 Cyrillic',
		['ky'] = 'ISO 9 Cyrillic',
		['mn'] = 'ISO 9 Cyrillic',
		['ru'] = 'ISO 9 Cyrillic',
		['tg'] = 'ISO 9 Cyrillic',
		['uk'] = 'ISO 9 Cyrillic',
		['bua'] = 'ISO 9 Cyrillic',
		['sah'] = 'ISO 9 Cyrillic',
		['tut'] = 'ISO 9 Cyrillic',
		['xal'] = 'ISO 9 Cyrillic',
		['cyrl'] = 'ISO 9 Cyrillic',

		['ar'] = 'ISO 233 Arabic',
		['ku'] = 'ISO 233 Arabic',
		['ps'] = 'ISO 233 Arabic',
		['ug'] = 'ISO 233 Arabic',
		['ur'] = 'ISO 233 Arabic',
		['arab'] = 'ISO 233 Arabic',

		['he'] = 'ISO 259 Hebrew',
		['yi'] = 'ISO 259 Hebrew',
		['hebr'] = 'ISO 259 Hebrew',

		['el'] = 'ISO 843 Greek',
		['grc'] = 'ISO 843 Greek',

		['ja'] = 'ISO 3602 Japanese',
		['hira'] = 'ISO 3602 Japanese',
		['hrkt'] = 'ISO 3602 Japanese',
		['jpan'] = 'ISO 3602 Japanese',
		['kana'] = 'ISO 3602 Japanese',

		['zh'] = 'ISO 7098 Chinese',
		['chi'] = 'ISO 7098 Chinese',
		['pny'] = 'ISO 7098 Chinese',											-- NOTE(review): pny appears to be the ISO 639-3 tag for the Pinyin language of Cameroon, not Hanyu Pinyin — confirm this key is intended
		['zho'] = 'ISO 7098 Chinese',
--		['han'] = 'ISO 7098 Chinese',											-- unicode alias of Hani? doesn't belong here? should be Hani?
		['hans'] = 'ISO 7098 Chinese',
		['hant'] = 'ISO 7098 Chinese',

		['ka'] = 'ISO 9984 Georgian',
		['kat'] = 'ISO 9984 Georgian',

		['arm'] = 'ISO 9985 Armenian',
		['hy'] = 'ISO 9985 Armenian',

		['th'] = 'ISO 11940 Thai',
		['tha'] = 'ISO 11940 Thai',

		['ko'] = 'ISO 11941 Korean',
		['kor'] = 'ISO 11941 Korean',

		['awa'] = 'ISO 15919 Indic',
		['bho'] = 'ISO 15919 Indic',
		['bn'] = 'ISO 15919 Indic',
		['bra'] = 'ISO 15919 Indic',
		['doi'] = 'ISO 15919 Indic',
		['dra'] = 'ISO 15919 Indic',
		['gon'] = 'ISO 15919 Indic',
		['gu'] = 'ISO 15919 Indic',
		['hi'] = 'ISO 15919 Indic',
		['inc'] = 'ISO 15919 Indic',
		['kn'] = 'ISO 15919 Indic',
		['kok'] = 'ISO 15919 Indic',
		['ks'] = 'ISO 15919 Indic',
		['mag'] = 'ISO 15919 Indic',
		['mai'] = 'ISO 15919 Indic',
		['ml'] = 'ISO 15919 Indic',
		['mr'] = 'ISO 15919 Indic',
		['ne'] = 'ISO 15919 Indic',
		['new'] = 'ISO 15919 Indic',
		['or'] = 'ISO 15919 Indic',
		['pa'] = 'ISO 15919 Indic',
		['raj'] = 'ISO 15919 Indic',
		['sa'] = 'ISO 15919 Indic',
		['sat'] = 'ISO 15919 Indic',
		['sd'] = 'ISO 15919 Indic',
		['si'] = 'ISO 15919 Indic',
		['ta'] = 'ISO 15919 Indic',
		['tcy'] = 'ISO 15919 Indic',
		['te'] = 'ISO 15919 Indic',
		['beng'] = 'ISO 15919 Indic',
		['brah'] = 'ISO 15919 Indic',
		['deva'] = 'ISO 15919 Indic',
		['gujr'] = 'ISO 15919 Indic',
		['guru'] = 'ISO 15919 Indic',
		['knda'] = 'ISO 15919 Indic',
		['mlym'] = 'ISO 15919 Indic',
		['orya'] = 'ISO 15919 Indic',
		['sinh'] = 'ISO 15919 Indic',
		['taml'] = 'ISO 15919 Indic',
		['telu'] = 'ISO 15919 Indic',

		['default'] = 'ISO transliteration',
		},

	['jyutping'] = {
		['default'] = 'Jyutping transliteration',
		},

	['mlcts'] = {
		['default'] = 'Myanmar Language Commission Transcription System',
		},

	['mr'] = {
		['default'] = 'McCune–Reischauer transliteration',
		},

	['nihon-shiki'] = {
		['default'] = 'Nihon-shiki transliteration',
		},

	['no_std'] = {																-- when no transliteration standard is supplied; note that this sub-table has no ['default'] entry
		['akk'] = 'Semitic transliteration',
		['sem'] = 'Semitic transliteration',
		['phnx'] = 'Semitic transliteration',
		['xsux'] = 'Cuneiform transliteration',
		},

	['pinyin'] = {
		['default'] = 'Pinyin transliteration',
		},

	['rr'] = {
		['default'] = 'Revised Romanization of Korean transliteration',
		},

	['rtgs'] = {
		['default'] = 'Royal Thai General System of Transcription',
		},
	
	['satts'] = {
		['default'] = 'Standard Arabic Technical Transliteration System transliteration',
		},

	['scientific'] = {
		['default'] = 'scientific transliteration',
		},

	['ukrainian'] = {
		['default'] = 'Ukrainian National system of romanization',
		},

	['ungegn'] = {
		['default'] = 'United Nations Group of Experts on Geographical Names transliteration',
		},

	['wadegile'] = {
		['default'] = 'Wade–Giles transliteration',
		},

	['wehr'] = {
		['default'] = 'Hans Wehr transliteration',
		},
	};


return {
	-- properties of this wiki's content language
	this_wiki_lang_tag = this_wiki_lang_tag,									-- language tag
	this_wiki_lang_dir = lang_obj:getDir(),										-- language direction

	-- data tables made available to the modules that load this one
	article_name = article_name,
	lang_name_table = lang_name_table_t,										-- exported without the _t suffix
	override = override,
	rtl_scripts = rtl_scripts,
	special_tags_table = special_tags_table,
	translit_title_table = translit_title_table,
	};