Difference between revisions of "Module:Linguistic"
Jump to navigation
Jump to search
(update from /sandbox: function p.noungroup(): adjective-noun order: use primary language subtag if no order for subtag) |
m (1 revision imported) |
(No difference)
|
Latest revision as of 22:35, 1 April 2022
Documentation for this module may be created at Module:Linguistic/doc
--[[
  __  __           _       _        _     _                   _     _   _
 |  \/  | ___   __| |_   _| | ___ _| |   (_)_ __   __ _ _   _(_)___| |_(_) ___
 | |\/| |/ _ \ / _` | | | | |/ _ (_) |   | | '_ \ / _` | | | | / __| __| |/ __|
 | |  | | (_) | (_| | |_| | |  __/_| |___| | | | | (_| | |_| | \__ \ |_| | (__
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)_____|_|_| |_|\__, |\__,_|_|___/\__|_|\___|
                                                  |___/

Simple internationalization functions that can be called by other modules.
This Module was copied from Wikimedia Commons, so please request changes there.

Maintainers:
* Zolo - original version
* Jarekt

Dependencies:
- Module uses c:data:I18n/Or.tab
]]
require('Module:No globals')

-- ==================================================
-- === Internal functions ===========================
-- ==================================================

-- Return the entry of "list" (a table keyed by language code) for "lang",
-- or for the first of its MediaWiki fallback languages that has an entry.
-- Returns nil when neither "lang" nor any fallback is present in "list".
-- NOTE(review): not referenced by any other function in this module.
local function langSwitch(list, lang)
	local langList = mw.language.getFallbacksFor(lang)
	table.insert(langList, 1, lang) -- try the requested language first
	for i, language in ipairs(langList) do
		if list[language] then
			return list[language]
		end
	end
	return nil
end

------------------------------------------------------------------------------
-- read Commons Data:SOMENAME.tab dataset and look for message identified by a
-- "key" in a language "lang". See editAtWikidata as an example.
-- Raises an error when no row of the dataset has "key" as its first column.
local function formatMessage(dataset, key, lang)
	for _, row in pairs(mw.ext.data.get(dataset, lang).data) do
		local id, msg = unpack(row) -- first two columns: message id, message text
		if id == key then
			return mw.message.newRawMessage(msg):plain()
		end
	end
	error('Invalid message key "' .. key .. '"')
end

------------------------------------------------------------------------------
-- Strip wiki formatting from "str": HTML tags, bold/italic quotes, file
-- links, and link brackets. Returns "str" unchanged when it is nil or false.
local function nowiki(str)
	-- remove all the links
	if not str then
		return str
	end
	str = mw.ustring.gsub(str, "<[^>]*>", "")                 -- remove all html tags from str
	str = mw.ustring.gsub(str, "'''", "")                     -- remove bold
	str = mw.ustring.gsub(str, "''", "")                      -- remove italics
	str = mw.ustring.gsub(str, "%[%[[Ff]ile:[^%]]+%]%]", "")  -- remove file icons
	str = mw.ustring.gsub(str, "%[%[[^|]*|", "")              -- remove piped links, like "[[:en:test|"
	str = mw.ustring.gsub(str, "%[[^ ]+ ", "")                -- remove URL links, like "[https://www.wikidata.org/wiki/Q2706250 "
	str = mw.ustring.gsub(str, "%[%[", "")                    -- remove remaining link openers, like "[["
	str = mw.ustring.gsub(str, "%]", "")                      -- remove link closers, like "]" or "]]"
	return str
end

------------------------------------------------------------------------------
-- Function allowing for consistent treatment of boolean-like wikitext input.
-- It works similarly to Module:Yesno
-- Booleans are returned as-is; the numbers 1/0 and the (case-insensitive)
-- strings yes/y/true/1 and no/n/false/0 map to true/false. Anything else
-- yields "default".
local function yesno(val, default)
	if type(val) == 'boolean' then
		return val
	elseif type(val) == 'number' then
		if val == 1 then
			return true
		elseif val == 0 then
			return false
		end
	elseif type(val) == 'string' then
		val = mw.ustring.lower(val) -- put in lower case
		if val == 'no' or val == 'n' or val == 'false' or val == '0' then
			return false
		elseif val == 'yes' or val == 'y' or val == 'true' or val == '1' then
			return true
		end
	end
	return default
end

-- ==================================================
-- === External functions ===========================
-- ==================================================
local p = {}

-- ===========================================================================
-- === Version of the function to be called from other LUA codes
-- ===========================================================================

------------------------------------------------------------------------------------------
-- Tell whether "str" starts with a vowel (plain or accented, case-insensitive).
-- Returns the result of mw.ustring.find (truthy position values) when the
-- first character is one of the listed vowels, nil otherwise. A nil or empty
-- "str" is never vowel-initial.
function p.vowelfirst(str)
	-- An empty needle would "match" at position 1 in a plain find, so an
	-- empty string has to be rejected explicitly.
	if not str or str == '' then
		return nil
	end
	local vowels = 'aeiouyąăẵằẳặȃắâẫấầẩậãäǟāáàȁǎảẚåǻḁạǡæǣǽĕȇêễếềểệḙẽḛëēḕéḗèȅěẻẹęȩḝǝĭȋîĩḭï'..
		'ḯīíìȉǐỉịįıŏȏôỗốồổộõṏṍöōṑóṓòȍǒỏọǫǭơỡớờởợøǿŭȗûṷũṻṹṵüǖǘǜǚṳūúùȕǔủůụųưữứừửựŷỹÿȳýỳỷẙỵ'
	str = mw.ustring.lower(mw.ustring.sub(str, 1, 1))
	return mw.ustring.find(vowels, str, 1, true)
end

------------------------------------------------------------------------------------------
-- Wrap "str" in parentheses using the MediaWiki "parentheses" message.
-- "lang" is an optional language code; when omitted (or empty) the message
-- is rendered without an explicit language override.
-- Returns nil for a nil or empty "str".
function p.inparentheses(str, lang)
	if (not str) or (str == "") then
		return nil
	end
	local message = mw.message.new('parentheses', str)
	-- "lang" used to be read as an undeclared global, which raises an error
	-- under Module:No globals; it is now an optional trailing parameter.
	if lang and lang ~= '' then
		message = message:inLanguage(lang)
	end
	local str2 = message:plain()
	return (str2 ~= nil and str2) or ('(' .. str .. ')') -- in case this module is moved to a project where {{int:parenthesis}} is not set
end

------------------------------------------------------------------------------------------
-- rough translation of "of" in various languages
-- note that the cases when on "of" is employed varies a lot among languages, so it is more prudent to call this from lang specific function only
--
-- word:       the (possibly wiki-formatted) word to prefix
-- lang:       language code; only 'fr' and 'ca' are handled, any other value returns nil
-- raw:        optional plain form of "word"; derived from "word" when missing or blank
-- gender:     'f...' selects the feminine form (French, with determiner)
-- number:     'p...' selects the plural form (French, with determiner)
-- determiner: boolean-like flag (see yesno); French only
function p.of(word, lang, raw, gender, number, determiner)
	if not raw or mw.text.trim(raw) == "" then
		raw = mw.ustring.lower(nowiki(word))
	end
	-- raw is the string without the Wikiformatting so that it correctly analyses the string that is [[:fr:Italie|Italie]] -> 'italie'
	-- any way to automate this ?

	if lang == 'fr' then
		determiner = yesno(determiner, false)
		if determiner then
			if string.sub(number or '', 1, 1) == 'p' then -- number == 'plural'
				return 'des ' .. word
			elseif p.vowelfirst(raw) then
				return 'de l’' .. word
			elseif string.sub(gender or '', 1, 1) == 'f' then -- gender == 'feminine'
				return 'de la ' .. word
			else
				return 'du ' .. word
			end
		else
			if p.vowelfirst(raw) then
				return 'd’' .. word
			else
				return 'de ' .. word
			end
		end
	elseif lang == 'ca' then
		-- implement [[Template:Of/ca]] or https://ca.wikipedia.org/wiki/Plantilla:Deod%27/base for case where "{{{context}}}" is "en" (default on Commons)
		-- Two-letter prefixes that do not take the elided form. Every entry
		-- is delimited by '|' on both sides and the lookup is a plain-text
		-- search, so a one-letter word or a prefix containing a pattern
		-- character cannot match by accident.
		local exceptions = '|ia|ià|ie|io|iu|ua|ue|ui|uí|uï|uo|ya|ye|yi|yo|yu|'
		local prefix = '|' .. mw.ustring.sub(raw, 1, 2) .. '|'
		if p.vowelfirst(raw) and not mw.ustring.find(exceptions, prefix, 1, true) then
			return 'd\'' .. word
		else
			return 'de ' .. word
		end
	end
end

------------------------------------------------------------------------------------------
-- Combine a noun and an adjective in the word order used by language "lang".
-- Returns nil when "noun" is missing or empty, and "noun" alone when "adj"
-- is missing or empty. When the order for the language is unknown the
-- adjective is appended in parentheses.
function p.noungroup(noun, adj, lang)
	if not noun or noun == '' then
		return nil -- not '' so that it is not counted as a string by mw.listToText
	end
	if not adj or adj == '' then
		return noun
	end

	local wordsep = mw.message.new("Word-separator"):inLanguage(lang):plain()

	-- Assign order of words per language: 1 means adjective before the noun and 2 means adjective after the noun
	-- original source for many: language subpages of [[Template:Technique]]
	-- corrections to that using https://wals.info/feature/87A, still different:
	-- * vi: WALS says 2
	-- * gl isn't consistent in [[Template:Technique/gl]], but seems to be 2
	local LUT = {ar=2, ca=2, cs=1, da=1, de=1, el=1, en=1, es=2, et=1, fi=1, fr=2, gl=2, he=2, hu=1, it=2, ja=1, la=2, mk=1, nds=1, nb=1, nl=1, no=1, pl=1, pt=2, ro=2, ru=1, scn=2, sk=1, sl=1, sr=1, sv=1, sw=1, tr=1, vec=1, vi=1, zh=1}

	-- Fall back to the primary subtag (the part before the first '-') when
	-- the full language code has no entry of its own.
	local primaryLangSubtag = mw.text.split(lang, '-', true)[1]
	local case = LUT[lang] or LUT[primaryLangSubtag]
	if case == 1 then -- adjective before the noun
		return adj .. wordsep .. noun
	elseif case == 2 then -- adjective after the noun
		return noun .. wordsep .. adj
	else -- order unknown
		return noun .. ' (' .. adj .. ')'
	end
end

------------------------------------------------------------------------------------------
-- Join the list "args" into a single string for language "lang".
-- conjtype selects the separators:
--   'comma'          comma between all items
--   'or'             commas, then a translated "or" before the last item
--   'explicit or'    translated "or" between all items
--   'and', '' or nil commas, then the language's "and" before the last item
--   anything else    used verbatim as the separator between all items
function p.conj(args, lang, conjtype)
	local comma = mw.message.new("comma-separator"):inLanguage(lang):plain()
	local wordsep = mw.message.new("Word-separator"):inLanguage(lang):plain()
	local andtable = { ar=' و', he=' ו', ja='および', pl=' i '} -- languages with a problem with the MediaWiki:And

	local sep1, sep2 = comma, nil
	if conjtype == 'comma' then
		sep2 = comma
	elseif conjtype == 'or' or conjtype == 'explicit or' then
		local wordor = formatMessage('I18n/Or.tab', 'or', lang) -- get translated "or"
		sep2 = wordsep .. wordor .. wordsep -- pad it with spaces (if needed)
		sep1 = (conjtype == 'explicit or' and sep2) or comma -- adds "or" betwen all words when the context can be confusing
	elseif conjtype and conjtype ~= 'and' and conjtype ~= '' then
		sep1, sep2 = conjtype, conjtype
	elseif andtable[lang] then
		sep2 = andtable[lang]
	else
		sep2 = mw.message.new("and"):inLanguage(lang):plain() .. wordsep
	end
	return mw.text.listToText(args, sep1, sep2)
end

-- ===========================================================================
-- === Version of the functions to be called from template namespace
-- ===========================================================================

------------------------------------------------------------------------------------------
-- Template entry point for p.of: forwards the named frame arguments
-- word, lang, raw, gender, number and determiner.
function p.offromwiki(frame)
	local args = frame.args
	return p.of(args.word, args.lang, args.raw, args.gender, args.number, args.determiner)
end

------------------------------------------------------------------------------------------
-- Template entry point for p.conj. Positional arguments are the items to
-- join; "type" and "lang" are the only named arguments accepted. Any other
-- named argument produces an error string with a tracking category. When
-- the frame has no first positional argument the parent frame's arguments
-- are used. A missing or blank "lang" falls back to {{int:lang}}.
function p.conjfromWiki(frame)
	local args = frame.args
	if not args or not args[1] then
		args = frame:getParent().args
	end
	local lang = args.lang
	if not lang or mw.text.trim(lang) == '' then
		lang = frame:callParserFunction("int", "lang")
	end

	-- transform args metatable into a table so it can be concetenated
	-- pairs() gives no ordering guarantee, so the numeric keys are gathered
	-- first and sorted; the items are then emitted in argument order.
	local positions, values = {}, {}
	for key, value in pairs(args) do
		if type(key) == 'number' then
			value = mw.text.trim(value)
			if value ~= '' then -- blank items are dropped
				positions[#positions + 1] = key
				values[key] = value
			end
		elseif key ~= 'type' and key ~= 'lang' then
			return 'error: bad parameter in template:Conj: ' .. key .. '[[Category:Pages with incorrect template usage/Conj|A]]'
		end
	end
	table.sort(positions)

	local newargs = {}
	for _, key in ipairs(positions) do
		newargs[#newargs + 1] = values[key]
	end
	return p.conj(newargs, lang, args.type)
end

return p