Jump to content

Module:Wikt-lang

From Wikipedia, the free encyclopedia
Module documentation[view] [edit] [history] [purge]
This module is rated as beta. It is considered ready for widespread use, but as it is still relatively new, it should be applied with some caution to ensure results are as expected.
This module is currently under extended confirmed protection.
Extended confirmed protection prevents edits from all unregistered editors and registered users with fewer than 30 days tenure and 500 edits. The policy on community use specifies that extended confirmed protection can be applied to combat disruption, if semi-protection has proven to be ineffective. Extended confirmed protection may also be applied to enforce arbitration sanctions. Please discuss any changes on the talk page; you may submit an edit request to ask for uncontroversial changes supported by consensus.
Language templates
Language names (ISO 639)
Interwiki links
Foreign-language text
Other
This module depends on the following other modules: Module:Wikt-lang/data and Module:Unicode data.

This module is used by {{Wikt-lang}}. It is inspired by the templates {{m}} and {{l}} and their associated modules on Wiktionary. It has a Wiktionary link function that links to the correct section of the Wiktionary entry, and applies correct language formatting and italics. The language-tagging function does most of what {{Lang}} does, except that italics can't be customized and categories aren't added.

The module uses Module:Wikt-lang/data to retrieve the language name for a language code, and to perform the necessary entry-name replacements (for instance, removing macrons from Latin entry names). These are unfortunately not automatically retrieved from Wiktionary's Wikt-lang data modules. For language codes that do not have a name value in Module:Wikt-lang/data, the language name is fetched with mw.language.fetchLanguageName. When mw.language.fetchLanguageName does not return the correct language name (or any language name at all), please add the name to Module:Wikt-lang/data; similarly, when the correct entry name is not generated, please add the entry-name replacements to that data module.

Examples

Invalid codes

Errors

Comparison of codes

Language code Wiktionary name English Wikipedia name
aaq Penobscot Eastern Abnaki
abe Abenaki Western Abnaki
ajp South Levantine Arabic South Levantine Arabic
alg-pro Proto-Algonquian Error: unrecognized language tag: alg-pro
apc North Levantine Arabic Levantine Arabic
arb Modern Standard Arabic Standard Arabic
cel-x-bryproto Proto-Brythonic Error: unrecognized private tag: bryproto
cu Old Church Slavonic Church Slavonic
egy Egyptian Ancient Egyptian
frp Franco-Provençal Arpitan
gmw-x-proto Proto-West Germanic Proto-West Germanic
grk-x-proto Proto-Hellenic Proto-Greek
ine-x-bsproto Proto-Balto-Slavic Error: unrecognized private tag: bsproto
moe Cree Innu
mul Translingual multiple
nds-de German Low German Low German
non-x-proto Proto-Norse Error: unrecognized private tag: proto
poz-x-polproto Proto-Nuclear Polynesian Error: unrecognized private tag: polproto
rw Rwanda-Rundi Kinyarwanda
tts Isan Northeastern Thai
xlu Luwian Cuneiform Luwian
zle-x-ort Old Ruthenian Error: unrecognized private tag: ort

Tracking categories

The above documentation is transcluded from Module:Wikt-lang/doc. (edit | history)
Editors can experiment in this module's sandbox (edit | diff) and testcases (edit | run) pages.
Subpages of this module.

-- Enable Scribunto's strict mode, under which using an undeclared global
-- variable raises an error instead of silently yielding nil.
require('strict')
-- Language data for this module, loaded through mw.loadData.
local m_data = mw.loadData("Module:Wikt-lang/data")
-- Per-language records, looked up by language code. Falls back to the data
-- table itself when it has no `languages` field.
local langData = m_data.languages or m_data

-- Table of functions exported by this module (returned at the end of the file).
local p = {}

--- Normalise an empty string to nil.
-- Every other value (including nil and false) is passed through unchanged.
-- @param value any value, typically a template argument
-- @return nil when `value` is the empty string, otherwise `value` itself
local function ifNotEmpty(value)
	if value ~= "" then
		return value
	end
	return nil
end

--- Convert a displayed word into the title of its Wiktionary entry.
-- Wiki bold/italic quote markup is removed, then the language's
-- entry-name replacements (if the language has a record with a
-- `replacements` table in langData) are applied.
-- @param word the word as written in the article; converted with tostring
-- @param languageCode key into langData; may be nil or unknown, in which
--   case only the quote markup is removed
-- @return the entry name ("" when `word` is the empty string)
-- Raises an error when `word` is nil.
local function makeEntryName(word, languageCode)
	-- Validate before converting: tostring(nil) is the string "nil", so a
	-- check made after the conversion could never fire and a missing word
	-- would silently become the entry name "nil".
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or
	-- emphasis can be linked without piping. Bold (three apostrophes) must
	-- be removed before italics (two apostrophes). Assigning to a single
	-- variable drops gsub's second result (the substitution count).
	word = word:gsub("'''", "")
	word = word:gsub("''", "")

	local data = langData[languageCode]
	local replacements = data and data["replacements"]
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		-- `from` and `to` are parallel lists; a missing `to` list or entry
		-- means "delete the match".
		for i, from in ipairs(replacements.from) do
			word = ugsub(word, from, replacements.to and replacements.to[i] or "")
		end
	else
		-- Without `decompose`, every key of the table is used as a pattern
		-- and its value as the replacement.
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end

--- Put a four-letter script code into title case (e.g. "l", "ATN" -> "Latn").
-- Shaped to serve as a gsub replacement callback for the two captures of
-- the pattern "(%a)(%a%a%a)".
-- @param firstLetter first letter of the script code
-- @param threeLetters remaining letters of the script code
-- @return `firstLetter` upper-cased followed by `threeLetters` lower-cased
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end

--- Split the first template parameter into a language code and a script code.
-- @param codes the trimmed first parameter, e.g. "la", "grc-Grek",
--   "ine-x-proto"; may be nil or ""
-- @return languageCode the language code after applying m_data.redirects,
--   or nil when none could be extracted
-- @return scriptCode a title-cased four-letter script code; nil or "" when
--   no script code was given
-- @return errorText "" on success, otherwise the problem description
--   wrapped as " [message]"; always a string
local function getCodes(codes)
	local languageCode, scriptCode, invalidCode
	local errorText
	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'
	elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
		-- A two- or three-letter language code, optionally followed by a
		-- hyphen and a four-letter script code. Lower-case the whole
		-- 2-3 letter prefix: matching "%l%l%l?" first would truncate a
		-- mixed-case three-letter code such as "enG" to "en".
		languageCode = string.lower(codes:match("^%a%a%a?"))
		-- Four letters at the end of the parameter form the script code;
		-- normalise them to one uppercase plus three lowercase letters.
		-- string.gsub must be named explicitly: a bare `gsub` is an
		-- undeclared global, which is an error under require('strict').
		-- The parentheses drop gsub's second result.
		local rawScript = codes:match("(%a%a%a%a)$")
		if rawScript then
			scriptCode = (string.gsub(rawScript, "(%a)(%a%a%a)", fixScriptCode, 1))
		end
	elseif codes:find("^%a%a%a?%-%a%a%a?$")
		or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
		-- Hyphenated codes made of 2-3 letter parts are used verbatim as
		-- the language code.
		languageCode = codes

	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		-- NOTE(review): the test above accepts a two-letter language part but
		-- the match below requires three letters, so two-letter private-use
		-- codes are reported as invalid. Confirm whether that is intended.
		languageCode, scriptCode =
			codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = codes .. ' is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = scriptCode .. ' is not a valid script code.'
		else
			scriptCode = (string.gsub(scriptCode, "(%a)(%a%a%a)", fixScriptCode, 1))
		end
	elseif codes:find("^%a%a%a?") then
		-- Starts like a language code but the remainder is not recognised:
		-- keep the language and report the rest.
		languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = invalidCode .. ' is not a valid script code.'
	elseif codes:find("%-?%a%a%a%a$") then
		-- Ends like a script code but the beginning is not a language code:
		-- keep the script and report the rest.
		invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
		scriptCode = (string.gsub(scriptCode, "(%a)(%a%a%a)", fixScriptCode))
		errorText = invalidCode .. ' is not a valid language code.'
	else
		errorText = codes .. ' is not a valid language or script code.'
	end

	if errorText then
		errorText = ' [' .. errorText .. ']'
	else
		errorText = ""
	end

	-- Replace the code by its redirect target, if the data module lists one.
	languageCode = m_data.redirects[languageCode] or languageCode
	return languageCode, scriptCode, errorText
end

--- Wrap text in an HTML element carrying the language (and script) tag.
-- @param text wikitext to wrap; "[text?]" is used when nil/false
-- @param languageCode language code; replaced by the record's
--   `Wikipedia_code` when langData has one for it
-- @param script optional script code, appended as "-Script" when non-empty
-- @param italicize truthy to wrap in <i>, otherwise <span> is used
-- @return the wrapped text; for languages whose record has
--   direction == "rtl", the element also gets dir="rtl" and is surrounded
--   by a right-to-left mark and a left-to-right mark
local function tag(text, languageCode, script, italicize)
	local data = langData[languageCode]
	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	languageCode = data and data.Wikipedia_code or languageCode
	if script and script ~= "" then
		languageCode = languageCode .. "-" .. script
	end

	if not text then text = "[text?]" end

	local dirAttribute, leadingMark, trailingMark = "", "", ""
	if data and data["direction"] == "rtl" then
		dirAttribute = ' dir="rtl"'
		-- Written as byte escapes because the characters are invisible:
		-- U+200F RIGHT-TO-LEFT MARK before, U+200E LEFT-TO-RIGHT MARK after.
		leadingMark = "\226\128\143"
		trailingMark = "\226\128\142"
	end

	-- The attribute needs an element to live on, and the element must be
	-- closed: <i> gives tagged italic text, <span> tagged upright text.
	local element = italicize and "i" or "span"
	return leadingMark
		.. "<" .. element .. ' lang="' .. languageCode .. '"' .. dirAttribute .. ">"
		.. text
		.. "</" .. element .. ">"
		.. trailingMark
end

--- Build a wikilink to a Wiktionary entry.
-- With a language code the link targets the language's section of the entry
-- ("#<language name>"), and the entry is moved into the "Reconstruction:" or
-- "Appendix:" namespace where applicable. Without one, a plain link is built.
-- @param entry entry title; a leading "*" marks a reconstructed form
-- @param linkText text displayed for the link
-- @param languageCode optional language code used to look up the language
-- @return wikitext of the form "[[wikt:entry#name|linkText]]" or
--   "[[wikt:entry|linkText]]"
-- Raises an error when `entry` or `linkText` is missing, or when no
-- language name can be determined for `languageCode`.
local function linkToWiktionary(entry, linkText, languageCode)
	-- Validate before first use, so that a missing argument produces this
	-- message rather than an "attempt to index/concatenate nil" error.
	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end

	if not languageCode then
		return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
	end

	local data = langData[languageCode]
	local name
	if data and data.name then
		name = data.name
	else
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace 'en' with that wiki's language code.
		name = mw.language.fetchLanguageName(languageCode, 'en')
		if name == "" then
			error("Name for the language code " .. ("%q"):format(languageCode)
				.. " could not be retrieved with mw.language.fetchLanguageName, "
				.. "so it should be added to [[Module:Wikt-lang/data]]")
		end
	end

	-- Prefix a title with "<namespace>:<language name>/".
	local function inNamespace(namespace, title)
		if name == "" then
			error("Language name is empty")
		end
		return namespace .. ":" .. name .. "/" .. title
	end

	if entry:sub(1, 1) == "*" then
		-- The asterisk marks a reconstruction and is not part of the title.
		entry = inNamespace("Reconstruction", entry:sub(2))
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. This technique is used in Wiktionary:
		-- see [[wikt:Module:debug]].
		-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
		-- The frame lookup happens inside the protected call so that
		-- tracking stays best-effort even when no frame is available.
		pcall(function()
			local frame = mw.getCurrentFrame()
			return frame:expandTemplate{
				title = 'tracking/wikt-lang/reconstructed with no asterisk'
			}
		end)
		entry = inNamespace("Reconstruction", entry)
	elseif data and data.type == "appendix" then
		entry = inNamespace("Appendix", entry)
	end

	return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
end

--- Entry point used by {{Wikt-lang}}.
-- Arguments are read from the parent (template) frame when it has a first
-- positional parameter, otherwise from the #invoke frame:
--   1 = language code, optionally with a script code
--   2 = word to link (required)
--   3 = optional display text
--   italics / i / italic = "n", "-" or "no" to suppress italics
-- @param frame the Scribunto frame object
-- @return wikitext: the language-tagged Wiktionary link followed by any
--   error text about the codes, or a message when parameter 2 is missing
function p.wiktlang(frame)
	local templateArgs = frame:getParent().args
	local args = templateArgs[1] and templateArgs or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])

	local rawWord = args[2]
	if not rawWord or rawWord == '' then
		return '[text?] Parameter 2 is required';
	end

	local languageCode, scriptCode, errorText = getCodes(codes)

	-- Italics are on unless explicitly switched off.
	local italicsArg = args.italics or args.i or args.italic
	local italics = italicsArg ~= "n" and italicsArg ~= "-" and italicsArg ~= "no"

	-- The entry name always derives from parameter 2; parameter 3, when
	-- present, only replaces the displayed text.
	local entry, linkText
	if word1 then
		entry = makeEntryName(word1, languageCode)
		linkText = word2 or word1
	end

	-- Only Latin-script text is italicised: either the script was given as
	-- "Latn", or Module:Unicode data reports the link text as Latin.
	local italicize = italics and (scriptCode == "Latn" or require("Module:Unicode data").is_Latin(linkText))

	local out
	if not (entry and linkText) then
		out = '[text?]'
	elseif languageCode then
		out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italicize)
	else
		out = linkToWiktionary(entry, linkText)
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wiktlang generated nothing")
end

-- Return the export table; its only public function is p.wiktlang.
return p