پرش به محتوا

پودمان:utilities: تفاوت میان نسخه‌ها

از ویکی‌واژه
محتوای حذف‌شده محتوای افزوده‌شده
HAKHSIN (بحث | مشارکت‌ها)
جز ویرایش 37.255.152.38 (بحث) به آخرین تغییری که Beginneruser انجام داده بود واگردانده شد
برچسب: واگردانی
بدون خلاصۀ ویرایش
برچسب: برگردانده‌شده
خط ۱: خط ۱:
local mw = mw
local mw_text = mw.text
local package = package
local table = table

local require = require
local concat = table.concat
local decode_entities = require("Module:string utilities").decode_entities
local get_current_frame = mw.getCurrentFrame
local insert = table.insert
local ipairs = ipairs
local maxn = table.maxn
local tonumber = tonumber
local trim = mw_text.trim
local type = type
local unstrip = mw_text.unstrip
local unstripNoWiki = mw_text.unstripNoWiki

local export = {}
local export = {}


do
local data = mw.loadData("Module:utilities/data")
local notneeded = data.notneeded
local loaded = package.loaded
local loader = package.loaders[2]
local neededhassubpage = data.neededhassubpage


--[==[
-- A helper function to escape magic characters in a string
Like require, but return false if a module does not exist instead of throwing an error.
-- Magic characters: ^$()%.[]*+-?
Outputs are cached in {package.loaded}, which is faster for all module types, but much faster for nonexistent modules since require will attempt to use the full loader each time (since they don't get cached in {package.loaded}).
function export.pattern_escape(text)
Note: although nonexistent modules are cached as {false} in {package.loaded}, they still won't work with conventional require, since it uses a falsy check instead of checking the return value is not {nil}.
if type(text) == "table" then
]==]
text = text.args[1]
function export.safe_require(modname)
	-- Anything already recorded in the cache (including a `false` left by an earlier failed lookup) is returned as-is.
	local cached = loaded[modname]
	if cached == nil then
		-- Probe the loader directly: a truthy result means the module exists. Even then, defer to require so that its protection against recursive loading still applies.
		if loader(modname) then
			return require(modname)
		end
		-- Remember the miss so that later calls skip the loader entirely.
		cached = false
		loaded[modname] = cached
	end
	return cached
end
end
text = mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1")
return text
end
end


--[==[
function export.plain_gsub(text, pattern, replacement)
Convert decimal to hexadecimal.
local invoked = false

Note: About three times as fast as the hex library.
if type(text) == "table" then
]==]
invoked = true
function export.dec_to_hex(dec)
dec = tonumber(dec)
if text.args then
if not dec or dec % 1 ~= 0 then
local frame = text
error("Input should be a decimal integer.")
end
local params = {
return ("%x"):format(dec):upper()
[1] = {},
end
[2] = {},

[3] = { allow_empty = true },
do
}
local function check_level(lvl)
if type(lvl) ~= "number" then
local args = require("Module:parameters").process(frame.args, params)
error("Heading levels must be numbers.")
elseif lvl < 1 or lvl > 6 or lvl % 1 ~= 0 then
text = args[1]
error("Heading levels must be integers between 1 and 6.")
pattern = args[2]
replacement = args[3]
else
error("If the first argument to plain_gsub is a table, it should be a frame object.")
end
end
return lvl
else
end
if not ( type(pattern) == "string" or type(pattern) == "number" ) then

error("The second argument to plain_gsub should be a string or a number.")
--[==[
A helper function which iterates over the headings in `text`, which should be the content of a page or (main) section.

Each iteration returns three values: `sec` (the section title), `lvl` (the section level) and `loc` (the index of the section in the given text, from the first equals sign). The section title will be automatically trimmed, and any HTML entities will be resolved.
The optional parameters `a` and `b` (which should be integers between 1 and 6) can be used to specify a range, so that only headings with levels in that range are returned.
If only `a` is given, only headings of exactly that level are returned.
]==]
function export.find_headings(text, a, b)
	-- Validate the level bounds. A missing upper bound falls back to the lower bound; with no lower bound, no level filtering is applied.
	local min_lvl, max_lvl
	if a then
		min_lvl = check_level(a)
	end
	if b then
		max_lvl = check_level(b)
	else
		max_lvl = min_lvl
	end

	-- Captures: start position, the run of 1-6 equals signs, the raw title, and the position just past the heading line.
	local heading_pattern = "()%f[^%z\n](==?=?=?=?=?)([^\n]+)%2[\t ]*%f[%z\n]()"
	local pos = 1

	-- Iterator: yields the trimmed, entity-decoded title, the heading level and the heading's start index.
	return function()
		while true do
			local loc, equals, title, next_pos = text:match(heading_pattern, pos)
			pos = next_pos
			local lvl = equals and #equals
			-- Stop on exhaustion, when unfiltered, or when the level is within bounds; otherwise keep scanning.
			if not (title and min_lvl) or (lvl >= min_lvl and lvl <= max_lvl) then
				return title and trim(decode_entities(title)) or nil, lvl, loc
			end
		end
	end
end

local function get_section(content, name, level)
if not (content and name) then
return nil
elseif name:find("\n", 1, true) then
error("Heading name cannot contain a newline.")
end
end
level = level and check_level(level) or nil
name = trim(decode_entities(name))
if not ( type(replacement) == "string" or type(replacement) == "number" ) then
local start
error("The third argument to plain_gsub should be a string or a number.")
for sec, lvl, loc in export.find_headings(content, level and 1 or nil, level) do
if start and lvl <= level then
return content:sub(start, loc - 1)
elseif not start and (not level or lvl == level) and sec == name then
start, level = loc, lvl
end
end
end
return start and content:sub(start)
end
end

--[==[
pattern = export.pattern_escape(pattern)
A helper function to return the content of a page section.

if invoked then
`content` is raw wikitext, `name` is the requested section, and `level` is an optional parameter that specifies
text = mw.ustring.gsub(text, pattern, replacement)
the required section heading level. If `level` is not supplied, then the first section called `name` is returned.
return text
`name` can either be a string or table of section names. If a table, each name represents a section that has the
else
next as a subsection. For example, { {"Spanish", "Noun"}} will return the first matching section called "Noun"
return mw.ustring.gsub(text, pattern, replacement)
under a section called "Spanish". These do not have to be at adjacent levels ("Noun" might be L4, while "Spanish"
is L2). If `level` is given, it refers to the last name in the table (i.e. the name of the section to be returned).

The returned section includes all of its subsections. If no matching section is found, return {nil}.
]==]
function export.get_section(content, names, level)
	if type(names) ~= "string" then
		-- A list of names describes a chain of nested sections; narrow the content one name at a time.
		local depth = maxn(names)
		if depth > 6 then
			error("Not possible specify more than 5 subsections: headings only go up to level 6.")
		end
		for idx, heading in ipairs(names) do
			-- The level constraint only applies to the final name in the chain.
			local wanted_level
			if idx == depth and level then
				wanted_level = level
			end
			content = get_section(content, heading, wanted_level)
		end
		return content
	end
	-- A single name is looked up directly.
	return get_section(content, names, level)
end
end
end
end


--[[
--[==[
A function which returns the number of the page section which contains the current {#invoke}.
Format the categories with the appropriate sort key. CATEGORIES is a list of
]==]
categories.
function export.get_current_section()
-- LANG is an object encapsulating a language; if nil, the object for
local frame = get_current_frame()
language code 'und' (undetermined) will be used.
-- We determine the section via the heading strip marker count, since they're numbered sequentially, but the only way to do this is to generate a fake heading via frame:preprocess(). The native parser assigns each heading a unique marker, but frame:preprocess() will return copies of older markers if the heading is identical to one further up the page, so the fake heading has to be unique to the page. The best way to do this is to feed it a heading containing a nowiki marker (which we will need later), since those are always unique.
-- SORT_KEY is placed in the category invocation, and indicates how the
local nowiki_marker = frame:extensionTag("nowiki")
page will sort in the respective category. Normally this should be nil,
-- Note: heading strip markers have a different syntax to the ones used for tags.
and a default sort key based on the subpage name (the part after the
local h = tonumber(frame:preprocess("=" .. nowiki_marker .. "=")
colon) will be used.
:match("\127'\"`UNIQ%-%-h%-(%d+)%-%-QINU`\"'\127"))
-- SORT_BASE lets you override the default sort key used when SORT_KEY is
-- For some reason, [[Special:ExpandTemplates]] doesn't generate a heading strip marker, so if that happens we simply abort early.
nil. Normally, this should be nil, and a language-specific default sort
if not h then
key is computed from the subpage name (e.g. for Russian this converts
return 0
Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF,
so that effectively ё sorts after е instead of the default Wikimedia
sort, which (I think) is based on Unicode sort order and puts ё after я,
the last letter of the Cyrillic alphabet.
-- FORCE_OUTPUT forces normal output in all namespaces. Normally, nothing
is output if the page isn't in the main, Appendix:, Reconstruction: or
Citations: namespaces.
]]
export.format_categories = require("Module:utilities/format_categories")

-- Used by {{categorize}}
function export.template_categorize(frame)
local NAMESPACE = mw.title.getCurrentTitle().nsText
local format = frame.args["format"]
local args = frame:getParent().args
local langcode = args[1]; if langcode == "" then langcode = nil end
local sort_key = args["sort"]; if sort_key == "" then sort_key = nil end
local categories = {}
if not langcode then
if NAMESPACE == "الگو" then return "" end
error("Language code has not been specified. Please pass parameter 1 to the template.")
end
end
-- The only way to get the section number is to increment the heading count, so we store the offset in nowiki strip markers which can be retrieved by procedurally unstripping nowiki markers, counting backwards until we find a match.
local n, offset = tonumber(nowiki_marker:match("\127'\"`UNIQ%-%-nowiki%-([%dA-F]+)%-QINU`\"'\127"), 16)
local lang = require("Module:languages").getByCode(langcode)
while not offset and n > 0 do
n = n - 1
if not lang then
offset = unstripNoWiki(("\127'\"`UNIQ--nowiki-%08X-QINU`\"'\127"):format(n))
if NAMESPACE == "الگو" then return "" end
:match("^HEADING\1(%d+)") -- Prefix "HEADING\1" prevents collisions.
error("The language code \"" .. langcode .. "\" is not valid.")
end
end
offset = offset and (offset + 1) or 0
frame:extensionTag("nowiki", "HEADING\1" .. offset)
local prefix = ""
return h - offset
end
if format == "pos" then

prefix = lang:getCanonicalName() .. " "
do
elseif format == "topic" then
local L2_sections
prefix = lang:getCode() .. ":"
--[==[
end
A function which returns the name of the L2 language section which contains the current {#invoke}.
]==]
local i = 2
function export.get_current_L2()
local cat = args[i]
local section = export.get_current_section()
if section == 0 then
while cat do
return
if cat ~= "" then
end
table.insert(categories, prefix .. cat)
L2_sections = L2_sections or mw.loadData("Module:headword/data").page.L2_sections
while section > 0 do
local L2 = L2_sections[section]
if L2 then
return L2
end
section = section - 1
end
end
i = i + 1
cat = args[i]
end
end
return export.format_categories(categories, lang, sort_key)
end
end


--[==[
function export.catfix(lang, sc)
A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
if not lang then
]==]
require("Module:debug").track("catfix/no lang")
function export.get_plaintext(text)
return nil
text = text
elseif type(lang) ~= "table" then
:gsub("%[%[", "\1")
require("Module:debug").track("catfix/lang not table")
:gsub("%]%]", "\2")
return nil

-- Remove strip markers and HTML tags.
text = unstrip(text):gsub("<[^<>\1\2]+>", "")

-- Parse internal links for the display text, and remove categories.
text = require("Module:links").remove_links(text)

-- Remove files.
for _, falsePositive in ipairs({"File", "Image"}) do
text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "")
end
end

local canonicalName = lang:getCanonicalName() or error('The first argument to the function "catfix" should be a language object from Module:languages.')
-- Parse external links for the display text.
text = text:gsub("%[(https?://[^%[%]]+)%]",
if sc and not sc.getCode then
function(capture)
error('The second argument to the function "catfix" should be a script object from Module:scripts.')
return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
end)
-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
:gsub("\1", "&#91;&#91;")
:gsub("\2", "&#93;&#93;")
:gsub("%[", "&#91;")
:gsub("]", "&#93;")
-- Strip bold, italics and soft hyphens.
:gsub("('*)'''(.-'*)'''", "%1%2")
:gsub("('*)''(.-'*)''", "%1%2")
:gsub("­", "")

-- Get any HTML entities.
-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
text = decode_entities(text)

return trim(text)
end

do
local title_obj, category_namespaces, page_data, pagename, pagename_defaultsort
--[==[
Format the categories with the appropriate sort key.
* `categories` is a list of categories. Each entry in the list can be either a string (the full category, minus
the {"Category:"} prefix) or an object. In the latter case, the object should have fields
** `cat`: the full category, minus the {"Category:"} prefix (required);
** `lang`: optional language object to override the overall `lang`;
** `sort_key`: optional sort key to override the overall `sort_key`;
** `sort_base`: optional sort base to override the overall `sort_base`;
** `sc`: optional script object to override the overall `sc`.
* `lang` is an object encapsulating a language; if {nil}, the object for language code {"und"} (undetermined) will
be used. `lang` is used when computing the sort key (either from the subpage name or sort base).
* `sort_key` is placed in the category invocation, and indicates how the page will sort in the respective category.
Normally '''do not use this'''. Instead, leave it {nil}, and if you need to control the sort order, use
{sort_base}, so that language-specific normalization is applied on top of the specified sort base. If neither
{sort_key} nor {sort_base} is specified, the default is to apply language-specific normalization to the subpage
name; see below.
* `sort_base` lets you override the default sort key while still maintaining appropriate language-specific
normalization. If {nil} is specified, this defaults to the subpage name, which is the portion of the full pagename
after subtracting the namespace prefix (and, in certain namespaces such as {User:}, but notably not in the
mainspace, after subtracting anything up through the final slash). The actual sort key is derived from the sort
base approximately by lowercasing, applying language-specific normalization and then uppercasing; note that the
same process is applied in deriving the sort key when no sort base is specified. For example, for French, Spanish,
etc. the normalization process maps accented letters to their unaccented equivalents, so that e.g. in French,
{{m|fr|ça}} sorts after {{m|fr|ca}} (instead of after the default Wikimedia sort order, which is approximately
based on Unicode sort order and places ç after z) and {{m|fr|côté}} sorts after {{m|fr|coté}} (instead of between
c and d). Similarly, in Russian the normalization process converts Cyrillic ё to a string consisting of Cyrillic е
followed by U+10FFFF, so that effectively ё sorts after е instead of the default Wikimedia sort, which (I think)
puts ё after я, the last letter of the Cyrillic alphabet.
* `force_output` forces normal output in all namespaces. Normally, nothing is output if the page isn't in the main,
Appendix:, Thesaurus:, Reconstruction: or Citations: namespaces.
* `sc` is a script object; if nil, the default will be derived from the sort base (or its default value, the
subpage name) by calling {lang:findBestScript()}. The value of `sc` is used during the sort base normalization
process; for example, languages with multiple scripts will often have script-specific normalization processes.
]==]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
-- Returns the concatenated "[[Category:...|sort key]]" wikitext for every entry in `categories`, or "" when the current page is not eligible for categorisation.
-- A table passed as `lang` must expose `getCode`; a nil `lang` is accepted and replaced by "und" later on.
if type(lang) == "table" and not lang.getCode then
error("The second argument to format_categories should be a language object.")
end

-- Fetched on first use and kept in variables declared outside this function, so later calls reuse them.
title_obj = title_obj or mw.title.getCurrentTitle()
category_namespaces = category_namespaces or mw.loadData("Module:utilities/data").category_namespaces

-- Output is produced only if forced, if the page's namespace is listed in category_namespaces, or on Wiktionary:Sandbox.
if not (
force_output or
category_namespaces[title_obj.namespace] or
title_obj.prefixedText == "Wiktionary:Sandbox"
) then
return ""
elseif not page_data then
-- First eligible call: load the page data and remember the two pagename forms used for sort keys.
page_data = mw.loadData("Module:headword/data").page
pagename = page_data.encoded_pagename
pagename_defaultsort = page_data.pagename_defaultsort
end

-- Tracking categories collected by generate_sort_key; created lazily, so it stays nil when none are needed.
local extra_categories
-- Computes the sort key for one category. Side effect: may append a tracking category to extra_categories.
local function generate_sort_key(lang, sort_key, sort_base, sc)
-- Generate a default sort key.
-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
if sort_key == "-" then
-- NOTE(review): `uupper` is called as a string method; presumably it is installed on the string metatable elsewhere - confirm.
sort_key = sort_base and sort_base:uupper() or pagename_defaultsort
else
lang = lang or require("Module:languages").getByCode("und")
sort_base = lang:makeSortKey(sort_base or pagename, sc) or pagename_defaultsort
if not sort_key or sort_key == "" then
sort_key = sort_base
elseif lang:getCode() ~= "und" then
-- An explicit sort key was supplied for a real language: record whether it merely duplicates the automatically generated one.
if not extra_categories then
extra_categories = {}
end
insert(extra_categories, lang:getFullName() .. " terms with " .. (
sort_key:uupper() == sort_base and "redundant" or
"non-redundant non-automated"
) .. " sortkeys")
end
end
-- Final fallback so that the returned key is never nil or empty.
if not sort_key or sort_key == "" then
sort_key = pagename_defaultsort
end
return sort_key
end

local ret = {}
-- Sort key shared by every category given as a plain string.
local default_sort_key = generate_sort_key(lang, sort_key, sort_base, sc)
local ins_point = 0
-- Appends the wikitext for one category to `ret`. Table entries may override lang, sort_key, sort_base and sc, and get their own sort key.
local function process_category(cat)
local this_sort_key
if type(cat) == "string" then
this_sort_key = default_sort_key
else
this_sort_key = generate_sort_key(cat.lang or lang, cat.sort_key or sort_key,
cat.sort_base or sort_base, cat.sc or sc)
cat = cat.cat
end
ins_point = ins_point + 1
ret[ins_point] = "[[Category:" .. cat .. "|" .. this_sort_key .. "]]"
end

for _, cat in ipairs(categories) do
process_category(cat)
end
-- Tracking categories go last; they are plain strings, so they use default_sort_key and add nothing further to the list.
if extra_categories then
for _, cat in ipairs(extra_categories) do
process_category(cat)
end
end

return concat(ret)
end
end
end

-- To add script classes to links on pages created by category boilerplate templates.
do
if not sc then
sc = data.catfix_scripts[lang:getCode()]
local catfix_scripts

if sc then
--[==[
sc = require("Module:scripts").getByCode(sc)
Add a "catfix", which is used on language-specific category pages to add language attributes and often script
classes to all entry names. The addition of language attributes and script classes makes the entry names display
better (using the language- or script-specific styles specified in [[MediaWiki:Common.css]]), which is particularly
important for non-English languages that do not have consistent font support in browsers.

Language attributes are added for all languages, but script classes are only added for languages with one script
listed in their data file, or for languages that have a default script listed in the {catfix_script} list in
[[Module:utilities/data]]. Some languages clearly have a default script, but still have other scripts listed in
their data file and therefore need their default script to be specified. Others do not have a default script.

* Serbo-Croatian is regularly written in both the Latin and Cyrillic scripts. Because it uses two scripts,
Serbo-Croatian cannot have a script class applied to entries in its category pages, as only one script class
can be specified at a time.
* Russian is usually written in the Cyrillic script ({{cd|Cyrl}}), but Braille ({{cd|Brai}}) is also listed in
its data file. So Russian needs an entry in the {catfix_script} list, so that the {{cd|Cyrl}} (Cyrillic) script
class will be applied to entries in its category pages.

To find the scripts listed for a language, go to [[Module:languages]] and use the search box to find the data file
for the language. To find out what a script code means, search the script code in [[Module:scripts/data]].
]==]
function export.catfix(lang, sc)
if not lang or not lang.getCanonicalName then
error('The first argument to the function "catfix" should be a language object from [[Module:languages]] or [[Module:etymology languages]].')
end
if sc and not sc.getCode then
error('The second argument to the function "catfix" should be a script object from [[Module:scripts]].')
end
local canonicalName = lang:getCanonicalName()
local nonEtymologicalName = lang:getFullName()

-- To add script classes to links on pages created by category boilerplate templates.
if not sc then
catfix_scripts = catfix_scripts or mw.loadData("Module:utilities/data").catfix_scripts
sc = catfix_scripts[lang:getCode()] or catfix_scripts[lang:getFullCode()]
if sc then
sc = require("Module:scripts").getByCode(sc)
end
end

local catfix_class = "CATFIX-" .. mw.uri.anchorEncode(canonicalName)
if nonEtymologicalName ~= canonicalName then
catfix_class = catfix_class .. " CATFIX-" .. mw.uri.anchorEncode(nonEtymologicalName)
end
end
return "<span id=\"catfix\" style=\"display:none;\" class=\"" .. catfix_class .. "\">" ..
require("Module:script utilities").tag_text("&nbsp;", lang, sc, nil) ..
"</span>"
end
end
return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" ..
require("Module:script utilities").tag_text("&nbsp;", lang, sc, nil) ..
"</span>"
end
end


--[==[
Implementation of the {{tl|catfix}} template.
]==]
function export.catfix_template(frame)
function export.catfix_template(frame)
local params = {
local params = {
[1] = {},
[1] = { type = "language", required = true },
[2] = { alias_of = "sc" },
[2] = { alias_of = "sc" },
["sc"] = {},
["sc"] = { type = "script" },
}
}

local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1)
local sc = args.sc
if sc then
sc = require("Module:scripts").getByCode(sc) or error('The script code "' .. sc .. '", provided in the second parameter, is not valid.')
end
return export.catfix(lang, sc)
end


return export.catfix(args[1], args.sc)
-- Not exporting because it is not used yet.
-- Classifies a calendar date relative to the current time.
-- `frame.args[1]` is the year, `frame.args[2]` the month (an English month name, or a number 1-12 given either as a number or as a numeric string), and `frame.args[3]` the day.
-- Returns "past", "future" or "present"; throws an error if the month is not recognised.
local function getDateTense(frame)
	local month_numbers = {
		January = 1, February = 2, March = 3, April = 4, May = 5, June = 6,
		July = 7, August = 8, September = 9, October = 10, November = 11, December = 12,
	}
	local args = frame.args
	local raw_month = args[2]
	-- Per the Scribunto reference, arguments arriving via #invoke are strings, so a numeric month such as "3" has to be converted before it can be used.
	local month = month_numbers[raw_month] or tonumber(raw_month)
	if not month or month < 1 or month > 12 or month % 1 ~= 0 then
		error("Invalid month: " .. tostring(raw_month))
	end

	-- No hour is supplied, so os.time uses its default of noon on the given day.
	local date = os.time({ year = args[1], month = month, day = args[3] })
	local diff = os.difftime(date, os.time())
	-- Noon is at most half a day from any moment of the same day, hence the half-day tolerance.
	local half_day = 12 * 3600
	if diff < -half_day then
		return "past"
	elseif diff > half_day then
		return "future"
	end
	return "present"
end
end


--[==[
function export.make_id(lang, str)
Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language,
--[[ If called with invoke, first argument is a frame object.
family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require
If called by a module, first argument is a language object. ]]
one of these kinds of object.
local invoked = false

If `noErr` is set, the function returns false instead of throwing an error, which allows customised error handling to
if type(lang) == "table" then
be done in the calling function.
if lang.args then
]==]
invoked = true
function export.check_object(typ, noErr, ...)
local frame = lang
local function fail(message)
if noErr then
return false
local params = {
else
[1] = {},
error(message, 3)
[2] = {},
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local langCode = args[1]
str = args[2]
local m_languages = require("Module:languages")
lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1)
elseif not lang.getCanonicalName then
error("The first argument to make_id should be a language object.")
end
end
end
end


local objs = {...}
if not ( type(str) == "string" or type(str) == "number" ) then
if #objs == 0 then
error("The second argument to make_id should be a string or a number.")
return fail("Must provide at least one object to check.")
end
end
for _, obj in ipairs(objs) do
if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
local id = require("Module:senseid").anchor(lang, str)
return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
elseif not (typ == "object" or obj:hasType(typ)) then
if invoked then
for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
return '<li class="senseid" id="' .. id .. '">'
if obj:hasType(wrong_type) then
else
return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
return id
end
end
return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
end
end
end
return true
end
end



نسخهٔ ۳ اوت ۲۰۲۴، ساعت ۱۹:۰۱

This module exports various general utility functions, which can be used by other modules.

Detailed documentation

export.safe_require

function export.safe_require(modname)

Like require, but return false if a module does not exist instead of throwing an error. Outputs are cached in package.loaded, which is faster for all module types, but much faster for nonexistent modules since require will attempt to use the full loader each time (since they don't get cached in package.loaded). Note: although nonexistent modules are cached as false in package.loaded, they still won't work with conventional require, since it uses a falsy check instead of checking the return value is not nil.

export.dec_to_hex

function export.dec_to_hex(dec)

Convert decimal to hexadecimal.

Note: About three times as fast as the hex library.

export.find_headings

function export.find_headings(text, a, b)

A helper function which iterates over the headings in text, which should be the content of a page or (main) section.

Each iteration returns three values: sec (the section title), lvl (the section level) and loc (the index of the section in the given text, from the first equals sign). The section title will be automatically trimmed, and any HTML entities will be resolved. The optional parameters a and b (which should be integers between 1 and 6) can be used to specify a range, so that only headings with levels in that range are returned. If only a is given, only headings of exactly that level are returned.

export.get_section

function export.get_section(content, names, level)

A helper function to return the content of a page section.

content is raw wikitext, name is the requested section, and level is an optional parameter that specifies the required section heading level. If level is not supplied, then the first section called name is returned. name can either be a string or table of section names. If a table, each name represents a section that has the next as a subsection. For example, {"Spanish", "Noun"} will return the first matching section called "Noun" under a section called "Spanish". These do not have to be at adjacent levels ("Noun" might be L4, while "Spanish" is L2). If level is given, it refers to the last name in the table (i.e. the name of the section to be returned).

The returned section includes all of its subsections. If no matching section is found, return nil.

export.get_current_section

function export.get_current_section()

A function which returns the number of the page section which contains the current #invoke.

export.get_current_L2

function export.get_current_L2()

A function which returns the name of the L2 language section which contains the current #invoke.

export.get_plaintext

function export.get_plaintext(text)

A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.

export.format_categories

function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)

Format the categories with the appropriate sort key.

  • categories is a list of categories. Each entry in the list can be either a string (the full category, minus the "Category:" prefix) or an object. In the latter case, the object should have fields
    • cat: the full category, minus the "Category:" prefix (required);
    • lang: optional language object to override the overall lang;
    • sort_key: optional sort key to override the overall sort_key;
    • sort_base: optional sort base to override the overall sort_base;
    • sc: optional script object to override the overall sc.
  • lang is an object encapsulating a language; if nil, the object for language code "und" (undetermined) will be used. lang is used when computing the sort key (either from the subpage name or sort base).
  • sort_key is placed in the category invocation, and indicates how the page will sort in the respective category. Normally do not use this. Instead, leave it nil, and if you need to control the sort order, use sort_base, so that language-specific normalization is applied on top of the specified sort base. If neither sort_key nor sort_base is specified, the default is to apply language-specific normalization to the subpage name; see below.
  • sort_base lets you override the default sort key while still maintaining appropriate language-specific normalization. If nil is specified, this defaults to the subpage name, which is the portion of the full pagename after subtracting the namespace prefix (and, in certain namespaces such as User:, but notably not in the mainspace, after subtracting anything up through the final slash). The actual sort key is derived from the sort base approximately by lowercasing, applying language-specific normalization and then uppercasing; note that the same process is applied in deriving the sort key when no sort base is specified. For example, for French, Spanish, etc. the normalization process maps accented letters to their unaccented equivalents, so that e.g. in French, ça sorts after ca (instead of after the default Wikimedia sort order, which is approximately based on Unicode sort order and places ç after z) and côté sorts after coté (instead of between c and d). Similarly, in Russian the normalization process converts Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF, so that effectively ё sorts after е instead of the default Wikimedia sort, which (I think) puts ё after я, the last letter of the Cyrillic alphabet.
  • force_output forces normal output in all namespaces. Normally, nothing is output if the page isn't in the main, Appendix:, Thesaurus:, Reconstruction: or Citations: namespaces.
  • sc is a script object; if nil, the default will be derived from the sort base (or its default value, the subpage name) by calling lang:findBestScript(). The value of sc is used during the sort base normalization process; for example, languages with multiple scripts will often have script-specific normalization processes.

export.catfix

function export.catfix(lang, sc)

Add a "catfix", which is used on language-specific category pages to add language attributes and often script classes to all entry names. The addition of language attributes and script classes makes the entry names display better (using the language- or script-specific styles specified in MediaWiki:Common.css), which is particularly important for non-English languages that do not have consistent font support in browsers.

Language attributes are added for all languages, but script classes are only added for languages with one script listed in their data file, or for languages that have a default script listed in the catfix_scripts list in Module:utilities/data. Some languages clearly have a default script, but still have other scripts listed in their data file and therefore need their default script to be specified. Others do not have a default script.

  • Serbo-Croatian is regularly written in both the Latin and Cyrillic scripts. Because it uses two scripts, Serbo-Croatian cannot have a script class applied to entries in its category pages, as only one script class can be specified at a time.
  • Russian is usually written in the Cyrillic script (Cyrl), but Braille (Brai) is also listed in its data file. So Russian needs an entry in the catfix_scripts list, so that the Cyrl (Cyrillic) script class will be applied to entries in its category pages.

To find the scripts listed for a language, go to Module:languages and use the search box to find the data file for the language. To find out what a script code means, search the script code in Module:scripts/data.

export.catfix_template

function export.catfix_template(frame)

Implementation of the {{catfix}} template.

export.check_object

function export.check_object(typ, noErr, ...)

Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.

If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.

خطای لوآ در پودمان:module_categorization در خط 173: This template should only be used in the Module namespace, not on page 'پودمان:utilities'..


local mw = mw
local mw_text = mw.text
local package = package
local table = table

local require = require
local concat = table.concat
local decode_entities = require("Module:string utilities").decode_entities
local get_current_frame = mw.getCurrentFrame
local insert = table.insert
local ipairs = ipairs
local maxn = table.maxn
local tonumber = tonumber
local trim = mw_text.trim
local type = type
local unstrip = mw_text.unstrip
local unstripNoWiki = mw_text.unstripNoWiki

local export = {}

do
	local module_cache = package.loaded
	local find_module = package.loaders[2]

	--[==[
	A non-throwing counterpart to require: returns the module if it exists, or {false} if it does not.
	Every result is looked up in (and, for missing modules, recorded in) {package.loaded}. This is faster for all module types, and much faster for nonexistent modules, because plain require never caches a failed lookup and so repeats the full loader search each time.
	Note: a nonexistent module is cached as {false}, but conventional require will still not accept that entry, since it tests the cached value for truthiness rather than comparing it against {nil}.
	]==]
	function export.safe_require(modname)
		local cached = module_cache[modname]
		if cached ~= nil then
			return cached
		elseif find_module(modname) then
			-- The loader gives back a function when the module exists and nil when it does not, which is cheaper to test than wrapping require in pcall. The actual load is still delegated to require, because require guards against infinite loading loops (and those should throw an error).
			return require(modname)
		end
		-- Remember the failure so that later calls return immediately.
		module_cache[modname] = false
		return false
	end
end

--[==[
Return the uppercase hexadecimal representation of a decimal integer.

`dec` is passed through `tonumber` first, so numeric strings are accepted. An error is thrown if the result is not a
whole number.

Note: About three times as fast as the hex library.
]==]
function export.dec_to_hex(dec)
	local n = tonumber(dec)
	if n == nil or n % 1 ~= 0 then
		error("Input should be a decimal integer.")
	end
	-- "%X" emits uppercase hexadecimal digits directly.
	return ("%X"):format(n)
end

do
	-- Validate a heading level: it must be a number that is a whole value from 1 to 6.
	-- Returns the level unchanged on success; throws an error otherwise.
	local function check_level(lvl)
		if type(lvl) ~= "number" then
			error("Heading levels must be numbers.")
		end
		local in_range = lvl >= 1 and lvl <= 6
		if not (in_range and lvl % 1 == 0) then
			error("Heading levels must be integers between 1 and 6.")
		end
		return lvl
	end

	--[==[
	A helper function which returns an iterator over the headings in `text`, which should be the content of a page or
	(main) section.

	Each iteration returns three values: `sec` (the section title), `lvl` (the section level, i.e. the number of equals
	signs on each side) and `loc` (the index of the heading in the given text, from the first equals sign). The section
	title is automatically trimmed, and any HTML entities are resolved.

	The optional parameters `a` and `b` (each an integer between 1 and 6) restrict which headings are returned:
	* if neither is given, every heading is returned;
	* if only `a` is given, only headings of exactly level `a` are returned;
	* if both are given, only headings whose level lies in the range `a` to `b` (inclusive) are returned.
	`b` is validated but otherwise has no effect unless `a` is also given.
	]==]
	function export.find_headings(text, a, b)
		a = a and check_level(a) or nil
		-- With no explicit upper bound, the range collapses to the single level `a`.
		b = b and check_level(b) or a or nil
		-- `start` is the position from which the next search begins; the others are filled in by each match.
		local start, loc, lvl, sec = 1

		return function()
			repeat
				-- Match a whole heading line: 1-6 equals signs at the start of a line, the title, the same run of
				-- equals signs again (%2), optional trailing tabs/spaces, then the end of the line. The two
				-- position captures give the heading's location and where to resume searching.
				loc, lvl, sec, start = text:match("()%f[^%z\n](==?=?=?=?=?)([^\n]+)%2[\t ]*%f[%z\n]()", start)
				-- Convert the captured run of equals signs into a numeric level.
				lvl = lvl and #lvl
			-- Stop when there is no further heading, when no level filter applies, or when the level is in range.
			until not (sec and a) or (lvl >= a and lvl <= b)
			return sec and trim(decode_entities(sec)) or nil, lvl, loc
		end
	end

	-- Return the text of the first section of `content` titled `name` (optionally restricted to heading level
	-- `level`), running from its heading up to, but not including, the next heading of the same or a higher rank.
	-- Returns nil if `content` or `name` is missing, or if no such section exists.
	local function get_section(content, name, level)
		if not (content and name) then
			return nil
		end
		if name:find("\n", 1, true) then
			error("Heading name cannot contain a newline.")
		end
		level = level and check_level(level) or nil
		name = trim(decode_entities(name))
		local start
		-- When a level is requested, deeper headings are irrelevant, so only levels 1..level are scanned.
		for sec, lvl, loc in export.find_headings(content, level and 1 or nil, level) do
			if start then
				-- Already inside the wanted section: it ends at the next heading that is not a subsection.
				if lvl <= level then
					return content:sub(start, loc - 1)
				end
			elseif sec == name and (not level or lvl == level) then
				-- Found the section; remember where it starts and what level it actually has.
				start, level = loc, lvl
			end
		end
		-- The section (if found) runs to the end of the text.
		return start and content:sub(start)
	end

	--[==[
	A helper function to return the content of a page section.

	`content` is raw wikitext, `names` is the requested section, and `level` is an optional parameter that specifies
	the required section heading level. If `level` is not supplied, then the first section called `names` is returned.
	`names` can either be a string or table of section names. If a table, each name represents a section that has the
	next as a subsection. For example, { {"Spanish", "Noun"}} will return the first matching section called "Noun"
	under a section called "Spanish". These do not have to be at adjacent levels ("Noun" might be L4, while "Spanish"
	is L2). If `level` is given, it refers to the last name in the table (i.e. the name of the section to be returned).

	The returned section includes all of its subsections. If no matching section is found, return {nil}.
	An error is thrown if more than 6 names are given, since headings only go up to level 6.
	]==]
	function export.get_section(content, names, level)
		if type(names) == "string" then
			return get_section(content, names, level)
		end
		local names_len = maxn(names)
		if names_len > 6 then
			error("Not possible to specify more than 5 subsections: headings only go up to level 6.")
		end
		for i, name in ipairs(names) do
			-- Only the final name is constrained by `level`; ancestors may be at any level.
			content = get_section(content, name, i == names_len and level or nil)
			if not content then
				-- A parent section is missing, so no descendant can be found either.
				return nil
			end
		end
		return content
	end
end

--[==[
A function which returns the number of the page section which contains the current {#invoke}.

Returns {0} if no heading strip marker could be generated (see below), in which case the section cannot be determined.
]==]
function export.get_current_section()
	local frame = get_current_frame()
	-- We determine the section via the heading strip marker count, since they're numbered sequentially, but the only way to do this is to generate a fake heading via frame:preprocess(). The native parser assigns each heading a unique marker, but frame:preprocess() will return copies of older markers if the heading is identical to one further up the page, so the fake heading has to be unique to the page. The best way to do this is to feed it a heading containing a nowiki marker (which we will need later), since those are always unique.
	local nowiki_marker = frame:extensionTag("nowiki")
	-- Note: heading strip markers have a different syntax to the ones used for tags.
	-- `h` is the decimal number embedded in the fake heading's strip marker.
	local h = tonumber(frame:preprocess("=" .. nowiki_marker .. "=")
		:match("\127'\"`UNIQ%-%-h%-(%d+)%-%-QINU`\"'\127"))
	-- For some reason, [[Special:ExpandTemplates]] doesn't generate a heading strip marker, so if that happens we simply abort early.
	if not h then
		return 0
	end
	-- The only way to get the section number is to increment the heading count, so we store the offset in nowiki strip markers which can be retrieved by procedurally unstripping nowiki markers, counting backwards until we find a match.
	-- `n` is the hexadecimal number of the nowiki marker created above; `offset` starts out nil.
	local n, offset = tonumber(nowiki_marker:match("\127'\"`UNIQ%-%-nowiki%-([%dA-F]+)%-QINU`\"'\127"), 16)
	-- Walk back through earlier nowiki markers until one holding a stored offset is found (or none remain).
	while not offset and n > 0 do
		n = n - 1
		offset = unstripNoWiki(("\127'\"`UNIQ--nowiki-%08X-QINU`\"'\127"):format(n))
			:match("^HEADING\1(%d+)") -- Prefix "HEADING\1" prevents collisions.
	end
	-- The first call on a page uses offset 0; each later call uses one more than the last stored offset.
	offset = offset and (offset + 1) or 0
	-- Store the new offset in a fresh nowiki marker so that the next call can find it.
	frame:extensionTag("nowiki", "HEADING\1" .. offset)
	-- Subtract the offset from the heading number to get the section number.
	return h - offset
end

do
	-- Lazily-loaded table from [[Module:headword/data]], indexed by section number.
	local l2_by_section
	--[==[
	Return the name of the L2 language section in which the current {#invoke} is located. Nothing is returned if the
	current section number cannot be determined, or if no L2 section is recorded at or before it.
	]==]
	function export.get_current_L2()
		local current = export.get_current_section()
		if current == 0 then
			return
		end
		if not l2_by_section then
			l2_by_section = mw.loadData("Module:headword/data").page.L2_sections
		end
		-- Search backwards from the current section for the nearest section number with an L2 entry.
		for index = current, 1, -1 do
			local name = l2_by_section[index]
			if name then
				return name
			end
		end
	end
end

--[==[
A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.

`text` must be a string of wikitext. In order, the function: removes strip markers and HTML tags; resolves internal
links to their display text via {remove_links} in [[Module:links]]; removes File:/Image: links; reduces external links
to their display text; strips bold/italic apostrophes and soft hyphens; decodes HTML entities; and trims the result.
]==]
function export.get_plaintext(text)
	-- Temporarily replace double square brackets with the control characters \1 and \2, so that link boundaries can be
	-- matched as single characters in the patterns below.
	text = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")

	-- Remove strip markers and HTML tags.
	text = unstrip(text):gsub("<[^<>\1\2]+>", "")

	-- Parse internal links for the display text, and remove categories.
	text = require("Module:links").remove_links(text)

	-- Remove files.
	for _, file_namespace in ipairs({"File", "Image"}) do
		text = text:gsub("\1" .. file_namespace .. ":[^\1\2]+\2", "")
	end

	-- Parse external links for the display text.
	text = text:gsub("%[(https?://[^%[%]]+)%]",
		function(capture)
			-- The display text is whatever follows the first whitespace after the URL; a bare URL gives nothing.
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)
		-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
		:gsub("\1", "&#91;&#91;")
		:gsub("\2", "&#93;&#93;")
		:gsub("%[", "&#91;")
		:gsub("]", "&#93;")
		-- Strip bold, italics and soft hyphens.
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		-- U+00AD SOFT HYPHEN, written as its UTF-8 bytes so that the (invisible) character cannot be lost in editing.
		:gsub("\194\173", "")

	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = decode_entities(text)

	return trim(text)
end

do
	local title_obj, category_namespaces, page_data, pagename, pagename_defaultsort
	--[==[
	Format the categories with the appropriate sort key.
	* `categories` is a list of categories. Each entry in the list can be either a string (the full category, minus
	  the {"Category:"} prefix) or an object. In the latter case, the object should have fields
	  ** `cat`: the full category, minus the {"Category:"} prefix (required);
	  ** `lang`: optional language object to override the overall `lang`;
	  ** `sort_key`: optional sort key to override the overall `sort_key`;
	  ** `sort_base`: optional sort base to override the overall `sort_base`;
	  ** `sc`: optional script object to override the overall `sc`.
	* `lang` is an object encapsulating a language; if {nil}, the object for language code {"und"} (undetermined) will
	  be used. `lang` is used when computing the sort key (either from the subpage name or sort base).
	* `sort_key` is placed in the category invocation, and indicates how the page will sort in the respective category.
	  Normally '''do not use this'''. Instead, leave it {nil}, and if you need to a control the sort order, use
	  {sort_base}, so that language-specific normalization is applied on top of the specified sort base. If neither
	  {sort_key} nor {sort_base} is specified, the default is to apply language-specific normalization to the subpage
	  name; see below.
	* `sort_base` lets you override the default sort key while still maintaining appropriate language-specific
	  normalization. If {nil} is specified, this defaults to the subpage name, which is the portion of the full pagename
	  after subtracting the namespace prefix (and, in certain namespaces such as {User:}, but notably not in the
	  mainspace, after subtracting anything up through the final slash). The actual sort key is derived from the sort
	  base approximately by lowercasing, applying language-specific normalization and then uppercasing; note that the
	  same process is applied in deriving the sort key when no sort base is specified. For example, for French, Spanish,
	  etc. the normalization process maps accented letters to their unaccented equivalents, so that e.g. in French,
	  {{m|fr|ça}} sorts after {{m|fr|ca}} (instead of after the default Wikimedia sort order, which is approximately
	  based on Unicode sort order and places ç after z) and {{m|fr|côté}} sorts after {{m|fr|coté}} (instead of between
	  c and d). Similarly, in Russian the normalization process converts Cyrillic ё to a string consisting of Cyrillic е
	  followed by U+10FFFF, so that effectively ё sorts after е instead of the default Wikimedia sort, which (I think)
	  puts ё after я, the last letter of the Cyrillic alphabet.
	* `force_output` forces normal output in all namespaces. Normally, nothing is output if the page isn't in the main,
	  Appendix:, Thesaurus:, Reconstruction: or Citations: namespaces.
	* `sc` is a script object; if nil, the default will be derived from the sort base (or its default value, the
	  subpage name) by calling {lang:findBestScript()}. The value of `sc` is used during the sort base normalization
	  process; for example, languages with multiple scripts will often have script-specific normalization processes.
	]==]
	function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
		-- Reject tables that are not language objects (recognised by their getCode field). A nil `lang` is allowed and
		-- is replaced by the "und" language when a sort key is generated.
		if type(lang) == "table" and not lang.getCode then
			error("The second argument to format_categories should be a language object.")
		end

		-- The current title and the set of categorising namespaces are cached in upvalues across calls.
		title_obj = title_obj or mw.title.getCurrentTitle()
		category_namespaces = category_namespaces or mw.loadData("Module:utilities/data").category_namespaces

		-- Output nothing unless forced, or the page is in a categorising namespace or is the sandbox.
		if not (
			force_output or
			category_namespaces[title_obj.namespace] or
			title_obj.prefixedText == "Wiktionary:Sandbox"
		) then
			return ""
		elseif not page_data then
			-- First call that produces output: load and cache the page name data.
			page_data = mw.loadData("Module:headword/data").page
			pagename = page_data.encoded_pagename
			pagename_defaultsort = page_data.pagename_defaultsort
		end

		-- Tracking categories created as a side effect of generate_sort_key; nil until one is needed.
		local extra_categories
		-- Compute the sort key for one category. The parameters deliberately shadow the outer ones so that per-category
		-- overrides can be passed in.
		local function generate_sort_key(lang, sort_key, sort_base, sc)
			-- Generate a default sort key.
			-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
			if sort_key == "-" then
				sort_key = sort_base and sort_base:uupper() or pagename_defaultsort
			else
				lang = lang or require("Module:languages").getByCode("und")
				-- Normalize the sort base (or the page name) with the language's own sort key rules.
				sort_base = lang:makeSortKey(sort_base or pagename, sc) or pagename_defaultsort
				if not sort_key or sort_key == "" then
					sort_key = sort_base
				elseif lang:getCode() ~= "und" then
					-- A manual sort key was supplied: record a tracking category saying whether it merely duplicates
					-- the automatically generated key ("redundant") or differs from it.
					if not extra_categories then
						extra_categories = {}
					end
					insert(extra_categories, lang:getFullName() .. " terms with " .. (
						sort_key:uupper() == sort_base and "redundant" or
						"non-redundant non-automated"
					) .. " sortkeys")
				end
			end
			-- Final fallback so that the sort key is never nil or empty.
			if not sort_key or sort_key == "" then
				sort_key = pagename_defaultsort
			end
			return sort_key
		end

		local ret = {}
		-- Sort key shared by every category given as a plain string.
		local default_sort_key = generate_sort_key(lang, sort_key, sort_base, sc)
		local ins_point = 0
		-- Append the wikitext for one category, which is either a string or an object with a `cat` field and
		-- optional `lang`, `sort_key`, `sort_base` and `sc` overrides.
		local function process_category(cat)
			local this_sort_key
			if type(cat) == "string" then
				this_sort_key = default_sort_key
			else
				this_sort_key = generate_sort_key(cat.lang or lang, cat.sort_key or sort_key,
					cat.sort_base or sort_base, cat.sc or sc)
				cat = cat.cat
			end
			ins_point = ins_point + 1
			ret[ins_point] = "[[Category:" .. cat .. "|" .. this_sort_key .. "]]"
		end

		for _, cat in ipairs(categories) do
			process_category(cat)
		end
		-- Tracking categories are plain strings, so they are emitted last and use the default sort key.
		if extra_categories then
			for _, cat in ipairs(extra_categories) do
				process_category(cat)
			end
		end

		return concat(ret)
	end
end

do
	local catfix_scripts

	--[==[
	Generate a "catfix": a hidden element placed on language-specific category pages so that language attributes, and
	often script classes, are applied to all entry names. These let the entry names display better (using the
	language- or script-specific styles specified in [[MediaWiki:Common.css]]), which matters most for non-English
	languages that do not have consistent font support in browsers.

	`lang` must be a language object and `sc`, if given, a script object; an error is thrown otherwise.

	Language attributes are added for all languages, but script classes are only added for languages with one script
	listed in their data file, or for languages that have a default script listed in the {catfix_scripts} list in
	[[Module:utilities/data]]. Some languages clearly have a default script, but still have other scripts listed in
	their data file and therefore need their default script to be specified. Others do not have a default script.

	* Serbo-Croatian is regularly written in both the Latin and Cyrillic scripts. Because it uses two scripts,
	  Serbo-Croatian cannot have a script class applied to entries in its category pages, as only one script class
	  can be specified at a time.
	* Russian is usually written in the Cyrillic script ({{cd|Cyrl}}), but Braille ({{cd|Brai}}) is also listed in
	  its data file. So Russian needs an entry in the {catfix_scripts} list, so that the {{cd|Cyrl}} (Cyrillic) script
	  class will be applied to entries in its category pages.

	To find the scripts listed for a language, go to [[Module:languages]] and use the search box to find the data file
	for the language. To find out what a script code means, search the script code in [[Module:scripts/data]].
	]==]
	function export.catfix(lang, sc)
		if not (lang and lang.getCanonicalName) then
			error('The first argument to the function "catfix" should be a language object from [[Module:languages]] or [[Module:etymology languages]].')
		end
		if sc and not sc.getCode then
			error('The second argument to the function "catfix" should be a script object from [[Module:scripts]].')
		end
		local canonical_name = lang:getCanonicalName()
		local full_name = lang:getFullName()

		-- To add script classes to links on pages created by category boilerplate templates.
		if not sc then
			if not catfix_scripts then
				catfix_scripts = mw.loadData("Module:utilities/data").catfix_scripts
			end
			-- Look the default script up by the language's own code first, then by its full code.
			local sc_code = catfix_scripts[lang:getCode()] or catfix_scripts[lang:getFullCode()]
			sc = sc_code and require("Module:scripts").getByCode(sc_code)
		end

		-- One CATFIX- class for the canonical name, plus one for the full name when the two differ.
		local classes = { "CATFIX-" .. mw.uri.anchorEncode(canonical_name) }
		if full_name ~= canonical_name then
			classes[2] = "CATFIX-" .. mw.uri.anchorEncode(full_name)
		end
		local tagged = require("Module:script utilities").tag_text("&nbsp;", lang, sc, nil)
		return "<span id=\"catfix\" style=\"display:none;\" class=\"" .. concat(classes, " ") .. "\">" ..
			tagged ..
			"</span>"
	end
end

--[==[
Entry point for the {{tl|catfix}} template. Reads the parent frame's arguments (a required language in parameter 1,
and an optional script in `sc` or its alias, parameter 2) and passes them to {export.catfix}.
]==]
function export.catfix_template(frame)
	local process = require("Module:parameters").process
	local args = process(frame:getParent().args, {
		[1] = { type = "language", required = true },
		[2] = { alias_of = "sc" },
		["sc"] = { type = "script" },
	})
	return export.catfix(args[1], args.sc)
end

--[==[
Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language,
family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require
one of these kinds of object.

If `typ` is {"object"}, any entity with a `hasType` method is accepted; otherwise each entity must satisfy
{obj:hasType(typ)}. Every entity is checked, including {nil} values anywhere in the argument list: a {nil} entity
fails the check rather than cutting the list short.

If `noErr` is set, the function returns false instead of throwing an error, which allows customised error handling to
be done in the calling function. Returns {true} if every entity passes.
]==]
function export.check_object(typ, noErr, ...)
	-- Report a failure: either return false (noErr) or throw, blaming the caller of check_object.
	local function fail(message)
		if noErr then
			return false
		else
			error(message, 3)
		end
	end

	-- Count the arguments with select("#", ...), since the length of a table with nil holes is not well defined
	-- and ipairs would stop at the first nil.
	local objs_len = select("#", ...)
	if objs_len == 0 then
		return fail("Must provide at least one object to check.")
	end
	local objs = {...}
	for i = 1, objs_len do
		local obj = objs[i]
		if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
			return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
		elseif not (typ == "object" or obj:hasType(typ)) then
			-- Try to name the kind of object that was actually received, for a more helpful message.
			for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
				if obj:hasType(wrong_type) then
					return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
				end
			end
			return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
		end
	end
	return true
end

return export