Diff: Module:Detect singular
Comparing revision #1 (2023-01-02 22:05:13) with revision #2 (2023-02-02 05:35:15).
| Old | New |
|---|---|
local p = {} |
local p = {} |
local getArgs = require('Module:Arguments').getArgs |
local getArgs = require('Module:Arguments').getArgs |
local yesNo = require('Module:Yesno') |
local yesNo = require('Module:Yesno') |
local getPlain = require('Module:Text').Text().getPlain |
local getPlain = require('Module:Text').Text().getPlain |
-- Literal (non-pattern) substring search: looks for "sub" inside "s",
-- starting at the first character. Returns whatever mw.ustring.find returns,
-- so the result is truthy only when "sub" occurs in "s".
local function plainFind(s, sub)
	local startAt, literal = 1, true
	return mw.ustring.find(s, sub, startAt, literal)
end
-- Count how many times "pattern" (a pattern as accepted by mw.ustring.gsub)
-- matches in "s".
local function countMatches(s, pattern)
	-- gsub's second result is the number of substitutions it made; the
	-- rewritten string itself is not needed
	return (select(2, mw.ustring.gsub(s, pattern, '')))
end
-- Result codes used by this module to classify a string
local singular, likelyPlural, plural = 1, 2, 3
-- Classify a string as naming one item or several.
-- Arguments (fields of origArgs; a non-table origArgs is treated as empty):
--   origArgs[1]: string to process
--   origArgs.no_comma: if false, use commas/semicolons to detect plural (default false)
--   origArgs.any_comma: if false, a comma/semicolon only counts when it has a
--     non-digit character on both sides; if true, any comma/semicolon counts
--     (default false)
--   origArgs.no_and: if false, use the word "and" to detect plural (default false)
--   origArgs.parse_links: if false, treat wikilinks as opaque singular objects (default false)
-- The boolean arguments are normalized through Module:Yesno.
-- Returns:
--   singular, likelyPlural, or plural (see constants above), or nil when
--   origArgs[1] is nil or false
function p._main(origArgs)
	if type(origArgs) ~= 'table' then
		origArgs = {}
	end

	-- read one boolean option; every option defaults to false
	local function flag(name)
		local raw = origArgs[name]
		if raw == nil then
			return false
		end
		return yesNo(raw, false)
	end

	local useCommas = not flag('no_comma')
	local useAnd = not flag('no_and')
	local opaqueLinks = not flag('parse_links')
	local looseCommas = flag('any_comma')

	local text = origArgs[1] -- the input string
	if not text then
		return nil -- no input: result is unknown
	end
	-- replace HTML entities (to avoid spurious semicolons)
	text = mw.text.decode(tostring(text), true)

	-- magic data strings force the answer
	if plainFind(text, 'data-plural="0"') then
		return singular
	end
	if plainFind(text, 'data-plural="1"') then
		return plural
	end

	-- list items: exactly one means singular, more than one means plural
	local listItems = countMatches(text, '<%s*li')
	if listItems >= 1 then
		if listItems == 1 then
			return singular
		end
		return plural
	end

	local find = mw.ustring.find
	local gsub = mw.ustring.gsub

	-- a wikilink containing "list of" is plural
	if find(string.lower(text), '%[%[[^%]]*list of[^%]]+%]%]') then
		return plural
	end

	-- fix for trailing br tags passed through [[template:marriage]]
	text = (gsub(text, '<%s*br[^>]*>%s*(</div>)', '%1'))

	-- replace all wikilinks with a fixed string
	if opaqueLinks then
		text = (gsub(text, '%b[]', 'WIKILINK'))
	end

	-- Five signals follow; any one of them makes the string plural or likely
	-- plural. The line-break check runs before the markup is stripped.
	local brHit = find(text, '<%s*br')

	-- the remaining four are evaluated on the string stripped of wikimarkup
	text = getPlain(text)

	local bulletHit = countMatches(text, '%*+') > 1
	local qidHit = find(text, 'Q%d+[%p%s]+Q%d+') -- multiple QIDs in a row
	if bulletHit or qidHit then
		return plural
	end

	-- semi-colon is treated like a comma
	local commaPattern
	if looseCommas then
		commaPattern = '[,;]'
	else
		commaPattern = '%D[,;]%D'
	end
	local commaHit = useCommas and find(text, commaPattern)
	local andHit = useAnd and find(text, '[,%s]and%s')

	if not (brHit or commaHit or andHit) then
		return singular
	end
	return likelyPlural
end
-- Pick the wording that matches the plurality of args[1].
-- Arguments (fields of args; a non-table args is treated as empty):
--   args[1] and the boolean options: passed through to p._main
--   args[2]: if set, the plural form is returned without running detection
--   args[3] or args.singular: text for the singular case (default "")
--   args[4] or args.plural: text for the plural case (default "")
--   args.likely: text for the likely-plural case (defaults to the plural text)
--   args[5] or args.link: if set, each form is wrapped as [[link|form]]
-- Returns:
--   the chosen form, or "" when p._main returns nil
function p._pluralize(args)
	if type(args) ~= 'table' then
		args = {}
	end

	local oneForm = args[3] or args.singular or ""
	local manyForm = args[4] or args.plural or ""
	local probableForm = args.likely or manyForm

	local target = args[5] or args.link
	if target then
		target = tostring(target)
		-- pipe each form behind the link target
		local function linked(label)
			return '[[' .. target .. '|' .. label .. ']]'
		end
		oneForm = linked(oneForm)
		manyForm = linked(manyForm)
		probableForm = linked(probableForm)
	end

	-- explicit override: skip detection entirely
	if args[2] then
		return manyForm
	end

	local verdict = p._main(args)
	if verdict == nil then
		return "" -- return blank on complete failure
	end
	if verdict == singular then
		return oneForm
	end
	if verdict == likelyPlural then
		return probableForm
	end
	return manyForm
end
-- Template entry point: arguments are collected with Module:Arguments and
-- classified by p._main.
-- Returns 1 when the result is singular or nil, and "" otherwise.
function p.main(frame)
	local verdict = p._main(getArgs(frame))
	if verdict == nil or verdict == singular then
		return 1
	end
	return ""
end
-- Template entry point for p._pluralize: arguments are collected with
-- Module:Arguments and the selected form is returned.
function p.pluralize(frame)
	return p._pluralize(getArgs(frame))
end
return p |
return p |