Diff: Module:Text
Comparing revision #1 (2022-07-21 16:43:48) with revision #2 (2023-02-02 05:35:16).
| Old | New |
|---|---|
local yesNo = require("Module:Yesno")

-- Library table; serial is the version stamp, suite the library name.
local Text = { serial = "2022-07-21",
               suite  = "Text" }
--[=[
Text utilities
]=]

-- local globals
-- Module-level caches. Each one starts as false and is meant to be
-- replaced by its real value the first time a function needs it.
local PatternCJK        = false
local PatternCombined   = false
local PatternLatin      = false
local PatternTerminated = false
local QuoteLang         = false
local QuoteType         = false
local RangesLatin       = false
local SeekQuote         = false
local function initLatinData()
    -- Prepare the Latin code point ranges and the pattern derived from them
    -- Both values are cached in module-level variables and built only once.
    RangesLatin = RangesLatin or { {    7,  687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    -- Assemble "^[<from>-<to>...]*$"; 45 is the code point of "-"
    local pieces = { "^[" }
    for _, span in ipairs( RangesLatin ) do
        pieces[ #pieces + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end
local function initQuoteData()
    -- Create quote definitions
    -- QuoteLang maps a language code to the name of a quotation style;
    -- QuoteType maps a style name to
    --     { { open, close } for level 1,
    --       { open, close } for level 2,
    --       optional flag (true: put a space inside the quotes) }
    -- All numbers are Unicode code points.
    QuoteLang = QuoteLang or {
        af  = "bd",
        ar  = "la",
        be  = "labd",
        bg  = "bd",
        ca  = "la",
        cs  = "bd",
        da  = "bd",
        de  = "bd",
        dsb = "bd",
        et  = "bd",
        el  = "lald",
        en  = "ld",
        es  = "la",
        eu  = "la",
    --  fa  = "la",
        fi  = "rd",
        fr  = "laSPC",
        ga  = "ld",
        he  = "ldla",
        hr  = "bd",
        hsb = "bd",
        hu  = "bd",
        hy  = "labd",
        id  = "rd",
        is  = "bd",
        it  = "ld",
        ja  = "x300C",
        ka  = "bd",
        ko  = "ld",
        lt  = "bd",
        lv  = "bd",
        nl  = "ld",
        nn  = "la",
        no  = "la",
        pl  = "bdla",
        pt  = "lald",
        ro  = "bdla",
        ru  = "labd",
        sk  = "bd",
        sl  = "bd",
        sq  = "la",
        sr  = "bx",
        sv  = "rd",
        th  = "ld",
        tr  = "ld",
        uk  = "la",
        zh  = "ld",
        ["de-ch"] = "la",
        ["en-gb"] = "lsld",
        ["en-us"] = "ld",
        ["fr-ch"] = "la",
        ["it-ch"] = "la",
        ["pt-br"] = "ldla",
        ["zh-tw"] = "x300C",
        ["zh-cn"] = "ld"
    }
    QuoteType = QuoteType or {
        bd    = { { 8222, 8220 }, { 8218, 8217 } },
        bdla  = { { 8222, 8220 }, {  171,  187 } },
        bx    = { { 8222, 8221 }, { 8218, 8217 } },
        la    = { {  171,  187 }, { 8249, 8250 } },
        laSPC = { {  171,  187 }, { 8249, 8250 }, true },
        labd  = { {  171,  187 }, { 8222, 8220 } },
        lald  = { {  171,  187 }, { 8220, 8221 } },
        ld    = { { 8220, 8221 }, { 8216, 8217 } },
        ldla  = { { 8220, 8221 }, {  171,  187 } },
        lsld  = { { 8216, 8217 }, { 8220, 8221 } },
        rd    = { { 8221, 8221 }, { 8217, 8217 } },
        x300C = { { 0x300C, 0x300D },
                  { 0x300E, 0x300F } }
    }
end -- initQuoteData()
local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text; nil or false is treated as ""
    --     alien    -- string, with language code; nil or false means "en"
    --     advance  -- number, with level 1 or 2
    -- Returns: string; the text wrapped in the quotation marks of the
    --          language, or the bare text if no quote pair is defined
    --          for the requested level
    local r = apply and tostring( apply ) or ""
    alien   = alien and tostring( alien ) or "en"
    advance = tonumber( advance ) or 0
    initQuoteData()
    -- "de-at" -> "de": fall back to the base language, then to English
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or slang and QuoteLang[ slang ]
                  or QuoteLang[ "en" ]
    local quotes = QuoteType[ suite ]
    if not quotes then
        mw.log( "fiatQuote() " .. suite )
        return r
    end
    -- NOTE(review): the spacing literal in the reviewed text was whitespace
    -- that cannot be told apart from U+00A0; the numeric entity makes the
    -- no-break space inside the quotes explicit -- confirm.
    local space = quotes[ 3 ] and "&#160;" or ""
    local pair = quotes[ advance ]
    -- Index 3 holds the boolean spacing flag, not a pair of code points.
    if type( pair ) == "table" then
        -- Use the normalised r rather than the raw apply, so that nil or
        -- non-string input cannot break the concatenation.
        r = mw.ustring.char( pair[ 1 ] ) .. space .. r
            .. space .. mw.ustring.char( pair[ 2 ] )
    end
    return r
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if there is nothing to create, or an error
    --          <span> (suppressed by accept) if any codepoint is invalid
    local r = ""
    apply = type( apply ) == "table" and apply or { }
    again = math.floor( tonumber( again ) or 1 )
    if not ( again >= 1 ) then
        -- also catches NaN, which fails every comparison
        return ""
    end
    local bad   = { }
    local codes = { }
    for _, v in ipairs( apply ) do
        local n = tonumber( v )
        -- Rejected: non-numbers, NaN (n ~= n), control characters other
        -- than TAB and LF, and anything beyond the last Unicode code point
        -- U+10FFFF, which mw.ustring.char() would refuse with an error.
        if not n
           or n ~= n
           or ( n < 32 and n ~= 9 and n ~= 10 )
           or n >= 0x110000 then
            table.insert( bad, tostring( v ) )
        else
            table.insert( codes, math.floor( n ) )
        end
    end
    if #bad > 0 then
        if not accept then
            r = tostring( mw.html.create( "span" )
                                 :addClass( "error" )
                                 :wikitext( "bad codepoints: "
                                            .. table.concat( bad, " " ) ) )
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char( unpack( codes ) )
        if again > 1 then
            r = r:rep( again )
        end
    end
    return r
end -- Text.char()
local function trimAndFormat(args, fmt)
    -- Trim all items of a sequence, drop the empty ones, format the rest
    -- Parameter:
    --     args  -- table (sequence), or a single value treated as one item
    --     fmt   -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings
    local list = type(args) == 'table' and args or { args }
    local kept = {}
    for _, item in ipairs(list) do
        local text = mw.text.trim(tostring(item))
        if text ~= "" then
            if fmt then
                text = mw.ustring.format(fmt, text)
            end
            kept[#kept + 1] = text
        end
    end
    return kept
end
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Items are trimmed; items that are empty after trimming are skipped.
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return table.concat( trimAndFormat( args, adapt ), apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( s )
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string
    -- Returns: true, if CJK detected
    s = s and tostring( s ) or ""
    -- The pattern is cached in the module-level local PatternCJK; writing
    -- it under any other spelling would create a global variable.
    if not PatternCJK then
        -- "[" (91) ... "]" (93) with "-" (45) between the range bounds
        PatternCJK = mw.ustring.char( 91,
                                      4352, 45, 4607,
                                      11904, 45, 42191,
                                      43072, 45, 43135,
                                      44032, 45, 55215,
                                      63744, 45, 64255,
                                      65072, 45, 65103,
                                      65381, 45, 65500,
                                      131072, 45, 196607,
                                      93 )
    end
    return mw.ustring.find( s, PatternCJK ) ~= nil
end -- Text.containsCJK()
Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- A prefix without a following suffix removes everything up to the
    -- end of the string.
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter
    --    suffix = ending delimiter
    -- Returns: stripped string
    s      = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths, because string.find() and string.sub() below work on
    -- byte offsets.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- Plain search: the delimiters are literal text. As a Lua pattern
        -- "-->" would already match a lone ">".
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        i = r:find(prefix, 1, true)
    end
    return r
end
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Removed: HTML comments, tags starting with a lowercase letter, and
    -- bold/italic apostrophe markup; no-break spaces become plain spaces.
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local r = Text.removeDelimited( adjust, "<!--", "-->" )
    -- NOTE(review): the last substitution in the reviewed text replaced a
    -- space by a space, which does nothing; presumably the "&nbsp;" entity
    -- was lost in transit. Both the entity and the U+00A0 character
    -- ("\194\160" in UTF-8) are handled here -- confirm.
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''", "" )
         :gsub( "''", "" )
         :gsub( "&nbsp;", " " )
         :gsub( "\194\160", " " )
    return r
end -- Text.getPlain()
Text.isLatinRange = function (s)
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --    s = string to analyze; nil or false counts as empty string
    -- Returns: true, if valid for latin only
    local text = ""
    if s then
        text = tostring(s)
    end
    initLatinData()
    if mw.ustring.match(text, PatternLatin) then
        return true
    end
    return false
end -- Text.isLatinRange()
Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark
    local probe = s and tostring( s ) or ""
    if probe == "" then
        return false
    end
    if not SeekQuote then
        -- One string containing every known quotation mark
        local marks = { 34,      -- "
                        39,      -- '
                        171,     -- laquo
                        187,     -- raquo
                        8216,    -- lsquo
                        8217,    -- rsquo
                        8218,    -- sbquo
                        8220,    -- ldquo
                        8221,    -- rdquo
                        8222,    -- bdquo
                        8249,    -- lsaquo
                        8250,    -- rsaquo
                        0x300C,  -- CJK
                        0x300D,  -- CJK
                        0x300E,  -- CJK
                        0x300F } -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    -- Plain substring search within the list of marks
    if mw.ustring.find( SeekQuote, probe, 1, true ) then
        return true
    end
    return false
end -- Text.isQuote()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Items are trimmed and empty ones dropped before joining.
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return mw.text.listToText( items )
end -- Text.listToText()
Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = apply and tostring( apply ) or ""
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- TODO FIXME: Introduction expected 2017-04
        -- No language given: page language, else content language.
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- Anything but exactly 2 selects the first quotation level.
    local mode = ( advance == 2 ) and 2 or 1
    return fiatQuote( mw.text.trim( text ), slang, mode )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- The text counts as quoted if its first or its last character is
    -- a quotation mark.
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply and tostring( apply ) or "" )
    if r == "" then
        return r
    end
    local first = mw.ustring.sub( r, 1, 1 )
    -- sub( r, -1 ) is the last character; sub( r, -1, 1 ) would be empty
    -- for any text longer than one character.
    local last  = mw.ustring.sub( r, -1 )
    if not Text.isQuote( first ) and not Text.isQuote( last ) then
        r = Text.quote( r, alien, advance )
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- Character class "[...]" (91, 93) of combining mark ranges,
        -- with "-" (45) between the range bounds
        PatternCombined = mw.ustring.char( 91,
                                           0x0300, 45, 0x036F,
                                           0x1AB0, 45, 0x1AFF,
                                           0x1DC0, 45, 0x1DFF,
                                           0xFE20, 45, 0xFE2F,
                                           93 )
    end
    local source = adjust and tostring( adjust ) or ""
    -- Decompose, strip the combining marks, compose again
    local stripped = mw.ustring.gsub( mw.ustring.toNFD( source ),
                                      PatternCombined,
                                      "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()
Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; nil or false counts as empty string
    -- Returns: true, if sentence terminated
    -- Normalise like the other functions of this library, so that a
    -- missing value yields false instead of an error.
    analyse = analyse and tostring( analyse ) or ""
    if not PatternTerminated then
        -- "[" (91) followed by four fullwidth/ideographic terminators,
        -- then the ASCII ones; a second class permits trailing quotation
        -- marks and "]" before the end of the string.
        PatternTerminated = mw.ustring.char( 91,
                                             12290,
                                             65281,
                                             65294,
                                             65311 )
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( analyse, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()
Text.ucfirstAll = function ( adjust)
    -- Capitalize all words
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    adjust = adjust and tostring( adjust ) or ""
    local decoded     = mw.text.decode( adjust, true )
    -- Remember whether entities were decoded, to encode again at the end
    local hadEntities = ( decoded ~= adjust )
    -- Leading space lets the first word match "%W%l" as well
    local text = " " .. decoded
    local at = mw.ustring.find( text, "%W%l", 1 )
    while at do
        local letter = at + 1
        text = mw.ustring.sub( text, 1, at )
               .. mw.ustring.upper( mw.ustring.sub( text, letter, letter ) )
               .. mw.ustring.sub( text, letter + 1 )
        at = mw.ustring.find( text, "%W%l", letter )
    end
    text = text:sub( 2 )    -- drop the leading space again
    if hadEntities then
        text = mw.text.encode( text )
    end
    return text
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string; nil or false counts as empty string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    -- Normalise like Text.isLatinRange(), so that a missing value yields
    -- "" instead of an error in the mw.ustring calls below.
    adjust = adjust and tostring( adjust ) or ""
    initLatinData()
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local j = false    -- start index of the open non-latin run
        local k = 1        -- start index of text not yet copied to r
        local m = false    -- index at which a latin character means that
                           -- the run was one single greek letter
        local n = mw.ustring.len( adjust )
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  local range
                  for i = 1, #RangesLatin do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                  -- Result so far, plain text k..j-1, run j..a in <span>
                  return string.format( span,
                                        r,
                                        mw.ustring.sub( adjust, k, j - 1 ),
                                        mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Close the run before i; spaces and opening
                            -- parentheses directly before i are kept
                            -- outside of the <span>.
                            local nx = i - 1
                            local s  = ""
                            local ch
                            for ix = nx, 1, -1 do
                                ch = mw.ustring.sub( adjust, ix, ix )
                                if ch == " "  or  ch == "(" then
                                    nx = nx - 1
                                    s  = ch .. s
                                else
                                    break    -- for ix
                                end
                            end    -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    -- A non-latin run starts here
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                -- Ambivalent character after a possible single greek letter
                m = m + 1
            end
        end    -- for i
        if j  and  ( not m  or  m < n ) then
            -- Run still open at the end of the string
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()
Text.test = function ( about )
    -- Expose internal data for inspection
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: table with QuoteLang and QuoteType, or nil
    if about ~= "quote" then
        return nil
    end
    initQuoteData()
    return { QuoteLang = QuoteLang,
             QuoteType = QuoteType }
end -- Text.test()
-- Export
local p = { }

-- Yes/no functions: first frame argument in, "1" or "" out
for _, name in ipairs( { 'containsCJK',
                         'isLatinRange',
                         'isQuote',
                         'sentenceTerminated' } ) do
    p[ name ] = function ( frame )
        if Text[ name ]( frame.args[ 1 ] or "" ) then
            return "1"
        end
        return ""
    end
end

-- String functions: first frame argument in, library result out
for _, name in ipairs( { 'getPlain',
                         'removeDiacritics',
                         'ucfirstAll',
                         'uprightNonlatin' } ) do
    p[ name ] = function ( frame )
        return Text[ name ]( frame.args[ 1 ] or "" )
    end
end
function p.char( frame )
    -- Template interface of Text.char()
    -- The space separated codepoints are read from parameter 1 of the
    -- parent frame or, if that is missing, of the #invoke itself;
    -- "*" and "errors" are read from the same parameter set.
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            multiple = tonumber( params[ "*" ] )
            codes    = { }
            for idx, item in ipairs( items ) do
                -- "x41" is read as hexadecimal "0x41"
                local literal = item
                if item:sub( 1, 1 ) == "x" then
                    literal = "0" .. item
                end
                -- Unparsable items are passed on for the error report
                codes[ idx ] = tonumber( literal ) or item
            end
        end
    end
    return Text.char( codes, multiple, lenient )
end
function p.concatParams( frame )
    -- Template interface of Text.concatParams()
    -- template=1 takes the list from the parent frame, otherwise from
    -- the #invoke; separator and format always come from the #invoke.
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = template and frame:getParent().args or own
    return Text.concatParams( args, own.separator, own.format )
end
function p.listToFormat(frame)
    -- Fill a format string once per list row
    -- The numbered parameters are lists; row i consists of the i-th item
    -- of every list. For each row, the "%s" placeholders of the format are
    -- replaced one after another by the items of that row, and all rows
    -- are concatenated.
    -- Parameter (frame.args):
    --     1, 2, ...  -- lists, numbered consecutively from 1
    --     format     -- format string with one "%s" per list
    --     sep        -- item separator, a pattern for mw.text.split()
    --                   (default: ";")
    -- Returns: string; empty if no format is given
    local pformat = frame.args["format"]
    if not pformat then
        return ""
    end
    local sep = frame.args["sep"] or ";"
    -- Parse parameters: lists
    local lists = {}
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end
    -- Count the consecutive lists explicitly; the length operator is not
    -- reliable if the numbering has gaps.
    local count = 0
    while lists[count + 1] do
        count = count + 1
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, count do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end
    -- Generate result string
    local rows = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, count do
            -- Shorter lists contribute an empty string.
            local item = lists[j][i] or ""
            -- A function replacement inserts the item literally; as a
            -- replacement string any "%" in the item would be
            -- interpreted by gsub.
            line = mw.ustring.gsub(line, "%%s", function () return item end, 1)
        end
        rows[i] = line
    end
    return table.concat(rows)
end
function p.listToText( frame )
    -- Template interface of Text.listToText()
    -- template=1 takes the list from the parent frame, otherwise from
    -- the #invoke; format always comes from the #invoke.
    local own      = frame.args
    local template = own.template
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    local args = template and frame:getParent().args or own
    return Text.listToText( args, own.format )
end
function p.quote( frame )
    -- Template interface of Text.quote()
    --     1  -- text
    --     2  -- language code; blank means none given
    --     3  -- level
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local trimmed = mw.text.trim( slang )
        -- false for a blank code, else the trimmed code
        slang = ( trimmed ~= "" ) and trimmed
    end
    return Text.quote( args[ 1 ] or "", slang, tonumber( args[ 3 ] ) )
end
function p.quoteUnquoted( frame )
    -- Template interface of Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code; blank means none given
    --     3  -- level
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        local trimmed = mw.text.trim( slang )
        -- false for a blank code, else the trimmed code
        slang = ( trimmed ~= "" ) and trimmed
    end
    return Text.quoteUnquoted( args[ 1 ] or "",
                               slang,
                               tonumber( args[ 3 ] ) )
end
function p.zip(frame)
    -- Interleave the items of several lists
    -- Parameter (frame.args):
    --     1, 2, ...        -- lists
    --     sep              -- default item separator for all lists
    --     sep1, sep2, ...  -- item separator of one particular list
    --     isep             -- output separator between items of one row
    --     osep             -- output separator between rows
    -- Returns: string
    local args       = frame.args
    local defaultsep = args["sep"] or ""
    local innersep   = args["isep"] or ""
    local outersep   = args["osep"] or ""
    local lists, seps = {}, {}
    -- Parse parameters
    for k, v in pairs(args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        elseif string.sub(k, 1, 3) == "sep" then
            local sepnum = tonumber(string.sub(k, 4))
            if sepnum then seps[sepnum] = v end
        end
    end
    -- Use the default separator wherever no explicit one is given
    for i = 1, math.max(#seps, #lists) do
        seps[i] = seps[i] or defaultsep
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], seps[i])
        maxListLen = math.max(maxListLen, #lists[i])
    end
    -- Row by row; lists that are too short contribute an empty string
    local out = {}
    for i = 1, maxListLen do
        if i ~= 1 then out[#out + 1] = outersep end
        for j = 1, #lists do
            if j ~= 1 then out[#out + 1] = innersep end
            out[#out + 1] = lists[j][i] or ""
        end
    end
    return table.concat(out)
end
-- Returns: string; the version stamp of the library
p.failsafe = function ()
    return Text.serial
end
-- Access for other modules: hands out the library table itself
function p.Text()
    return Text
end -- p.Text

return p