Jump to content

Module:Text

Revision as of 20:00, 22 April 2026 by Taurussmith (talk | contribs) (Created page with "local yesNo = require("Module:Yesno") local Text = { serial = "2024-09-21", suite = "Text" } --[=[ Text utilities ]=] local function fiatQuote( apply, alien, advance ) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code -- advance -- number, with level 1 or 2 local r = apply and tostring(apply) or "" alien = alien or "en" advance = tonumber(advance) or 0 l...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

local yesNo = require( "Module:Yesno" )

-- Library table; serial is the version stamp handed out by p.failsafe()
local Text  = {
    serial = "2024-09-21",
    suite  = "Text",
}

--[=[ Text utilities ]=]

local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text; nil is treated as ""
    --     alien    -- string, with language code; default "en"
    --     advance  -- number, with level 1 or 2
    -- Returns: string; the text wrapped in the quotation marks that
    --          Module:Text/data lists for the language and level,
    --          or the plain text if no matching marks are found
    local r = apply and tostring( apply ) or ""
    alien   = alien and tostring( alien ) or "en"
    advance = tonumber( advance ) or 0
    local data      = mw.loadData( 'Module:Text/data' )
    local QuoteLang = data.QuoteLang
    local QuoteType = data.QuoteType
    -- Lookup order: full code, then the part before the first "-",
    -- then English
    local slang = alien:match( "^(%l+)-" )
    local suite = QuoteLang[ alien ]
                  or slang and QuoteLang[ slang ]
                  or QuoteLang[ "en" ]
    if suite then
        local quotes = QuoteType[ suite ]
        if quotes then
            -- A third entry in the type record asks for padding spaces
            local space = quotes[ 3 ] and " " or ""
            quotes = quotes[ advance ]
            if quotes then
                -- Format the coerced text r, never the raw argument:
                -- apply may be nil or a non-string value
                r = mw.ustring.format( "%s%s%s%s%s",
                                       mw.ustring.char( quotes[ 1 ] ),
                                       space,
                                       r,
                                       space,
                                       mw.ustring.char( quotes[ 2 ] ) )
            end
        else
            mw.log( "fiatQuote() " .. suite )
        end
    end
    return r
end -- fiatQuote()


Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if there is nothing to build, or an error
    --          span listing the rejected items (suppressed by accept)
    local r = ""
    apply = type( apply ) == "table" and apply or { }
    again = math.floor( tonumber( again ) or 1 )
    if again < 1 then
        return ""
    end
    local bad   = { }
    local codes = { }
    for _, v in ipairs( apply ) do
        local n = tonumber( v )
        -- Rejected: non-numbers, NaN (n ~= n), control characters other
        -- than TAB and LF, and values beyond the last Unicode codepoint,
        -- which mw.ustring.char() would answer with a script error
        if not n
           or n ~= n
           or n > 0x10FFFF
           or ( n < 32 and n ~= 9 and n ~= 10 ) then
            table.insert( bad, tostring( v ) )
        else
            table.insert( codes, math.floor( n ) )
        end
    end
    if #bad > 0 then
        if not accept then
            r = tostring( mw.html.create( "span" )
                                 :addClass( "error" )
                                 :wikitext( "bad codepoints: "
                                            .. table.concat( bad, " " ) ) )
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char( unpack( codes ) )
        if again > 1 then
            r = r:rep( again )
        end
    end
    return r
end -- Text.char()

local function trimAndFormat( args, fmt )
    -- Trim all items, drop the empty ones, optionally format the rest
    -- Parameter:
    --     args  -- table (sequence) of items; any other value is
    --              handled as a sequence with that single item
    --     fmt   -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings
    local items = args
    if type( items ) ~= 'table' then
        items = { items }
    end
    local result = { }
    for _, item in ipairs( items ) do
        local text = mw.text.trim( tostring( item ) )
        if text ~= "" then
            if fmt then
                text = mw.ustring.format( fmt, text )
            end
            table.insert( result, text )
        end
    end
    return result
end -- trimAndFormat()

Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    --     Items are trimmed; empty items are skipped
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return table.concat( trimAndFormat( args, adapt ), apply or "|" )
end -- Text.concatParams()


Text.containsCJK = function ( s )
    -- Does the text contain anything matched by the CJK pattern?
    -- Parameter:
    --     s  -- string; nil or false count as ""
    -- Returns: true, if CJK detected; false otherwise
    local subject = ""
    if s then
        subject = tostring( s )
    end
    local data  = mw.loadData( 'Module:Text/data' )
    local found = mw.ustring.find( subject, data.PatternCJK )
    return found ~= nil
end -- Text.containsCJK()

Text.removeDelimited = function ( s, prefix, suffix )
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter (literal text)
    --    suffix = ending delimiter (literal text)
    -- Returns: stripped string; a prefix without a following suffix
    --          removes everything up to the end of the string
    s      = s and tostring( s ) or ""
    prefix = prefix and tostring( prefix ) or ""
    suffix = suffix and tostring( suffix ) or ""
    -- Byte lengths: string.find and string.sub below count bytes,
    -- so character counts would give wrong offsets for non-ASCII
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find( prefix, 1, true )
    while i do
        -- Plain search: delimiters such as "-->" contain characters
        -- that are magic in Lua patterns
        local j = r:find( suffix, i + prefixLen, true )
        if j then
            r = r:sub( 1, i - 1 ) .. r:sub( j + suffixLen )
        else
            r = r:sub( 1, i - 1 )
        end
        i = r:find( prefix, 1, true )
    end
    return r
end -- Text.removeDelimited()

Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    --     Removed: HTML comments, HTML-style tags, bold and italic
    --     apostrophe markup; &nbsp; entities become plain spaces
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local r = Text.removeDelimited( adjust, "<!--", "-->" )
    -- Bold (''') has to go before italic (''), otherwise a single
    -- apostrophe would be left behind
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''", "" )
         :gsub( "''", "" )
         :gsub( "&nbsp;", " " )
    return r
end -- Text.getPlain()

Text.isLatinRange = function ( s )
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --  s = string to analyze; nil or false count as ""
    -- Returns: true, if the latin pattern of Module:Text/data matches
    local subject = ""
    if s then
        subject = tostring( s )
    end
    local data = mw.loadData( 'Module:Text/data' )
    local hit  = mw.ustring.match( subject, data.PatternLatin )
    return hit ~= nil
end -- Text.isLatinRange()


Text.isQuote = function ( s )
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s occurs literally in the SeekQuote string
    --          of Module:Text/data; false for empty input
    local subject = s and tostring( s ) or ""
    if subject ~= "" then
        local data = mw.loadData( 'Module:Text/data' )
        -- plain search, no pattern interpretation of s
        local at = mw.ustring.find( data.SeekQuote, subject, 1, true )
        return at ~= nil
    end
    return false
end -- Text.isQuote()


Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    --     Items are trimmed and empty ones dropped beforehand
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat( args, adapt )
    return mw.text.listToText( items )
end -- Text.listToText()


Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local text = apply and tostring( apply ) or ""
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No language given: page language if the title object
        -- offers one, else the content language of the wiki
        -- TODO FIXME: Introduction expected 2017-04
        slang = mw.title.getCurrentTitle().pageLanguage
                or mw.language.getContentLanguage():getCode()
    end
    -- Any level other than exactly 2 means level 1
    local mode = 1
    if advance == 2 then
        mode = 2
    end
    return fiatQuote( mw.text.trim( text ), slang, mode )
end -- Text.quote()


Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    --     Text counts as quoted if its first or its last character
    --     is a quotation mark
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply and tostring( apply ) or "" )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- Last character: sub( r, -1 ); an end index of 1 would
        -- return "" for every text longer than one character
        local last  = mw.ustring.sub( r, -1 )
        if not Text.isQuote( first )  and  not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()


Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    --     Decompose (NFD), delete whatever PatternCombined of
    --     Module:Text/data matches, recompose (NFC)
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local source = adjust and tostring( adjust ) or ""
    local data   = mw.loadData( 'Module:Text/data' )
    local parts  = mw.ustring.toNFD( source )
    local bare   = mw.ustring.gsub( parts, data.PatternCombined, "" )
    return mw.ustring.toNFC( bare )
end -- Text.removeDiacritics()


Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; nil or false count as ""
    -- Returns: true, if sentence terminated
    -- Coerce like the other predicates do, so that a missing argument
    -- does not raise an error inside mw.ustring.find()
    analyse = analyse and tostring( analyse ) or ""
    local PatternTerminated = mw.loadData( 'Module:Text/data' ).PatternTerminated
    return mw.ustring.find( analyse, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()


Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    adjust = adjust and tostring( adjust ) or ""
    local decoded  = mw.text.decode( adjust, true )
    -- Remember whether decoding changed anything, to encode again later
    local reencode = ( decoded ~= adjust )
    -- Leading space lets the very first word match "%W%l" as well
    local r   = " " .. decoded
    local pos = mw.ustring.find( r, "%W%l", 1 )
    while pos do
        local target = pos + 1
        local upper  = mw.ustring.upper( mw.ustring.sub( r, target, target ) )
        r = string.format( "%s%s%s",
                           mw.ustring.sub( r, 1, pos ),
                           upper,
                           mw.ustring.sub( r, pos + 2 ) )
        -- continue at the letter just handled
        pos = mw.ustring.find( r, "%W%l", target )
    end -- while pos
    r = r:sub( 2 )
    if reencode then
        r = mw.text.encode( r )
    end
    return r
end -- Text.ucfirstAll()


Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    local data = mw.loadData('Module:Text/data')
    local PatternLatin = data.PatternLatin
    local RangesLatin = data.RangesLatin
    local NumLatinRanges = data.NumLatinRanges
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local j    = false    -- start of the pending non-latin run
        local k    = 1        -- start of the text not yet copied to r
        local m    = false    -- position check for a single greek letter
        local n    = mw.ustring.len( adjust )
        -- result so far, untouched text, wrapped non-latin run;
        -- without the <span> the function would return its input
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function ( a )
                  -- isLatin
                  local range
                  -- NumLatinRanges has to be precomputed because # does not work from loadData
                  for i = 1, NumLatinRanges do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                -- append text k..j-1 as is and text j..a wrapped
                return string.format( span,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- keep trailing spaces and opening brackets
                            -- outside of the wrapped run
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                m = m + 1
            end
        end    -- for i
        if j  and  ( not m  or  m < n ) then
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()


Text.test = function ( about )
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: table with QuoteLang and QuoteType for "quote",
    --          otherwise nil
    local r
    if about == "quote" then
        -- local: a bare assignment would create a global variable
        local data = mw.loadData( 'Module:Text/data' )
        r = { }
        r.QuoteLang = data.QuoteLang
        r.QuoteType = data.QuoteType
    end
    return r
end -- Text.test()

-- Non Unicode-aware version of mw.text.split and mw.text.gsplit
-- based on phab:diffusion/ELUA/browse/master/includes/Engines/LuaCommon/lualib/mw.text.lua
-- These run up to 60 times faster than the Unicode-aware versions
Text.split = function ( text, pattern, plain )
    -- Split text at every match of pattern (byte-oriented)
    -- Parameter:
    --     text     -- string to split
    --     pattern  -- string; separator, a Lua pattern unless plain
    --     plain    -- true, if pattern is literal text
    -- Returns: table (sequence) with the pieces
    local ret = { }
    for m in Text.gsplit( text, pattern, plain ) do
        ret[ #ret + 1 ] = m
    end
    return ret
end -- Text.split()

Text.gsplit = function ( text, pattern, plain )
    -- Iterator over the pieces of text between matches of pattern
    --     (byte-oriented)
    -- Parameter:
    --     text     -- string to split
    --     pattern  -- string; separator, a Lua pattern unless plain
    --     plain    -- true, if pattern is literal text
    -- Returns: iterator function, nil, nil; for use in a generic for
    local s, l = 1, string.len( text )
    return function ()
        -- s is the start of the next piece; nil once exhausted
        if s then
            local e, n = string.find( text, pattern, s, plain )
            local ret
            if not e then
                -- no further separator: the rest is the last piece
                ret = string.sub( text, s )
                s = nil
            elseif n < e then
                -- Empty separator!
                ret = string.sub( text, s, e )
                if e < l then
                    s = e + 1
                else
                    s = nil
                end
            else
                -- separator right at s yields an empty piece
                ret = e > s and string.sub( text, s, e - 1 ) or ''
                s = n + 1
            end
            return ret
        end
    end, nil, nil
end -- Text.gsplit()

-- Export
-- Table returned by this module; its functions are the #invoke entry points
local p = { }

-- Template access to the predicates: "1" for true, "" for false
for _, func in ipairs( { 'containsCJK',
                         'isLatinRange',
                         'isQuote',
                         'sentenceTerminated' } ) do
    p[ func ] = function ( frame )
        if Text[ func ]( frame.args[ 1 ] or "" ) then
            return "1"
        end
        return ""
    end
end

-- Template access to the functions that map one string to another;
-- a missing first parameter is passed on as ""
for _, func in ipairs( { 'getPlain',
                         'removeDiacritics',
                         'ucfirstAll',
                         'uprightNonlatin' } ) do
    p[ func ] = function ( frame )
        local input = frame.args[ 1 ] or ""
        return Text[ func ]( input )
    end
end

function p.char( frame )
    -- Template access to Text.char()
    -- Parameter (of the parent frame, else of the frame itself):
    --     1       -- codepoints, separated by whitespace; a leading
    --                "x" marks a hexadecimal number
    --     *       -- number of repetitions
    --     errors  -- a "no" value suppresses the error message
    -- Returns: string
    local params = frame:getParent().args
    local story  = params[ 1 ]
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    local codes, lenient, multiple
    if story then
        local items = mw.text.split( mw.text.trim( story ), "%s+" )
        if #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            codes    = { }
            multiple = tonumber( params[ "*" ] )
            for _, item in ipairs( items ) do
                -- "x41" becomes "0x41", which tonumber() understands
                local lead = ""
                if item:sub( 1, 1 ) == "x" then
                    lead = "0"
                end
                -- unparsable items are kept, for Text.char() to report
                table.insert( codes, tonumber( lead .. item ) or item )
            end
        end
    end
    return Text.char( codes, multiple, lenient )
end -- p.char()

function p.concatParams( frame )
    -- Template access to Text.concatParams()
    -- Parameter (frame.args):
    --     template   -- "1": take the items from the parent frame
    --     separator  -- string (optional); separator
    --     format     -- string (optional); format including "%s"
    -- Returns: string
    local fromParent = frame.args.template
    if type( fromParent ) == "string" then
        fromParent = ( mw.text.trim( fromParent ) == "1" )
    end
    local args = frame.args
    if fromParent then
        args = frame:getParent().args
    end
    return Text.concatParams( args,
                              frame.args.separator,
                              frame.args.format )
end -- p.concatParams()


function p.listToFormat( frame )
    -- Apply a format row by row to several parallel lists
    -- Parameter (frame.args):
    --     1, 2, ...  -- lists; every list is split at sep
    --     format     -- string; every "%s" takes the item of the next
    --                   list; a missing format counts as ""
    --     sep        -- separator pattern of the lists (default: ";")
    -- Returns: string; all formatted rows, concatenated
    local lists   = { }
    local pformat = frame.args[ "format" ] or ""
    local sep     = frame.args[ "sep" ] or ";"
    -- Parse parameters: lists
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then lists[ knum ] = v end
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], sep )
        if #lists[ i ] > maxListLen then maxListLen = #lists[ i ] end
    end
    -- Generate result string
    local rows = { }
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- Shorter lists contribute "" instead of nil
            local item = lists[ j ][ i ] or ""
            -- Function as replacement: the item is inserted literally,
            -- a "%" within it is no capture reference
            line = mw.ustring.gsub( line,
                                    "%%s",
                                    function () return item end,
                                    1 )
        end
        rows[ i ] = line
    end
    return table.concat( rows )
end -- p.listToFormat()


function p.listToText( frame )
    -- Template access to Text.listToText()
    -- Parameter (frame.args):
    --     template  -- "1": take the items from the parent frame
    --     format    -- string (optional); format including "%s"
    -- Returns: string
    local fromParent = frame.args.template
    if type( fromParent ) == "string" then
        fromParent = ( mw.text.trim( fromParent ) == "1" )
    end
    local args = frame.args
    if fromParent then
        args = frame:getParent().args
    end
    return Text.listToText( args, frame.args.format )
end -- p.listToText()


function p.quote( frame )
    -- Template access to Text.quote()
    -- Parameter (frame.args):
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level, 1 or 2 (optional)
    -- Returns: quoted string
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        -- blank language code: false, so Text.quote() picks a default
        local code = mw.text.trim( slang )
        slang = ( code ~= "" ) and code
    end
    return Text.quote( args[ 1 ] or "",
                       slang,
                       tonumber( args[ 3 ] ) )
end -- p.quote()


function p.quoteUnquoted( frame )
    -- Template access to Text.quoteUnquoted()
    -- Parameter (frame.args):
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level, 1 or 2 (optional)
    -- Returns: string; possibly quoted
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        -- blank language code: false, so a default language is used
        local code = mw.text.trim( slang )
        slang = ( code ~= "" ) and code
    end
    return Text.quoteUnquoted( args[ 1 ] or "",
                               slang,
                               tonumber( args[ 3 ] ) )
end -- p.quoteUnquoted()


function p.zip( frame )
    -- Interleave the items of several lists
    -- Parameter (frame.args):
    --     1, 2, ...        -- lists
    --     sep              -- default separator of the lists
    --     sep1, sep2, ...  -- separator of one particular list
    --     isep             -- inserted between the items of one round
    --     osep             -- inserted between rounds
    -- Returns: string
    local lists = { }
    local seps  = { }
    local defaultsep = frame.args[ "sep" ] or ""
    local innersep   = frame.args[ "isep" ] or ""
    local outersep   = frame.args[ "osep" ] or ""
    -- Parse parameters
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        elseif string.sub( k, 1, 3 ) == "sep" then
            local sepnum = tonumber( string.sub( k, 4 ) )
            if sepnum then
                seps[ sepnum ] = v
            end
        end
    end
    -- Use the default separator wherever no explicit one was given
    for i = 1, math.max( #seps, #lists ) do
        seps[ i ] = seps[ i ] or defaultsep
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        local items = mw.text.split( lists[ i ], seps[ i ] )
        lists[ i ] = items
        if #items > maxListLen then
            maxListLen = #items
        end
    end
    -- Collect the pieces in order and join them once at the end
    local pieces = { }
    for i = 1, maxListLen do
        if i ~= 1 then
            pieces[ #pieces + 1 ] = outersep
        end
        for j = 1, #lists do
            if j ~= 1 then
                pieces[ #pieces + 1 ] = innersep
            end
            pieces[ #pieces + 1 ] = lists[ j ][ i ] or ""
        end
    end
    return table.concat( pieces )
end -- p.zip()


function p.split( frame )
    -- Template access to Text.split(); returns one single piece
    -- Parameter (frame.args):
    --     text,    or 1  -- string to split (default: "")
    --     pattern, or 2  -- separator (default: "")
    --     plain,   or 3  -- yes/no; whether pattern is literal text
    --     index,   or 4  -- number of the piece wanted (default: 1);
    --                       negative numbers count from the end
    -- Returns: string, or nil if there is no piece with that index
    local text    = frame.args.text or frame.args[ 1 ] or ''
    local pattern = frame.args.pattern or frame.args[ 2 ] or ''
    local plain   = yesNo( frame.args.plain or frame.args[ 3 ] )
    local index   = tonumber( frame.args.index )
                    or tonumber( frame.args[ 4 ] )
                    or 1
    local a = Text.split( text, pattern, plain )
    if index < 0 then
        index = #a + index + 1
    end
    return a[ index ]
end -- p.split()


function p.failsafe()
    -- Version identification
    -- Returns: string; the serial field of the Text table
    local serial = Text.serial
    return serial
end -- p.failsafe()


function p.Text()
    -- Access to the library table itself
    -- Returns: table; Text
    return Text
end -- p.Text

return p

Original content on DGR Wiki is © DGR Entertainment & Multimedia Ltd.