Module:grc-translit/sandbox
Appearance
- The following documentation is located at Module:grc-translit/sandbox/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
2 of 35 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
ΛΌΓΟΣ | LÓGOS | LÓGOS | |
ΟἿΑΙ | HOÎAI | HoîAi | |
ῬΉΤΩΡ | RHḖTŌR | RhḖTŌR | |
λόγος | lógos | lógos | |
σφίγξ | sphínx | sphínx | |
ϝάναξ | wánax | wánax | |
οἷαι | hoîai | hoîai | |
ταῦρος | taûros | taûros | |
νηῦς | nēûs | nēûs | |
σῦς | sûs | sûs | |
ὗς | hûs | hûs | |
γυῖον | guîon | guîon | |
ἀναῡ̈τέω | anaṻtéō | anaṻtéō | |
δαΐφρων | daḯphrōn | daḯphrōn | |
τῶν | tôn | tôn | |
τοὶ | toì | toì | |
τῷ | tôi | tôi | |
τούτῳ | toútōi | toútōi | |
σοφίᾳ | sophíāi | sophíāi | |
μᾱ̆νός | mānós | mānós | |
ὁ | ho | ho | |
οἱ | hoi | hoi | |
εὕρισκε | heúriske | heúriske | |
ὑϊκός | huïkós | huïkós | |
πυρρός | purrhós | purrhós | |
ῥέω | rhéō | rhéō | |
σάἁμον | sáhamon | sáhamon | |
Ὀδυσσεύς | Odusseús | Odusseús | |
Εἵλως | Heílōs | Heílōs | |
ᾍδης | Hā́idēs | Hā́idēs | |
ἡ Ἑλήνη | hē Helḗnē | hē Helḗnē | |
𐠠𐠒𐠯𐠗 | pi-lo-ti-mo | pi-lo-ti-mo | |
ἔχεις μοι εἰπεῖν, ὦ Σώκρατες, ἆρα διδακτὸν ἡ ἀρετή; | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? | |
τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτι ἐστίν; | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? | |
τούτων φωνήεντα μέν ἐστιν ἑπτά· α ε η ι ο υ ω. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. |
-- Table of functions made available to users of this module.
local export = {}
local m_data = require('Module:grc-utilities/data')
local tokenize = require('Module:grc-utilities').tokenize
-- Short local aliases for the Unicode-aware mw.ustring functions used below.
local ufind = mw.ustring.find
local ugsub = mw.ustring.gsub
local U = mw.ustring.char
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
-- Byte-level pattern for one UTF-8 encoded character: a single byte in the
-- range \1-\127 or \194-\244, followed by any number of continuation bytes
-- (\128-\191). Meant for the byte-oriented string functions.
-- Can't do range with null byte apparently.
local UTF8char = '[\1-\127\194-\244][\128-\191]*'
-- Diacritics
-- Named diacritic strings taken from the data module.
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute
local grave = diacritics.grave
local circumflex = diacritics.circum
local diaeresis = diacritics.diaeresis
local smooth = diacritics.smooth
local rough = diacritics.rough
local macron = diacritics.macron
local breve = diacritics.breve
local subscript = diacritics.subscript
-- Latin
local hat = diacritics.Latin_circum
-- Pattern: a macron, then an optional diaeresis, then the Latin circumflex.
-- NOTE(review): despite the name, export.tr uses this (via ufind) to detect a
-- macron on a vowel that also carries a circumflex.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- equivalent to '[αΑ]'
-- (byte 206 followed by byte 177 or 145, i.e. the UTF-8 encodings of α and Α)
local alpha = '\206[\177\145]'
-- Byte pattern for a string that begins with α/Α and ends with the
-- subscript diacritic.
local a_subscript = '^' .. alpha .. '.*' .. subscript .. '$'
-- Set of letters that count as velars: a γ standing directly before one of
-- these is transliterated as "n" instead of "g".
local is_velar = {}
for _, velar in ipairs({ 'κ', 'γ', 'χ', 'ξ' }) do
	is_velar[velar] = true
end
-- Character-by-character transliteration table. Keys are single lowercase
-- Greek letters or combining diacritics; values are their Latin replacements.
-- Any character without an entry here is left as it is by the lookup.
local tt = {
	-- Vowels; the long vowels η and ω get a macron.
	["α"] = "a", ["ε"] = "e", ["ι"] = "i", ["ο"] = "o", ["υ"] = "u",
	["η"] = "e" .. macron,
	["ω"] = "o" .. macron,

	-- Consonants written with one Latin letter.
	["β"] = "b", ["γ"] = "g", ["δ"] = "d", ["ζ"] = "z",
	["κ"] = "k", ["λ"] = "l", ["μ"] = "m", ["ν"] = "n",
	["ξ"] = "x", ["π"] = "p", ["ρ"] = "r", ["τ"] = "t",
	-- Both forms of sigma.
	["σ"] = "s", ["ς"] = "s",

	-- Consonants written with two Latin letters.
	["θ"] = "th", ["φ"] = "ph", ["χ"] = "kh", ["ψ"] = "ps",

	-- Archaic letters.
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",

	-- Diacritics. Macron, diaeresis, grave and acute have no entry and so
	-- pass through unchanged.
	[breve] = "",
	[smooth] = "",
	[rough] = "",
	[circumflex] = hat,
	[subscript] = "i",
}
-- Returns true if `s` is a string containing at least one uppercase letter.
local function has_upper(s)
	return s ~= nil and s ~= ulower(s)
end

-- Returns true if `s` contains at least one uppercase letter and no lowercase
-- letter. The subscript diacritic is removed before the comparison.
-- NOTE(review): this is a precaution in case uppercasing turns the subscript
-- into a full capital iota; confirm against mw.ustring.upper.
local function is_all_upper(s)
	if s == nil then
		return false
	end
	local bare = s:gsub(subscript, '')
	return bare ~= ulower(bare) and bare == uupper(bare)
end

--[[
	Transliterates Ancient Greek text into the Latin alphabet.

	Parameters:
		text  the text to transliterate
		lang  language code; only forwarded to Module:Cprt-translit
		sc    script code; "Cprt" forwards the call to Module:Cprt-translit

	Returns the transliteration as a string.

	Capitalization: a token containing an uppercase letter normally gets only
	the first character of its transliteration capitalized (Ἑ -> He). If the
	token is entirely uppercase and either holds a second uppercase letter
	itself or stands directly next to another entirely uppercase token, it is
	treated as part of an all-caps word and its whole transliteration is
	uppercased (ῬΉΤΩΡ -> RHḖTŌR, not RhḖTŌR). A lone capital letter followed
	by whitespace is not treated as all-caps.
]]
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module.
	-- This should not be necessary, as [[Module:translit-redirect]] redirects
	-- to this module only if script is polytonic.
	if sc == "Cprt" then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/grc-translit/Cprt]]
		require('Module:debug').track('grc-translit/Cprt')
		return require('Module:Cprt-translit').tr(text, lang, sc)
	end

	-- A bare rough breathing is transliterated as "h".
	if text == '῾' then
		return 'h'
	end

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	]]
	text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Transliterate token by token. ipairs guarantees that the tokens are
	-- visited in order, which the concatenation at the end depends on.
	local output = {}
	for i, token in ipairs(tokens) do
		local prev_token, next_token = tokens[i - 1], tokens[i + 1]

		-- Convert token to lowercase and substitute each character
		-- for its transliteration.
		local lower_token = ulower(token)
		local translit = lower_token:gsub(UTF8char, tt)

		-- The contextual rules compare lowercased tokens so that they also
		-- apply inside capitalized and all-caps words.
		if lower_token == 'γ' and next_token and is_velar[ulower(next_token)] then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif lower_token == 'ρ' and prev_token and ulower(prev_token) == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif token:find(a_subscript) then
			-- add macron to ᾳ
			translit = translit:gsub('[Aa]', '%0' .. macron)
		end

		if token:find(rough) then
			if ufind(token, '^[Ρρ]') then
				-- Rough breathing on rho: the h follows the r.
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if ufind(translit, macron_diaeresis) then
			translit = translit:gsub(macron, '')
		end

		-- Restore capitalization.
		if token ~= lower_token then
			-- Whatever follows the first character of the token; it contains
			-- an uppercase letter only for multi-letter tokens such as
			-- all-caps diphthongs.
			local rest = token:gsub('^' .. UTF8char, '')
			local all_caps = is_all_upper(token) and (
				has_upper(rest)
				or is_all_upper(prev_token)
				or is_all_upper(next_token)
			)
			if all_caps then
				-- Part of an all-caps word: uppercase everything, including
				-- digraphs and an added h.
				translit = uupper(translit)
			else
				-- Capitalize first character of transliteration.
				translit = translit:gsub("^" .. UTF8char, uupper)
			end
		end

		output[#output + 1] = translit
	end

	return table.concat(output)
end
-- Hand the table of exported functions back to whoever loads this module.
return export