From 9b16f2d84a2970c017ed8d2142b6ed16cdc46f84 Mon Sep 17 00:00:00 2001 From: Sarjuuk Date: Wed, 16 Aug 2023 03:59:42 +0200 Subject: [PATCH] Localization/WoW strings * generalize WoW UI escape sequence handling * implement use of declinated words from dbc for locale 8 (ruRU) --- includes/kernel.php | 4 +- includes/shared.php | 2 +- includes/utilities.php | 30 +-- localization/lang.class.php | 284 +++++++++++++++++------ localization/locale_ruru.php | 2 +- setup/db_structure.sql | 31 ++- setup/tools/clisetup/setup.func.php | 1 + setup/tools/dbc.class.php | 4 + setup/tools/sqlgen/declinedword.func.php | 29 +++ setup/updates/1692289951_01.sql | 17 ++ 10 files changed, 306 insertions(+), 98 deletions(-) create mode 100644 setup/tools/sqlgen/declinedword.func.php create mode 100644 setup/updates/1692289951_01.sql diff --git a/includes/kernel.php b/includes/kernel.php index 81b9a16d..3add2593 100644 --- a/includes/kernel.php +++ b/includes/kernel.php @@ -262,7 +262,7 @@ if (!CLI) User::useLocale($loc); } - Lang::load(User::$localeString); + Lang::load(User::$localeId); } // parse page-parameters .. sanitize before use! @@ -274,7 +274,7 @@ if (!CLI) Util::$wowheadLink = 'https://www.wowhead.com/wotlk/'.(User::$localeId ? Util::$subDomains[User::$localeId].'/' : '').$str; } else if (!empty($AoWoWconf['aowow'])) - Lang::load('enus'); + Lang::load(LOCALE_EN); $AoWoWconf = null; // empty auths diff --git a/includes/shared.php b/includes/shared.php index 80a6859f..a5fc5594 100644 --- a/includes/shared.php +++ b/includes/shared.php @@ -1,6 +1,6 @@ /iu', '<\1>', $text); $from = array( - '/\|T([\w]+\\\)*([^\.]+)\.blp:\d+\|t/ui', // images (force size to tiny) |T:|t - '/\|c(\w{6})\w{2}([^\|]+)\|r/ui', // color |c|r '/\$g\s*([^:;]*)\s*:\s*([^:;]*)\s*(:?[^:;]*);/ui',// directed gender-reference $g:: '/\$t([^;]+);/ui', // nonsense, that the client apparently ignores - '/\|\d\-?\d?\(([\$\%]\w)\)/ui', // and another modifier for something russian |3-6($r) '/<([^\"=\/>]+\s[^\"=\/>]+)>/ui', // emotes (workaround: at least one whitespace and never " or = between brackets) '/\$(\d+)w/ui', // worldState(?)-ref found on some pageTexts $1234w '/\$c/i', // class-ref '/\$r/i', // race-ref '/\$n/i', // name-ref - '/\$b/i', // line break - '/\|n/i' // what .. the fuck .. another type of line terminator? (only in spanish though) + '/\$b/i' // line break ); $toMD = array( - '[icon name=\2]', - '[span color=#\1>\2[/span]', '<\1/\2>', '', - '\1', '<\1>', '[span class=q0>WorldState #\1[/span]', '<'.Lang::game('class').'>', '<'.Lang::game('race').'>', '<'.Lang::main('name').'>', - '[br]', - '' + '[br]' ); $toHTML = array( - '', - '\2', '<\1/\2>', '', - '\1', '<\1>', 'WorldState #\1', '<'.Lang::game('class').'>', '<'.Lang::game('race').'>', '<'.Lang::main('name').'>', - '
', - '' + '
' ); - return preg_replace($from, $markdown ? $toMD : $toHTML, $text); + $text = preg_replace($from, $markdown ? $toMD : $toHTML, $text); + + return Lang::unescapeUISequences($text, $markdown ? Lang::FMT_MARKUP : Lang::FMT_HTML); } public static function asHex($val) : string @@ -927,7 +917,7 @@ abstract class Util if (strstr($v, $domain)) { User::useLocale($k); - Lang::load(User::$localeString); + Lang::load($k); return; } } @@ -935,7 +925,7 @@ abstract class Util if ($domain == 'www') { User::useLocale(LOCALE_EN); - Lang::load(User::$localeString); + Lang::load(LOCALE_EN); } } diff --git a/localization/lang.class.php b/localization/lang.class.php index fc58cfa5..bcff2e17 100644 --- a/localization/lang.class.php +++ b/localization/lang.class.php @@ -41,6 +41,7 @@ class Lang private static $emote; private static $enchantment; + private static $locId; private static $locales = array( LOCALE_EN => 'English', LOCALE_FR => 'Français', @@ -54,12 +55,14 @@ class Lang public const FMT_HTML = 1; public const FMT_MARKUP = 2; - public static function load(string $loc) : void + public static function load(int $locale) : void { - if (!file_exists('localization/locale_'.$loc.'.php')) - die('File for localization '.strToUpper($loc).' not found.'); + if (!isset(Util::$localeStrings[$locale])) + die($locale.' is not a known locale!'); + if (!file_exists('localization/locale_'.Util::$localeStrings[$locale].'.php')) + die('File for locale '.$locale.' not found.'); else - require 'localization/locale_'.$loc.'.php'; + require 'localization/locale_'.Util::$localeStrings[$locale].'.php'; foreach ($lang as $k => $v) self::$$k = $v; @@ -68,6 +71,8 @@ class Lang self::$item['cat'][2] = [self::$item['cat'][2], self::$spell['weaponSubClass']]; self::$item['cat'][2][1][14] .= ' ('.self::$item['cat'][2][0].')'; self::$main['moreTitles']['privilege'] = self::$privileges['_privileges']; + + self::$locId = $locale; } public static function __callStatic(string $prop, array $args) // : ?string|array @@ -591,88 +596,221 @@ class Lang return $var; } + if (!$var) // may be null or empty. Handled differently depending on context + return $var; + if ($args) $var = vsprintf($var, $args); - // line break - // |n - $var = str_replace('|n', '
', $var); + return self::unescapeUISequences($var); + } - // color - // |c|r - $var = preg_replace('/\|cff([a-f0-9]{6})(.+?)\|r/i', '$2', $var); + /* Quoted from WoWWiki - UI Escape Sequences (https://wowwiki-archive.fandom.com/wiki/UI_escape_sequences) + * number |1singular;plural; + Will choose a word depending on whether the digit preceding it is 0/1 or not (i.e. 1,11,21 return the first string, as will 0,10,40). Note that unlike |4 singular and plural forms are separated by semi-colon. - // icon - // |T:0:0:0:-1|t - not used, skip if found - $var = preg_replace('/\|T[^\|]+\|t/', '', $var); + * |2text + Before vowels outputs d' (with apostrophe) and removes any leading spaces from text, otherwise outputs de (with trailing space) - // hyperlink - // |H|h|h - not used, truncate structure if found - $var = preg_replace('/\|H[^\|]+\|h([^\|]+)\|h/', '$1', $var); + * |3-formid(text) + Displays text declined to the specified form (index ranges from 1 to GetNumDeclensionSets()). - // french preposition : de - // |2 - $var = preg_replace_callback('/\|2\s(\w)/i', function ($m) { - if (in_array(strtolower($m[1]), ['a', 'e', 'h', 'i', 'o', 'u'])) - return "d'".$m[1]; - else - return 'de '.$m[1]; - }, $var); + * number |4singular:plural; -or- number |4singular:plural1:plural2; + Will choose a form based on the number preceding it. More than two forms (separated by colons) may be required by locale 8 (ruRU). + **/ - // russian word cunjugation thingy - // |3-() - $var = preg_replace_callback('/\|3-(\d)\(([^\)]+)\)/i', function ($m) { - switch ($m[0]) + public static function unescapeUISequences(string $var, int $fmt = -1) : string + { + // line break |n + $var = preg_replace_callback('/\|n/i', function ($m) use ($fmt) { - case 1: // seen cases - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - default: // passthrough .. unk case - return $m[1]; - } - - }, $var); - - // numeric switch - // |4:[:]; - $var = preg_replace_callback('/([\d\.\,]+)([^\d]*)\|4([^:]*):([^;]*);/i', function ($m) { - $plurals = explode(':', $m[4]); - $result = ''; - - if (count($plurals) == 2) // special case: ruRU - { - switch (substr($m[1], -1)) // check last digit of number + switch ($fmt) { - case 1: - // but not 11 (teen number) - if (!in_array($m[1], [11])) - { - $result = $m[3]; - break; - } - case 2: - case 3: - case 4: - // but not 12, 13, 14 (teen number) [11 is passthrough] - if (!in_array($m[1], [11, 12, 13, 14])) - { - $result = $plurals[0]; - break; - } - break; + case -1: // default Lang::vspf case + case self::FMT_HTML: + return '
'; + case self::FMT_MARKUP: + return '[br]'; + case self::FMT_RAW: default: - $result = $plurals[1]; + return ''; } - } - else - $result = ($m[1] == 1 ? $m[3] : $plurals[0]); + } , $var); - return $m[1].$m[2].$result; - }, $var); + // color |c|r + $var = preg_replace_callback('/\|c([[:xdigit:]]{2})([[:xdigit:]]{6})(.+?)\|r/i', function ($m) use ($fmt) + { + [$_, $a, $rgb, $text] = $m; + + switch ($fmt) + { + case -1: // default Lang::vspf case + case self::FMT_HTML: + return sprintf('%3s', $rgb, $a, $text); + case self::FMT_MARKUP: + return sprintf('[span color=#%1s]%3s[/span]', $rgb, $text); // doesn't support alpha + case self::FMT_RAW: + default: + return $text; + } + }, $var); + + // icon |T:0:0:0:-1|t + $var = preg_replace_callback('/\|T([\w]+\\\)*([^\.]+)\.[bB][lL][pP]:([^\|]+)\|t/', function ($m) use ($fmt) + { + /* iconParam - size1, size2, xoffset, yoffset + size1 == 0; size2 omitted: Width = Height = TextHeight (always square!) + size1 > 0; size2 omitted: Width = Height = size1 (always square!) + size1 == 0; size2 == 0 : Width = Height = TextHeight (always square!) + size1 > 0; size2 == 0 : Width = TextHeight; Height = size1 (size1 is height!!!) + size1 == 0; size2 > 0 : Width = size2 * TextHeight; Height = TextHeight (size2 is an aspect ratio and defines width!!!) + size1 > 0; size2 > 0 : Width = size1; Height = size2 + */ + + [$_, $iconPath, $iconName, $iconParam] = $m; + + switch ($fmt) + { + case self::FMT_HTML: + return ''; + case self::FMT_MARKUP: + return '[icon name='.Util::lower($iconName).']'; + case self::FMT_RAW: + default: + return ''; + } + }, $var); + + // hyperlink |H|h|h + $var = preg_replace_callback('/\|H([^:]+):([^\|]+)\|h([^\|]+)\|h/', function ($m) use ($fmt) + { + /* type Params + |Hchannel channelName, channelname == CHANNEL ? channelNr : null + |Hachievement AchievementID, PlayerGUID, isComplete, Month, Day, Year, criteriaMask1, criteriaMask2, criteriaMask3, criteriaMask4 - 32bit masks of Achievement_criteria.dbc/UIOrder only for achievements that display a todo list + |Hquest QuestID, QuestLevel + |Hitem itemId enchantId gemId1 gemId2 gemId3 gemId4 suffixId uniqueId linkLevel + |Henchant SpellID (from craftwindow) + |Htalent TalentID, TalentRank + |Hspell SpellID, PlayerLevel? + |Htrade SpellID, curSkill, maxSkill, PlayerGUID, base64_encode(known recipes bitmask) + |Hplayer Name + |Hunit GUID ? - combatlog + |Hicon ? "source"|"dest" - combatlog + |Haction ? - combatlog + */ + + [$_, $linkType, $linkVars, $text] = $m; + + $linkVars = explode(':', $linkVars); + + $spfVars = ['', $linkVars[0], $text]; + + switch ($linkType) + { + case 'trade': + case 'enchant': + $linkType = 'spell'; + case 'achievement': // markdown COULD implement completed status + case 'quest': + case 'item': // markdown COULD implement enchantments/gems + case 'spell': + $spfVars[0] = $linkType; + break; + case 'talent': + if ($spell = DB::Aowow()->selectCell('SELECT `spell` FROM ?_talents WHERE `id` = ?d AND `rank` = ?d', $linkVars[0], $linkVars[1])) + { + $spfVars[0] = 'spell'; + $spfVars[1] = $spell; + break; + } + default: + return ''; + } + + switch ($fmt) + { + case self::FMT_HTML: + return sprintf('%s', $spfVars); + case self::FMT_MARKUP: + return sprintf('[%s=%d]', $spfVars); + case self::FMT_RAW: + default: + return sprintf('(%s #%d) %s', $spfVars); + } + }, $var); + + // |1 - digit singular/plural |1; + $var = preg_replace_callback('/(\d+)\s*\|1([^;]+);([^;]+);/i', function ($m) + { + [$_, $num, $singular, $plural] = $m; + + switch ($num[-1]) + { + case 0: + case 1: + return $num . ' ' . $singular; + default: + return $num . ' ' . $plural; + } + }, $var); + + // |2 - frFR preposition: de |2 + $var = preg_replace_callback('/\|2\s?(\w)/i', function ($m) + { + [$_, $word] = $m; + + switch (strtolower($word[1])) + { + case 'h': + if (self::$locId != LOCALE_FR) + return 'de ' . $word; + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + return "d'" . $word; + default: + return 'de ' . $word; + } + }, $var); + + // |3 - ruRU declinations |3-() + $var = preg_replace_callback('/\|3-(\d)\(([^\)]+)\)/iu', function ($m) + { + [$_, $caseIdx, $word] = $m; + + if ($caseIdx > 11 || $caseIdx < 1) // max caseIdx seen in DeclinedWordCases.dbc + return $word; + + if (preg_match('/\P{Cyrillic}/iu', $word)) // not in cyrillic script + return $word; + + if ($declWord = DB::Aowow()->selectCell('SELECT dwc.word FROM ?_declinedwordcases dwc JOIN ?_declinedword dc ON dwc.wordId = dc.id WHERE dwc.caseIdx = ?d AND dc.word = ?', $caseIdx, $word)) + return $declWord; + + return $word; + }, $var); + + // |4 - numeric switch |4:[:]; + $var = preg_replace_callback('/([\d\.\,]+)([^\d]*)\|4([^:]*):([^:;]+)(?::([^;]+))?;/i', function ($m) + { + [$_, $num, $pad, $singular, $plural1, $plural2] = $m; + + if (self::$locId != LOCALE_RU || !$plural2) + return $num . $pad . ($num == 1 ? $singular : $plural1); + + // singular - ends in 1, but not teen number + if ($num[-1] == 1 && $num != 11) + return $num . $pad . $singular; + + // genitive singular - ends in 2, 3, 4, but not teen number + if (($num[-1] == 2 && $num != 12) || ($num[-1] == 3 && $num != 13) || ($num[-1] == 4 && $num != 14)) + return $num . $pad . $plural1; + + // genitive plural - everything else + return $num . $pad . $plural2; + }, $var); return $var; } diff --git a/localization/locale_ruru.php b/localization/locale_ruru.php index 766fff4f..53d67958 100644 --- a/localization/locale_ruru.php +++ b/localization/locale_ruru.php @@ -1877,7 +1877,7 @@ $lang = array( 'gemColors' => array( // *_GEM "Особый", "Красный", "Желтый", "Синий" ), - 'gemConditions' => array( // ENCHANT_CONDITION_* so whats that pipe-code..? + 'gemConditions' => array( // ENCHANT_CONDITION_* 2 => "меньше, чем %d |4камень:камня:камней; |3-1(%s) цвета", 3 => "больше |3-7(%s), чем |3-7(%s) камней", 5 => "хотя бы %d |4камень:камня:камней; |3-1(%s) цвета" diff --git a/setup/db_structure.sql b/setup/db_structure.sql index 7c3e5bd6..f67cb379 100644 --- a/setup/db_structure.sql +++ b/setup/db_structure.sql @@ -667,6 +667,35 @@ CREATE TABLE `aowow_dbversion` ( ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; /*!40101 SET character_set_client = @saved_cs_client */; +-- +-- Table structure for table `aowow_declinedword` +-- + +DROP TABLE IF EXISTS `aowow_declinedword`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `aowow_declinedword` ( + `id` SMALLINT(5) UNSIGNED NOT NULL, + `word` VARCHAR(127) COLLATE utf8mb4_unicode_ci DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Table structure for table `aowow_declinedwordcases` +-- + +DROP TABLE IF EXISTS `aowow_declinedwordcases`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `aowow_declinedwordcases` ( + `wordId` SMALLINT(5) UNSIGNED NOT NULL, + `caseIdx` TINYINT(1) UNSIGNED NOT NULL, + `word` VARCHAR(131) COLLATE utf8mb4_unicode_ci DEFAULT NULL, + PRIMARY KEY (`wordId`, `caseIdx`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +/*!40101 SET character_set_client = @saved_cs_client */; + -- -- Table structure for table `aowow_emotes` -- @@ -3203,7 +3232,7 @@ UNLOCK TABLES; LOCK TABLES `aowow_dbversion` WRITE; /*!40000 ALTER TABLE `aowow_dbversion` DISABLE KEYS */; -INSERT INTO `aowow_dbversion` VALUES (1691940878,0,NULL,NULL); +INSERT INTO `aowow_dbversion` VALUES (1692289952,0,NULL,NULL); /*!40000 ALTER TABLE `aowow_dbversion` ENABLE KEYS */; UNLOCK TABLES; diff --git a/setup/tools/clisetup/setup.func.php b/setup/tools/clisetup/setup.func.php index dfd4a401..3d78ecda 100644 --- a/setup/tools/clisetup/setup.func.php +++ b/setup/tools/clisetup/setup.func.php @@ -77,6 +77,7 @@ function setup() : void ['SqlGen::generate', 'item_stats', null, null, null], ['SqlGen::generate', 'source', null, null, null], ['SqlGen::generate', 'sounds', null, null, null], + ['SqlGen::generate', 'declinedwords', null, null, null], ['FileGen::generate', 'soundfiles', null, null, null], ['FileGen::generate', 'searchplugin', null, null, null], ['FileGen::generate', 'power', null, null, null], diff --git a/setup/tools/dbc.class.php b/setup/tools/dbc.class.php index 34318789..bfe0e28d 100644 --- a/setup/tools/dbc.class.php +++ b/setup/tools/dbc.class.php @@ -58,6 +58,8 @@ class DBC 'creaturemodeldata' => 'nxxxxxxxxxxxxixxxxxxxxxxxxxx', 'creaturesounddata' => 'niiiixiiiiiiiiixxxxixxxxixiiiiixxiiiix', 'currencytypes' => 'niix', + 'declinedword' => 'ns', + 'declinedwordcases' => 'niis', 'dungeonmap' => 'niiffffi', 'durabilitycosts' => 'niiiiiiiiixiiiiiiiiiiixiiiixix', 'durabilityquality' => 'nf', @@ -163,6 +165,8 @@ class DBC 'creaturemodeldata' => 'id,creatureSoundId', 'creaturesounddata' => 'id,exertion,exertionCritical,injury,injuryCritical,death,stun,stand,footstepTerrainId,aggro,wingFlap,wingGlide,alert,fidget,customAttack,loop,jumpStart,jumpEnd,petAttack,petOrder,petDismiss,birth,spellcast,submerge,submerged', 'currencytypes' => 'id,itemId,category', + 'declinedword' => 'id,word', + 'declinedwordcases' => 'id,wordId,caseIdx,word', 'dungeonmap' => 'id,mapId,floor,minY,maxY,minX,maxX,areaId', 'durabilitycosts' => 'id,w0,w1,w2,w3,w4,w5,w6,w7,w8,w10,w11,w12,w13,w14,w15,w16,w17,w18,w19,w20,a1,a2,a3,a4,a6', 'durabilityquality' => 'id,mod', diff --git a/setup/tools/sqlgen/declinedword.func.php b/setup/tools/sqlgen/declinedword.func.php new file mode 100644 index 00000000..a5992c6d --- /dev/null +++ b/setup/tools/sqlgen/declinedword.func.php @@ -0,0 +1,29 @@ +query('TRUNCATE ?_declinedword'); + DB::Aowow()->query('INSERT INTO ?_declinedword SELECT * FROM dbc_declinedword'); + + CLI::write('SqlGen::generate() - copying declinedwordcases.dbc into aowow_declinedwordcases'); + DB::Aowow()->query('TRUNCATE ?_declinedwordcases'); + DB::Aowow()->query('INSERT INTO ?_declinedwordcases SELECT `wordId`, `caseIdx`, `word` FROM dbc_declinedwordcases'); + + return true; + } +}); + +?> diff --git a/setup/updates/1692289951_01.sql b/setup/updates/1692289951_01.sql new file mode 100644 index 00000000..3528f2b5 --- /dev/null +++ b/setup/updates/1692289951_01.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS `aowow_declinedword`; +DROP TABLE IF EXISTS `aowow_declinedwordcases`; + +CREATE TABLE `aowow_declinedword` ( + `id` SMALLINT(5) UNSIGNED NOT NULL, + `word` VARCHAR(127) COLLATE utf8mb4_unicode_ci DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +CREATE TABLE `aowow_declinedwordcases` ( + `wordId` SMALLINT(5) UNSIGNED NOT NULL, + `caseIdx` TINYINT(1) UNSIGNED NOT NULL, + `word` VARCHAR(131) COLLATE utf8mb4_unicode_ci DEFAULT NULL, + PRIMARY KEY (`wordId`, `caseIdx`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +UPDATE `aowow_dbversion` SET `sql` = CONCAT(IFNULL(`sql`, ''), ' declinedwords');