/*
 * This file is part of OpenTTD.
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 */

/** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */

#include "../stdafx.h"
#include "../core/alloc_func.hpp"
#include "../core/endian_func.hpp"
#include "../core/mem_func.hpp"
#include "../error_func.h"
#include "../string_func.h"
#include "../table/control_codes.h"

#include "strgen.h"

/* NOTE(review): the header name of the following #include is missing in this copy of the file; restore it before building. */
#include

#include "../table/strgen_tables.h"

#include "../safeguards.h"

/* Compiles a list of strings into a compiled string list */

static bool _translated;              ///< Whether the current language is not the master language
static bool _translation;             ///< Is the current file actually a translation or not
const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
int _cur_line;                        ///< The current line we're parsing in the input file
int _errors, _warnings, _show_todo;
LanguagePackHeader _lang;             ///< Header information about a language.
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Maximum size of every command block, not counting the name of the command itself
static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);

/**
 * Create a new case.
 * @param caseidx The index of the case.
 * @param string The translation of the case.
 */
Case::Case(int caseidx, const std::string &string) :
		caseidx(caseidx), string(string)
{
}

/**
 * Create a new string.
 * @param name The name of the string.
* @param english The english "translation" of the string. * @param index The index in the string table. * @param line The line this string was found on. */ LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) : name(name), english(english), index(index), line(line) { } /** Free all data related to the translation. */ void LangString::FreeTranslation() { this->translated.clear(); this->translated_cases.clear(); } /** * Create a new string data container. * @param tabs The maximum number of strings. */ StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE) { this->strings.resize(max_strings); this->next_string_id = 0; } /** Free all data related to the translation. */ void StringData::FreeTranslation() { for (size_t i = 0; i < this->max_strings; i++) { LangString *ls = this->strings[i].get(); if (ls != nullptr) ls->FreeTranslation(); } } /** * Add a newly created LangString. * @param s The name of the string. * @param ls The string to add. */ void StringData::Add(std::unique_ptr ls) { this->name_to_string[ls->name] = ls.get(); this->strings[ls->index].swap(ls); } /** * Find a LangString based on the string name. * @param s The string name to search on. * @return The LangString or nullptr if it is not known. */ LangString *StringData::Find(const std::string_view s) { auto it = this->name_to_string.find(s); if (it == this->name_to_string.end()) return nullptr; return it->second; } /** * Create a compound hash. * @param hash The hash to add the string hash to. * @param s The string hash. * @return The new hash. */ uint StringData::VersionHashStr(uint hash, const char *s) const { for (; *s != '\0'; s++) { hash = std::rotl(hash, 3) ^ *s; hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1); } return hash; } /** * Make a hash of the file to get a unique "version number" * @return The version number. 
*/ uint StringData::Version() const { uint hash = 0; for (size_t i = 0; i < this->max_strings; i++) { const LangString *ls = this->strings[i].get(); if (ls != nullptr) { const CmdStruct *cs; const char *s; char buf[MAX_COMMAND_PARAM_SIZE]; int argno; int casei; s = ls->name.c_str(); hash ^= i * 0x717239; hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1); hash = this->VersionHashStr(hash, s + 1); s = ls->english.c_str(); while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) { if (cs->flags & C_DONTCOUNT) continue; hash ^= (cs - _cmd_structs) * 0x1234567; hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1); } } } return hash; } /** * Count the number of tab elements that are in use. * @param tab The tab to count the elements of. */ uint StringData::CountInUse(uint tab) const { int i; for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break; return i + 1; } static const char *_cur_ident; /* Used when generating some advanced commands. */ static ParsedCommandStruct _cur_pcs; static int _cur_argidx; /** The buffer for writing a single string. */ struct Buffer : std::vector { /** * Convenience method for adding a byte. * @param value The value to add. */ void AppendByte(byte value) { this->push_back(value); } /** * Add an Unicode character encoded in UTF-8 to the buffer. * @param value The character to add. 
*/ void AppendUtf8(uint32_t value) { if (value < 0x80) { this->push_back(value); } else if (value < 0x800) { this->push_back(0xC0 + GB(value, 6, 5)); this->push_back(0x80 + GB(value, 0, 6)); } else if (value < 0x10000) { this->push_back(0xE0 + GB(value, 12, 4)); this->push_back(0x80 + GB(value, 6, 6)); this->push_back(0x80 + GB(value, 0, 6)); } else if (value < 0x110000) { this->push_back(0xF0 + GB(value, 18, 3)); this->push_back(0x80 + GB(value, 12, 6)); this->push_back(0x80 + GB(value, 6, 6)); this->push_back(0x80 + GB(value, 0, 6)); } else { StrgenWarning("Invalid unicode value U+0x{:X}", value); } } }; size_t Utf8Validate(const char *s) { uint32_t c; if (!HasBit(s[0], 7)) { /* 1 byte */ return 1; } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) { /* 2 bytes */ c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6); if (c >= 0x80) return 2; } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) { /* 3 bytes */ c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6); if (c >= 0x800) return 3; } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) { /* 4 bytes */ c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6); if (c >= 0x10000 && c <= 0x10FFFF) return 4; } return 0; } void EmitSingleChar(Buffer *buffer, char *buf, int value) { if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command"); buffer->AppendUtf8(value); } /* The plural specifier looks like * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */ /* This is encoded like * CommandByte {Length of each string} {each string} */ bool ParseRelNum(char **buf, int *value, int *offset) { const char *s = *buf; char *end; bool rel = false; while (*s == ' ' || *s == '\t') s++; if (*s == '+') { rel = true; s++; } int v = std::strtol(s, &end, 0); if (end == s) return false; if (rel || v < 0) { *value += v; } else { *value = v; } if (offset != nullptr && *end 
== ':') { /* Take the Nth within */ s = end + 1; *offset = std::strtol(s, &end, 0); if (end == s) return false; } *buf = end; return true; } /* Parse out the next word, or nullptr */ char *ParseWord(char **buf) { char *s = *buf, *r; while (*s == ' ' || *s == '\t') s++; if (*s == '\0') return nullptr; if (*s == '"') { r = ++s; /* parse until next " or NUL */ for (;;) { if (*s == '\0') break; if (*s == '"') { *s++ = '\0'; break; } s++; } } else { /* proceed until whitespace or NUL */ r = s; for (;;) { if (*s == '\0') break; if (*s == ' ' || *s == '\t') { *s++ = '\0'; break; } s++; } } *buf = s; return r; } /* Forward declaration */ static int TranslateArgumentIdx(int arg, int offset = 0); static void EmitWordList(Buffer *buffer, const std::vector &words, uint nw) { buffer->AppendByte(nw); for (uint i = 0; i < nw; i++) buffer->AppendByte((byte)strlen(words[i]) + 1); for (uint i = 0; i < nw; i++) { for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]); buffer->AppendByte(0); } } void EmitPlural(Buffer *buffer, char *buf, int) { int argidx = _cur_argidx; int offset = -1; int expected = _plural_forms[_lang.plural_form].plural_count; std::vector words(std::max(expected, MAX_PLURALS), nullptr); int nw = 0; /* Parse out the number, if one exists. Otherwise default to prev arg. */ if (!ParseRelNum(&buf, &argidx, &offset)) argidx--; const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx]; if (offset == -1) { /* Use default offset */ if (cmd == nullptr || cmd->default_plural_offset < 0) { StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "" : cmd->cmd); } offset = cmd->default_plural_offset; } /* Parse each string */ for (nw = 0; nw < MAX_PLURALS; nw++) { words[nw] = ParseWord(&buf); if (words[nw] == nullptr) break; } if (nw == 0) { StrgenFatal("{}: No plural words", _cur_ident); } if (expected != nw) { if (_translated) { StrgenFatal("{}: Invalid number of plural forms. 
Expecting {}, found {}.", _cur_ident, expected, nw); } else { if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident); if (nw > expected) { nw = expected; } else { for (; nw < expected; nw++) { words[nw] = words[nw - 1]; } } } } buffer->AppendUtf8(SCC_PLURAL_LIST); buffer->AppendByte(_lang.plural_form); buffer->AppendByte(TranslateArgumentIdx(argidx, offset)); EmitWordList(buffer, words, nw); } void EmitGender(Buffer *buffer, char *buf, int) { int argidx = _cur_argidx; int offset = 0; uint nw; if (buf[0] == '=') { buf++; /* This is a {G=DER} command */ nw = _lang.GetGenderIndex(buf); if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf); /* now nw contains the gender index */ buffer->AppendUtf8(SCC_GENDER_INDEX); buffer->AppendByte(nw); } else { std::vector words(MAX_NUM_GENDERS, nullptr); /* This is a {G 0 foo bar two} command. * If no relative number exists, default to +0 */ ParseRelNum(&buf, &argidx, &offset); const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx]; if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) { StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "" : cmd->cmd); } for (nw = 0; nw < MAX_NUM_GENDERS; nw++) { words[nw] = ParseWord(&buf); if (words[nw] == nullptr) break; } if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command"); assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX)); buffer->AppendUtf8(SCC_GENDER_LIST); buffer->AppendByte(TranslateArgumentIdx(argidx, offset)); EmitWordList(buffer, words, nw); } } static const CmdStruct *FindCmd(const char *s, int len) { for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) { if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs; } return nullptr; } static uint ResolveCaseName(const char *str, size_t len) { /* First get a clean copy of only the case name, then resolve it. 
*/ char case_str[CASE_GENDER_LEN]; len = std::min(lengthof(case_str) - 1, len); memcpy(case_str, str, len); case_str[len] = '\0'; uint8_t case_idx = _lang.GetCaseIndex(case_str); if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str); return case_idx + 1; } /* returns nullptr on eof * else returns command struct */ static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei) { const char *s = *str, *start; char c; *argno = -1; *casei = -1; /* Scan to the next command, exit if there's no next command. */ for (; *s != '{'; s++) { if (*s == '\0') return nullptr; } s++; // Skip past the { if (*s >= '0' && *s <= '9') { char *end; *argno = std::strtoul(s, &end, 0); if (*end != ':') StrgenFatal("missing arg #"); s = end + 1; } /* parse command name */ start = s; do { c = *s++; } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0); const CmdStruct *cmd = FindCmd(start, s - start - 1); if (cmd == nullptr) { std::string command(start, s - start - 1); StrgenError("Undefined command '{}'", command); return nullptr; } if (c == '.') { const char *casep = s; if (!(cmd->flags & C_CASE)) { StrgenFatal("Command '{}' can't have a case", cmd->cmd); } do { c = *s++; } while (c != '}' && c != ' ' && c != '\0'); *casei = ResolveCaseName(casep, s - casep - 1); } if (c == '\0') { StrgenError("Missing }} from command '{}'", start); return nullptr; } if (c != '}') { if (c == '=') s--; /* copy params */ start = s; for (;;) { c = *s++; if (c == '}') break; if (c == '\0') { StrgenError("Missing }} from command '{}'", start); return nullptr; } if (s - start == MAX_COMMAND_PARAM_SIZE) FatalError("param command too long"); *param++ = c; } } *param = '\0'; *str = s; return cmd; } /** * Prepare reading. * @param data The data to fill during reading. * @param file The file we are reading. * @param master Are we reading the master file? * @param translation Are we reading a translation? 
*/ StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) : data(data), file(file), master(master), translation(translation) { } ParsedCommandStruct ExtractCommandString(const char *s, bool) { char param[MAX_COMMAND_PARAM_SIZE]; int argno; int argidx = 0; int casei; ParsedCommandStruct p; for (;;) { /* read until next command from a. */ const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei); if (ar == nullptr) break; /* Sanity checking */ if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex"); if (ar->consumes) { if (argno != -1) argidx = argno; if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx); if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx); p.consuming_commands[argidx++] = ar; } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them p.non_consuming_commands.emplace_back(CmdPair{ar, param}); } } return p; } const CmdStruct *TranslateCmdForCompare(const CmdStruct *a) { if (a == nullptr) return nullptr; if (strcmp(a->cmd, "STRING1") == 0 || strcmp(a->cmd, "STRING2") == 0 || strcmp(a->cmd, "STRING3") == 0 || strcmp(a->cmd, "STRING4") == 0 || strcmp(a->cmd, "STRING5") == 0 || strcmp(a->cmd, "STRING6") == 0 || strcmp(a->cmd, "STRING7") == 0 || strcmp(a->cmd, "RAW_STRING") == 0) { return FindCmd("STRING", 6); } return a; } static bool CheckCommandsMatch(const char *a, const char *b, const char *name) { /* If we're not translating, i.e. we're compiling the base language, * it is pointless to do all these checks as it'll always be correct. * After all, all checks are based on the base language. 
*/ if (!_translation) return true; bool result = true; ParsedCommandStruct templ = ExtractCommandString(b, true); ParsedCommandStruct lang = ExtractCommandString(a, true); /* For each string in templ, see if we find it in lang */ if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) { StrgenWarning("{}: template string and language string have a different # of commands", name); result = false; } for (auto &templ_nc : templ.non_consuming_commands) { /* see if we find it in lang, and zero it out */ bool found = false; for (auto &lang_nc : lang.non_consuming_commands) { if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) { /* it was found in both. zero it out from lang so we don't find it again */ lang_nc.cmd = nullptr; found = true; break; } } if (!found) { StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd); result = false; } } /* if we reach here, all non consumer commands match up. * Check if the non consumer commands match up also. */ for (uint i = 0; i < templ.consuming_commands.max_size(); i++) { if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) { StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i, lang.consuming_commands[i] == nullptr ? "" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd, templ.consuming_commands[i] == nullptr ? "" : templ.consuming_commands[i]->cmd); result = false; } } return result; } void StringReader::HandleString(char *str) { if (*str == '#') { if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2); return; } /* Ignore comments & blank lines */ if (*str == ';' || *str == ' ' || *str == '\0') return; char *s = strchr(str, ':'); if (s == nullptr) { StrgenError("Line has no ':' delimiter"); return; } char *t; /* Trim spaces. 
* After this str points to the command name, and s points to the command contents */ for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {} *t = 0; s++; /* Check string is valid UTF-8 */ const char *tmp; for (tmp = s; *tmp != '\0';) { size_t len = Utf8Validate(tmp); if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s); char32_t c; Utf8Decode(&c, tmp); if (c <= 0x001F || // ASCII control character range c == 0x200B || // Zero width space (c >= 0xE000 && c <= 0xF8FF) || // Private range (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s); } tmp += len; } /* Check if the string has a case.. * The syntax for cases is IDENTNAME.case */ char *casep = strchr(str, '.'); if (casep != nullptr) *casep++ = '\0'; /* Check if this string already exists.. */ LangString *ent = this->data.Find(str); if (this->master) { if (casep != nullptr) { StrgenError("Cases in the base translation are not supported."); return; } if (ent != nullptr) { StrgenError("String name '{}' is used multiple times", str); return; } if (this->data.strings[this->data.next_string_id] != nullptr) { StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name); return; } /* Allocate a new LangString */ this->data.Add(std::make_unique(str, s, this->data.next_string_id++, _cur_line)); } else { if (ent == nullptr) { StrgenWarning("String name '{}' does not exist in master file", str); return; } if (!ent->translated.empty() && casep == nullptr) { StrgenError("String name '{}' is used multiple times", str); return; } /* make sure that the commands match */ if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return; if (casep != nullptr) { ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s); } else { ent->translated = s; /* If the string was translated, use the line from the * translated language so errors in 
the translated file * are properly referenced to. */ ent->line = _cur_line; } } } void StringReader::HandlePragma(char *str) { if (!memcmp(str, "plural ", 7)) { _lang.plural_form = atoi(str + 7); if (_lang.plural_form >= lengthof(_plural_forms)) { StrgenFatal("Invalid pluralform {}", _lang.plural_form); } } else { StrgenFatal("unknown pragma '{}'", str); } } static void StripTrailingWhitespace(std::string &str) { str.erase(str.find_last_not_of("\r\n ") + 1); } void StringReader::ParseFile() { _warnings = _errors = 0; _translation = this->translation; _file = this->file.c_str(); /* Abusing _show_todo to replace "warning" with "info" for translations. */ _show_todo &= 3; if (!this->translation) _show_todo |= 4; /* For each new file we parse, reset the genders, and language codes. */ MemSetT(&_lang, 0); strecpy(_lang.number_format, "00,000,000,000,000,000,000", lastof(_lang.number_format)); strecpy(_lang.number_abbreviations, "3=00,000,000,000,000,000{NBSP}k|6=00,000,000,000,000{NBSP}m|9=00,000,000,000{NBSP}bn|12=00,000,000{NBSP}tn|15=00,000{NBSP}Qa|18=00{NBSP}Qi", lastof(_lang.number_abbreviations)); strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator)); strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency)); strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator)); _cur_line = 1; while (this->data.next_string_id < this->data.max_strings) { std::optional line = this->ReadLine(); if (!line.has_value()) return; StripTrailingWhitespace(line.value()); this->HandleString(line.value().data()); _cur_line++; } if (this->data.next_string_id == this->data.max_strings) { StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings); } } /** * Write the header information. * @param data The data about the string. 
*/ void HeaderWriter::WriteHeader(const StringData &data) { int last = 0; for (size_t i = 0; i < data.max_strings; i++) { if (data.strings[i] != nullptr) { this->WriteStringID(data.strings[i]->name, (int)i); last = (int)i; } } this->WriteStringID("STR_LAST_STRINGID", last); } static int TranslateArgumentIdx(int argidx, int offset) { int sum; if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) { StrgenFatal("invalid argidx {}", argidx); } const CmdStruct *cs = _cur_pcs.consuming_commands[argidx]; if (cs != nullptr && cs->consumes <= offset) { StrgenFatal("invalid argidx offset {}:{}", argidx, offset); } if (_cur_pcs.consuming_commands[argidx] == nullptr) { StrgenFatal("no command for this argidx {}", argidx); } for (int i = sum = 0; i < argidx; i++) { cs = _cur_pcs.consuming_commands[i]; sum += (cs != nullptr) ? cs->consumes : 1; } return sum + offset; } static void PutArgidxCommand(Buffer *buffer) { buffer->AppendUtf8(SCC_ARG_INDEX); buffer->AppendByte(TranslateArgumentIdx(_cur_argidx)); } static void PutCommandString(Buffer *buffer, const char *str) { _cur_argidx = 0; while (*str != '\0') { /* Process characters as they are until we encounter a { */ if (*str != '{') { buffer->AppendByte(*str++); continue; } char param[MAX_COMMAND_PARAM_SIZE]; int argno; int casei; const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei); if (cs == nullptr) break; if (casei != -1) { buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE} buffer->AppendByte(casei); } /* For params that consume values, we need to handle the argindex properly */ if (cs->consumes > 0) { /* Check if we need to output a move-param command */ if (argno != -1 && argno != _cur_argidx) { _cur_argidx = argno; PutArgidxCommand(buffer); } /* Output the one from the master string... it's always accurate. 
*/ cs = _cur_pcs.consuming_commands[_cur_argidx++]; if (cs == nullptr) { StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1); } } cs->proc(buffer, param, cs->value); } } /** * Write the length as a simple gamma. * @param length The number to write. */ void LanguageWriter::WriteLength(uint length) { char buffer[2]; int offs = 0; if (length >= 0x4000) { StrgenFatal("string too long"); } if (length >= 0xC0) { buffer[offs++] = (length >> 8) | 0xC0; } buffer[offs++] = length & 0xFF; this->Write((byte*)buffer, offs); } /** * Actually write the language. * @param data The data about the string. */ void LanguageWriter::WriteLang(const StringData &data) { std::vector in_use; for (size_t tab = 0; tab < data.tabs; tab++) { uint n = data.CountInUse((uint)tab); in_use.push_back(n); _lang.offsets[tab] = TO_LE16(n); for (uint j = 0; j != in_use[tab]; j++) { const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get(); if (ls != nullptr && ls->translated.empty()) _lang.missing++; } } _lang.ident = TO_LE32(LanguagePackHeader::IDENT); _lang.version = TO_LE32(data.Version()); _lang.missing = TO_LE16(_lang.missing); _lang.winlangid = TO_LE16(_lang.winlangid); this->WriteHeader(&_lang); Buffer buffer; for (size_t tab = 0; tab < data.tabs; tab++) { for (uint j = 0; j != in_use[tab]; j++) { const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get(); const std::string *cmdp; /* For undefined strings, just set that it's an empty string */ if (ls == nullptr) { this->WriteLength(0); continue; } _cur_ident = ls->name.c_str(); _cur_line = ls->line; /* Produce a message if a string doesn't have a translation. 
*/ if (_show_todo > 0 && ls->translated.empty()) { if ((_show_todo & 2) != 0) { StrgenWarning("'{}' is untranslated", ls->name); } if ((_show_todo & 1) != 0) { const char *s = " "; while (*s != '\0') buffer.AppendByte(*s++); } } /* Extract the strings and stuff from the english command string */ _cur_pcs = ExtractCommandString(ls->english.c_str(), false); if (!ls->translated_cases.empty() || !ls->translated.empty()) { cmdp = &ls->translated; } else { cmdp = &ls->english; } _translated = cmdp != &ls->english; if (!ls->translated_cases.empty()) { /* Need to output a case-switch. * It has this format * <0x9E> * Each LEN is printed using 2 bytes in big endian order. */ buffer.AppendUtf8(SCC_SWITCH_CASE); buffer.AppendByte((byte)ls->translated_cases.size()); /* Write each case */ for (const Case &c : ls->translated_cases) { buffer.AppendByte(c.caseidx); /* Make some space for the 16-bit length */ uint pos = (uint)buffer.size(); buffer.AppendByte(0); buffer.AppendByte(0); /* Write string */ PutCommandString(&buffer, c.string.c_str()); buffer.AppendByte(0); // terminate with a zero /* Fill in the length */ uint size = (uint)buffer.size() - (pos + 2); buffer[pos + 0] = GB(size, 8, 8); buffer[pos + 1] = GB(size, 0, 8); } } if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str()); this->WriteLength((uint)buffer.size()); this->Write(buffer.data(), buffer.size()); buffer.clear(); } } } static const std::string_view NBSP_TOKEN = "{NBSP}"; static std::string ReplaceNBSP(std::string string) { for (;;) { auto iter = string.find(NBSP_TOKEN); if (iter == std::string::npos) break; string.replace(iter, NBSP_TOKEN.size(), NBSP); } return string; } /** * Parse the \c NumberFormatSeparators out of the given format string, with the expected number of digits. * * Different cultures have different ways to separate their numbers when they get really big. In the Western world * these are often called thousands separators which come every three digits counted from the back. 
The actual
 * separator differs per language/country. In Chinese, Japanese and Korean they add a character every four digits
 * counted from the back, and this character differs for each spot as it denotes "ten thousand", "hundred million",
 * etc. In the Indic numbering system (Indian subcontinent), the first separator is after three digits counted
 * from the back, but the next separators are given every two digits.
 *
 * So, there's no simple single parameter that you can add to the digit grouping character that is already
 * configured. The simplest solution is just defining what character to place between each of the digits, i.e. what
 * characters separate each of the digits. These are the \c NumberFormatSeparators.
 *
 * To define these, you simply write a string of \c length zeros and then add any characters in between at the right
 * locations so the digit grouping is correct. When formatting numbers, it will start at the appropriate digit and
 * continue from there with separators.
 *
 * Examples of formats are "00,000,000,000,000,000,000" and "0000{NBSP}0000{NBSP}0000{NBSP}0000{NBSP}0000".
 *
 * @param separators The separators to fill; it will be cleared first.
 * @param format The format that is going to be read.
 * @param length The number of digits that are expected in this format.
 * @return An \c std::optional with the error message, or \c std::nullopt when the parsing went without problems.
*/
std::optional<std::string> ParseNumberFormatSeparators(NumberFormatSeparators &separators, std::string_view format, size_t length)
{
	separators.fill({});

	/* The view gets shortened while parsing; keep the complete format for the error message. */
	const std::string_view full_format = format;

	size_t seen_zeros = 0;
	auto it_separator = separators.rbegin();

	/* Walk the zeros from the back; whatever follows a zero is the separator after that digit. */
	for (auto pos = format.find_last_of('0'); pos != std::string_view::npos; pos = format.find_last_of('0')) {
		seen_zeros++;

		/* Keep counting when the table is full, so too many digits are reported instead of silently dropped. */
		if (it_separator != separators.rend()) {
			*it_separator = ReplaceNBSP(std::string(format.substr(pos + 1)));
			++it_separator;
		}

		format = format.substr(0, pos);
	}

	if (seen_zeros != length) return fmt::format("Unexpected number of digits ({} vs {}) in format string: [{}]", seen_zeros, length, full_format);

	return std::nullopt;
}

/**
 * Parse the \c NumberAbbreviations out of the given input string.
 *
 * In some places in the UI numbers are getting really big yet their exact value is not that important. For example
 * in the graphs of company values. For this you want a more compact number, e.g. 123 m for 123.456.789. However, due
 * to the grouping of digits differing in different cultures, see \c ParseNumberFormatSeparators, there are many
 * different ways of grouping digits.
 *
 * This function builds up a lookup table of these abbreviations by power of ten. The input will be a list of
 * definitions per power, separated by a pipe character (|). Each definition is the power of ten and the
 * associated number format with DIGITS_IN_UINT64_T - power digits, separated by the equals sign (=).
 *
 * For example, for English it defines every third power of ten with subsequently smaller number formats:
 * 3=00,000,000,000,000,000{NBSP}k|6=00,000,000,000,000{NBSP}m|9=00,000,000,000{NBSP}bn|12=00,000,000{NBSP}tn|15=00,000{NBSP}Qa|18=00{NBSP}Qi
 *
 * @param abbreviations The table to write the abbreviations in; it will be cleared before filling.
 * @param input The input format to parse.
 * @return An \c std::optional with the error message, or \c std::nullopt when the parsing went without problems.
*/
std::optional<std::string> ParseNumberAbbreviations(NumberAbbreviations &abbreviations, std::string_view input)
{
	abbreviations.clear();

	/* Collect the formats by power first; the views refer to the caller's input. */
	std::map<int, std::string_view> abbreviation_map;
	do {
		/* Split off the next '|' separated part; the last part has no separator to skip. */
		std::string_view part = input.substr(0, input.find_first_of('|'));
		input.remove_prefix(std::min(part.size() + 1, input.size()));

		auto equals = part.find_first_of('=');
		if (equals == std::string_view::npos) return fmt::format("Part [{}] does not have an '='", part);

		std::string_view power_sv = part.substr(0, equals);
		int power = 0;

		/* The complete text before the '=' must be the number; from_chars itself stops at the first non-digit. */
		const char *power_end = power_sv.data() + power_sv.size();
		auto [parsed_end, ec] = std::from_chars(power_sv.data(), power_end, power);
		if (ec != std::errc{} || parsed_end != power_end) return fmt::format("Power [{}] is not a number", power_sv);

		if (power >= DIGITS_IN_UINT64_T || power <= 0) return fmt::format("Power {} is not allowed", power_sv);

		abbreviation_map[power] = part.substr(equals + 1);
	} while (!input.empty());

	/* Fill the table from the highest power to the lowest. */
	for (auto iter = abbreviation_map.rbegin(); iter != abbreviation_map.rend(); ++iter) {
		NumberFormatSeparators separators;
		auto result = ParseNumberFormatSeparators(separators, iter->second, DIGITS_IN_UINT64_T - iter->first);
		if (result.has_value()) return result;

		abbreviations.emplace_back(PowerOfTen(iter->first), separators);
	}

	return std::nullopt;
}