cpp/openttd-patchpack/source Changeset - r28769:123ae3cb0017

Changeset - r28769:123ae3cb0017

Parent rev.

Child rev.

[Not reviewed]

master

0 5 0

Rubidium - 10 months ago 2024-02-08 20:28:36
rubidium@openttd.org

Codechange: Add support for number format and abbreviations pragmas/attributes to strgen

5 files changed with 158 insertions and 1 deletions:

src/lang/english.txt

src/language.h

src/strgen/strgen.cpp

src/strgen/strgen_base.cpp

113

src/strings.cpp

0 comments (0 inline, 0 general)

src/lang/english.txt

➞

Show inline comments

 ##name English (UK)
 ##ownname English (UK)
 ##isocode en_GB
 ##plural 0
 ##textdir ltr
 ##numberformat 00,000,000,000,000,000,000
 ##numberabbreviations 3=00,000,000,000,000,000{NBSP}k|6=00,000,000,000,000{NBSP}m|9=00,000,000,000{NBSP}bn|12=00,000,000{NBSP}tn|15=00,000{NBSP}Qa|18=00{NBSP}Qi
 ##digitsep ,
 ##digitsepcur ,
 ##decimalsep .
 ##winlangid 0x0809
 ##grflangid 0x01
 # This file is part of OpenTTD.
 # OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 # OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 # See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.

src/language.h

➞

Show inline comments

@@ @@ -22,24 +22,28 @@ static const uint8_t MAX_NUM_CASES = 1 @@
 /** Header of a language file. */
 struct LanguagePackHeader {
 	static const uint32_t IDENT = 0x474E414C; ///< Identifier for OpenTTD language files, big endian for "LANG"
 	uint32_t ident;       ///< 32-bits identifier
 	uint32_t version;     ///< 32-bits of auto generated version info which is basically a hash of strings.h
 	char name[32];      ///< the international name of this language
 	char own_name[32];  ///< the localized name of this language
 	char isocode[16];   ///< the ISO code for the language (not country code)
 	uint16_t offsets[TEXT_TAB_END]; ///< the offsets
 	/** The raw formatting string for numbers. */
 	char number_format[64];
 	/** The raw formatting string for number abbreviations. */
 	char number_abbreviations[256];
 	/** Thousand separator used for anything not currencies */
 	char digit_group_separator[8];
 	/** Thousand separator used for currencies */
 	char digit_group_separator_currency[8];
 	/** Decimal separator */
 	char digit_decimal_separator[8];
 	uint16_t missing;     ///< number of missing strings.
 	byte plural_form;   ///< plural form index
 	byte text_dir;      ///< default direction of the text
 	/**
 	 * Windows language ID:
 	 * Windows cannot and will not convert isocodes to something it can use to
@@ @@ -98,16 +102,37 @@ struct LanguageMetadata : public Languag @@
 typedef std::vector<LanguageMetadata> LanguageList;
 /** The actual list of language meta data. */
 extern LanguageList _languages;
 /** The currently loaded language. */
 extern const LanguageMetadata *_current_language;
 #ifdef WITH_ICU_I18N
 extern std::unique_ptr<icu::Collator> _current_collator;
 #endif /* WITH_ICU_I18N */
 /** The number of decimal digits needed to write the largest value of a uint64_t. */
 constexpr int DIGITS_IN_UINT64_T = 20;
 /**
  * Table with the text to place after each of the digits of a number. The table is filled from the back, so
  * the text at index "DIGITS_IN_UINT64_T - 1 - i" (i.e. "19 - i") will be inserted after the digit with value
  * "10**i"; the last entry belongs to the units digit. So, for "normal" thousand separators in a full 20 digit
  * format, the strings at indices 1, 4, 7, 10, 13 and 16 will be filled. For CJK, with a character after every
  * four digits, the strings at indices 3, 7, 11 and 15 will be filled.
  * @see ParseNumberFormatSeparators
  */
 using NumberFormatSeparators = std::array<std::string, DIGITS_IN_UINT64_T>;
 /** Container for the power to abbreviation mapping for formatting short numbers. */
 struct NumberAbbreviation {
 	/**
 	 * Create an abbreviation entry.
 	 * @param threshold The threshold from which this abbreviation holds.
 	 * @param format The format separators to use; they are copied into this entry, so the argument is left untouched.
 	 */
 	NumberAbbreviation(int64_t threshold, const NumberFormatSeparators &format) : threshold(threshold), format(format) {}
 	int64_t threshold; ///< The threshold from which this abbreviation holds.
 	NumberFormatSeparators format; ///< Format separators to use for this specific power.
 };
 /**
  * Lookup for abbreviated formats for different powers of ten.
  * ParseNumberAbbreviations fills this ordered from the largest power to the smallest power.
  */
 using NumberAbbreviations = std::vector<NumberAbbreviation>;
 bool ReadLanguagePack(const LanguageMetadata *lang);
 const LanguageMetadata *GetLanguage(byte newgrflangid);
 /* Both parsers return an error message when the input is malformed, and std::nullopt when parsing succeeded. */
 std::optional<std::string> ParseNumberFormatSeparators(NumberFormatSeparators &separators, std::string_view format, size_t length = DIGITS_IN_UINT64_T);
 std::optional<std::string> ParseNumberAbbreviations(NumberAbbreviations &abbreviations, std::string_view input);
 #endif /* LANGUAGE_H */

src/strgen/strgen.cpp

➞

Show inline comments

@@ @@ -112,24 +112,40 @@ void FileStringReader::HandlePragma(char @@
 	} else if (!memcmp(str, "ownname ", 8)) {
 		strecpy(_lang.own_name, str + 8, lastof(_lang.own_name));
 	} else if (!memcmp(str, "isocode ", 8)) {
 		strecpy(_lang.isocode, str + 8, lastof(_lang.isocode));
 	} else if (!memcmp(str, "textdir ", 8)) {
 		if (!memcmp(str + 8, "ltr", 3)) {
 			_lang.text_dir = TD_LTR;
 		} else if (!memcmp(str + 8, "rtl", 3)) {
 			_lang.text_dir = TD_RTL;
 		} else {
 			FatalError("Invalid textdir {}", str + 8);
+		}
 	} else if (!memcmp(str, "numberformat ", 13)) {
 		str += 13;
 		NumberFormatSeparators separators;
 		auto result = ParseNumberFormatSeparators(separators, str);
 		if (result.has_value()) FatalError("Invalid number format: {}", *result);
 		strecpy(_lang.number_format, str, lastof(_lang.number_format));
 	} else if (!memcmp(str, "numberabbreviations ", 20)) {
 		str += 20;
 		NumberAbbreviations abbreviations;
 		auto result = ParseNumberAbbreviations(abbreviations, str);
 		if (result.has_value()) FatalError("Invalid number abbreviations: {}", *result);
 		strecpy(_lang.number_abbreviations, str, lastof(_lang.number_abbreviations));
 	} else if (!memcmp(str, "digitsep ", 9)) {
 		str += 9;
 		strecpy(_lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator));
 	} else if (!memcmp(str, "digitsepcur ", 12)) {
 		str += 12;
 		strecpy(_lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator_currency));
 	} else if (!memcmp(str, "decimalsep ", 11)) {
 		str += 11;
 		strecpy(_lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_decimal_separator));
 	} else if (!memcmp(str, "winlangid ", 10)) {
 		const char *buf = str + 10;
 		long langid = std::strtol(buf, nullptr, 16);

src/strgen/strgen_base.cpp

➞

Show inline comments

@@ @@ -7,25 +7,25 @@ @@
 /** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
 #include "../stdafx.h"
 #include "../core/alloc_func.hpp"
 #include "../core/endian_func.hpp"
 #include "../core/mem_func.hpp"
 #include "../error_func.h"
 #include "../string_func.h"
 #include "../table/control_codes.h"
 #include "strgen.h"
+#include <charconv>
 #include "../table/strgen_tables.h"
 #include "../safeguards.h"
 /* Compiles a list of strings into a compiled string list */
 static bool _translated;              ///< Whether the current language is not the master language
 static bool _translation;             ///< Is the current file actually a translation or not
 const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
 int _cur_line;                        ///< The current line we're parsing in the input file
 int _errors, _warnings, _show_todo;
@@ @@ -747,24 +747,26 @@ void StringReader::ParseFile() @@
+{
 	_warnings = _errors = 0;
 	_translation = this->translation;
 	_file = this->file.c_str();
 	/* Abusing _show_todo to replace "warning" with "info" for translations. */
 	_show_todo &= 3;
 	if (!this->translation) _show_todo |= 4;
 	/* For each new file we parse, reset the genders, and language codes. */
 	MemSetT(&_lang, 0);
 	strecpy(_lang.number_format, "00,000,000,000,000,000,000", lastof(_lang.number_format));
 	strecpy(_lang.number_abbreviations, "3=00,000,000,000,000,000{NBSP}k|6=00,000,000,000,000{NBSP}m|9=00,000,000,000{NBSP}bn|12=00,000,000{NBSP}tn|15=00,000{NBSP}Qa|18=00{NBSP}Qi", lastof(_lang.number_abbreviations));
 	strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
 	strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
 	strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
 	_cur_line = 1;
 	while (this->data.next_string_id < this->data.max_strings) {
 		std::optional<std::string> line = this->ReadLine();
 		if (!line.has_value()) return;
 		StripTrailingWhitespace(line.value());
 		this->HandleString(line.value().data());
 		_cur_line++;
@@ @@ -971,12 +973,122 @@ void LanguageWriter::WriteLang(const Str @@
 					buffer[pos + 1] = GB(size, 0, 8);
+				}
+			}
 			if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
 			this->WriteLength((uint)buffer.size());
 			this->Write(buffer.data(), buffer.size());
 			buffer.clear();
+		}
+	}
+}
 static const std::string_view NBSP_TOKEN = "{NBSP}";
 static std::string ReplaceNBSP(std::string string)
+{
 	for (;;) {
 		auto iter = string.find(NBSP_TOKEN);
 		if (iter == std::string::npos) break;
 		string.replace(iter, NBSP_TOKEN.size(), NBSP);
+	}
 	return string;
+}
 /**
  * Parse the \c NumberFormatSeparators out of the given format string, with the expected number of digits.
+ *
  * Different cultures have different ways to separate their numbers when they get really big. In the Western world
  * these are often called thousands separators which come every three digits counted from the back. The actual
  * separator differs per language/country. In Chinese, Japanese and Korean they add a character every four digits
  * counted from the back, and this character differs for each spot as it denotes "ten thousand", "hundred million",
  * etc. In the Indic numbering system (Indian subcontinent), the first separator is after three digits counted
  * from the back, but the next separators are given every two digits.
+ *
  * So, there's no simple single parameter that you can add to the digit grouping character that is already
  * configured. The simplest solution is just defining what character to place between each of the digits, i.e what
  * characters separate each of the digits. These are the \c NumberFormatSeparators.
+ *
  * To define these, you simply write a string of \c length zeros and then add any characters in between at the right
  * locations so the digit grouping is correct. When formatting numbers, it will start at the appropriate digit and
  * continue from there with separators.
+ *
  * Examples of formats are "00,000,000,000,000,000,000" and "0000{NBSP}0000{NBSP}0000{NBSP}0000{NBSP}0000".
+ *
  * @param separators The separators to fill; it will be cleared first.
  * @param format The format that is going to be read.
  * @param length The number of digits that are expected in this format.
  * @return An \c std::optional with the error message, or \c std::nullopt when the parsing went without problems.
  */
 std::optional<std::string> ParseNumberFormatSeparators(NumberFormatSeparators &separators, std::string_view format, size_t length)
+{
 	separators.fill({});
 	size_t seen_zeros = 0;
 	auto it_separator = separators.rbegin();
 	auto iter = format.find_last_of('0');
 	while (iter != std::string_view::npos && it_separator != separators.rend())  {
 		seen_zeros++;
 		*it_separator = ReplaceNBSP(std::string(format.substr(iter + 1)));
 		++it_separator;
 		format = format.substr(0, iter);
 		iter = format.find_last_of('0');
+	}
 	if (seen_zeros != length) return fmt::format("Unexpected number of digits ({} vs {}) in format string: [{}]", seen_zeros, length, format);
 	return std::nullopt;
+}
 /**
  * Parse the \c NumberAbbreviations out of the given input string.
+ *
  * In some places in the UI numbers are getting really big yet their exact value is not that important. For example
  * in the graphs of company values. For this you want more compact number, e.g. 123 m for 123.456.789. However, due
  * to the grouping of digits differing in different cultures, see \c ParseNumberFormatSeparators, there are many
  * different ways of grouping digits.
+ *
  * This function builds up a lookup table of these abbreviations by power of ten. The input will be a list of
  * definitions per power separator by a pipe character (|). Each definition is the power of ten and and the
  * associated number format with DIGITS_IN_UINT64_T - power digits, separated by the equals sign (=).
+ *
  * For example, for English it defines every third power of ten with subsequently smaller number formats:
  * 3=00,000,000,000,000,000{NBSP}k|6=00,000,000,000,000{NBSP}m|9=00,000,000,000{NBSP}bn|12=00,000,000{NBSP}tn|15=00,000{NBSP}Qa|18=00{NBSP}Qi
+ *
  * @param abbreviations The table to write the abbreviations in; is will be cleared before filling.
  * @param input The input format to parse.
  * @return An \c std::optional with the error message, or \c std::nullopt when the parsing went without problems.
  */
 std::optional<std::string> ParseNumberAbbreviations(NumberAbbreviations &abbreviations, std::string_view input)
+{
 	abbreviations.clear();
 	std::map<int, std::string_view> abbreviation_map;
 	do {
 		std::string_view part = input.substr(0, input.find_first_of('|'));
 		input.remove_prefix(std::min(part.size() + 1, input.size()));
 		auto equals = part.find_first_of('=');
 		if (equals == std::string_view::npos) return fmt::format("Part [{}] does not have an '='", part);
 		std::string_view power_sv = part.substr(0, equals);
 		int power = 0;
 		if (std::from_chars(power_sv.data(), power_sv.data() + power_sv.size(), power).ec != std::errc{}) return fmt::format("Power [{}] is not a number", power_sv);
 		if (power >= DIGITS_IN_UINT64_T || power <= 0) return fmt::format("Power {} is not allowed", power_sv);
 		abbreviation_map[power] = part.substr(equals + 1);
 	} while (!input.empty());
 	for (auto iter = abbreviation_map.rbegin(); iter != abbreviation_map.rend(); ++iter) {
 		NumberFormatSeparators separators;
 		auto result = ParseNumberFormatSeparators(separators, iter->second, DIGITS_IN_UINT64_T - iter->first);
 		if (result.has_value()) return result;
 		abbreviations.emplace_back(PowerOfTen(iter->first), separators);
+	}
 	return std::nullopt;
+}

src/strings.cpp

➞

Show inline comments

@@ @@ -1875,24 +1875,26 @@ static void GetSpecialNameString(StringB @@
 bool LanguagePackHeader::IsValid() const
+{
 	return this->ident        == TO_LE32(LanguagePackHeader::IDENT) &&
 	       this->version      == TO_LE32(LANGUAGE_PACK_VERSION) &&
 	       this->plural_form  <  LANGUAGE_MAX_PLURAL &&
 	       this->text_dir     <= 1 &&
 	       this->newgrflangid < MAX_LANG &&
 	       this->num_genders  < MAX_NUM_GENDERS &&
 	       this->num_cases    < MAX_NUM_CASES &&
 	       StrValid(this->name,                           lastof(this->name)) &&
 	       StrValid(this->own_name,                       lastof(this->own_name)) &&
 	       StrValid(this->isocode,                        lastof(this->isocode)) &&
 	       StrValid(this->number_format,                  lastof(this->number_format)) &&
 	       StrValid(this->number_abbreviations,           lastof(this->number_abbreviations)) &&
 	       StrValid(this->digit_group_separator,          lastof(this->digit_group_separator)) &&
 	       StrValid(this->digit_group_separator_currency, lastof(this->digit_group_separator_currency)) &&
 	       StrValid(this->digit_decimal_separator,        lastof(this->digit_decimal_separator));
+}
 /**
  * Check whether a translation is sufficiently finished to offer it to the public.
  */
 bool LanguagePackHeader::IsReasonablyFinished() const
+{
 	/* "Less than 25% missing" is "sufficiently finished". */
 	return 4 * this->missing < LANGUAGE_TOTAL_STRINGS;

0 comments (0 inline, 0 general)