26static std::string_view _cur_ident;
28static size_t _cur_argidx;
33 std::optional<size_t> argno;
34 std::optional<uint8_t> casei;
37static size_t TranslateArgumentIdx(
size_t arg,
size_t offset = 0);
94 this->
strings[ls->index] = std::move(ls);
107 return it->second.get();
119 hash = std::rotl(hash, 3) ^ c;
120 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
137 hash ^= i * 0x717239;
138 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
143 while ((cs = ParseCommandString(consumer)).cmd !=
nullptr) {
146 hash ^= (cs.cmd - _cmd_structs) * 0x1234567;
147 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
162 while (count > 0 && this->
strings[(tab *
TAB_SIZE) + count - 1] ==
nullptr) --count;
166void EmitSingleChar(
StringBuilder &builder, std::string_view param,
char32_t value)
168 if (!param.empty()) StrgenWarning(
"Ignoring trailing letters in command");
174static std::pair<std::optional<size_t>, std::optional<size_t>> ParseRelNum(
StringConsumer &consumer)
178 std::optional<size_t> offset;
179 if (v.has_value() && consumer.
ReadCharIf(
':')) {
182 if (!offset.has_value()) StrgenFatal(
"Expected number for substring parameter");
188std::optional<std::string_view> ParseWord(
StringConsumer &consumer)
196 if (!consumer.
ReadCharIf(
'"')) StrgenFatal(
"Unterminated quotes");
206static void EmitWordList(
StringBuilder &builder,
const std::vector<std::string> &words)
208 builder.
PutUint8(
static_cast<uint8_t
>(words.size()));
209 for (
size_t i = 0; i < words.size(); i++) {
210 size_t len = words[i].size();
211 if (len > UINT8_MAX) StrgenFatal(
"WordList {}/{} string '{}' too long, max bytes {}", i + 1, words.size(), words[i], UINT8_MAX);
212 builder.
PutUint8(
static_cast<uint8_t
>(len));
214 for (
size_t i = 0; i < words.size(); i++) {
215 builder.
Put(words[i]);
219void EmitPlural(
StringBuilder &builder, std::string_view param,
char32_t)
224 auto [argidx, offset] = ParseRelNum(consumer);
225 if (!argidx.has_value()) {
226 if (_cur_argidx == 0) StrgenFatal(
"Plural choice needs positional reference");
227 argidx = _cur_argidx - 1;
230 const CmdStruct *cmd = _cur_pcs.consuming_commands[*argidx];
231 if (!offset.has_value()) {
233 if (cmd ==
nullptr || !cmd->default_plural_offset.has_value()) {
234 StrgenFatal(
"Command '{}' has no (default) plural position", cmd ==
nullptr ?
"<empty>" : cmd->cmd);
236 offset = cmd->default_plural_offset;
240 std::vector<std::string> words;
242 auto word = ParseWord(consumer);
243 if (!word.has_value())
break;
244 words.emplace_back(*word);
248 StrgenFatal(
"{}: No plural words", _cur_ident);
251 size_t expected =
_plural_forms[_strgen.lang.plural_form].plural_count;
252 if (expected != words.size()) {
254 StrgenFatal(
"{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
255 expected, words.size());
257 if (_strgen.show_warnings) StrgenWarning(
"'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
258 if (words.size() > expected) {
259 words.resize(expected);
261 while (words.size() < expected) {
262 words.push_back(words.back());
268 builder.
PutUtf8(SCC_PLURAL_LIST);
269 builder.
PutUint8(_strgen.lang.plural_form);
270 builder.
PutUint8(
static_cast<uint8_t
>(TranslateArgumentIdx(*argidx, *offset)));
271 EmitWordList(builder, words);
274void EmitGender(
StringBuilder &builder, std::string_view param,
char32_t)
280 auto nw = _strgen.lang.GetGenderIndex(gender);
281 if (nw >=
MAX_NUM_GENDERS) StrgenFatal(
"G argument '{}' invalid", gender);
284 builder.
PutUtf8(SCC_GENDER_INDEX);
289 auto [argidx, offset] = ParseRelNum(consumer);
290 if (!argidx.has_value()) argidx = _cur_argidx;
291 if (!offset.has_value()) offset = 0;
293 const CmdStruct *cmd = _cur_pcs.consuming_commands[*argidx];
295 StrgenFatal(
"Command '{}' can't have a gender", cmd ==
nullptr ?
"<empty>" : cmd->cmd);
298 std::vector<std::string> words;
300 auto word = ParseWord(consumer);
301 if (!word.has_value())
break;
302 words.emplace_back(*word);
304 if (words.size() != _strgen.lang.num_genders) StrgenFatal(
"Bad # of arguments for gender command");
306 assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
307 builder.
PutUtf8(SCC_GENDER_LIST);
308 builder.
PutUint8(
static_cast<uint8_t
>(TranslateArgumentIdx(*argidx, *offset)));
309 EmitWordList(builder, words);
313static const CmdStruct *FindCmd(std::string_view s)
315 auto it = std::ranges::find(_cmd_structs, s, &CmdStruct::cmd);
316 if (it != std::end(_cmd_structs))
return &*it;
320static uint8_t ResolveCaseName(std::string_view str)
322 uint8_t case_idx = _strgen.lang.GetCaseIndex(str);
323 if (case_idx >=
MAX_NUM_CASES) StrgenFatal(
"Invalid case-name '{}'", str);
337 result.argno = argno;
338 if (!consumer.
ReadCharIf(
':')) StrgenFatal(
"missing arg #");
343 result.cmd = FindCmd(command);
344 if (result.cmd ==
nullptr) {
345 StrgenError(
"Undefined command '{}'", command);
352 StrgenFatal(
"Command '{}' can't have a case", result.cmd->cmd);
356 result.casei = ResolveCaseName(casep);
363 StrgenError(
"Missing }} from command '{}'", result.cmd->cmd);
390 auto cs = ParseCommandString(consumer);
392 if (cs.cmd ==
nullptr)
break;
395 if (cs.argno.has_value() && cs.cmd->consumes == 0) StrgenFatal(
"Non consumer param can't have a paramindex");
397 if (cs.cmd->consumes > 0) {
398 if (cs.argno.has_value()) argidx = *cs.argno;
399 if (argidx >= p.consuming_commands.max_size()) StrgenFatal(
"invalid param idx {}", argidx);
400 if (p.consuming_commands[argidx] !=
nullptr && p.consuming_commands[argidx] != cs.cmd) StrgenFatal(
"duplicate param idx {}", argidx);
402 p.consuming_commands[argidx++] = cs.cmd;
404 p.non_consuming_commands.emplace_back(cs.cmd, std::move(cs.param));
413 if (a ==
nullptr)
return nullptr;
415 if (a->cmd ==
"STRING1" ||
416 a->cmd ==
"STRING2" ||
417 a->cmd ==
"STRING3" ||
418 a->cmd ==
"STRING4" ||
419 a->cmd ==
"STRING5" ||
420 a->cmd ==
"STRING6" ||
421 a->cmd ==
"STRING7" ||
422 a->cmd ==
"RAW_STRING") {
423 return FindCmd(
"STRING");
429static bool CheckCommandsMatch(std::string_view a, std::string_view b, std::string_view name)
435 if (!_strgen.translation)
return true;
443 if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
444 StrgenWarning(
"{}: template string and language string have a different # of commands", name);
448 for (
auto &templ_nc : templ.non_consuming_commands) {
450 auto it = std::ranges::find(lang.non_consuming_commands, templ_nc);
451 if (it != std::end(lang.non_consuming_commands)) {
455 StrgenWarning(
"{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
462 for (
size_t i = 0; i < templ.consuming_commands.max_size(); i++) {
463 if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
464 StrgenWarning(
"{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
465 lang.consuming_commands[i] ==
nullptr ?
"<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
466 templ.consuming_commands[i] ==
nullptr ?
"<empty>" : templ.consuming_commands[i]->cmd);
474void StringReader::HandleString(std::string_view src)
477 if (src.empty())
return;
479 StringConsumer consumer(src);
488 StrgenError(
"Line has no ':' delimiter");
493 std::optional<std::string_view> casep;
494 if (
auto index = str_name.find(
"."); index != std::string_view::npos) {
495 casep = str_name.substr(index + 1);
496 str_name = str_name.substr(0, index);
503 for (StringConsumer validation_consumer(value); validation_consumer.AnyBytesLeft(); ) {
504 auto c = validation_consumer.TryReadUtf8();
505 if (!c.has_value()) StrgenFatal(
"Invalid UTF-8 sequence in '{}'", value);
508 (*c >= 0xE000 && *c <= 0xF8FF) ||
509 (*c >= 0xFFF0 && *c <= 0xFFFF)) {
510 StrgenFatal(
"Unwanted UTF-8 character U+{:04X} in sequence '{}'",
static_cast<uint32_t
>(*c), value);
515 LangString *ent = this->
data.Find(str_name);
518 if (casep.has_value()) {
519 StrgenError(
"Cases in the base translation are not supported.");
523 if (ent !=
nullptr) {
524 StrgenError(
"String name '{}' is used multiple times", str_name);
528 if (this->
data.strings[this->data.next_string_id] !=
nullptr) {
529 StrgenError(
"String ID 0x{:X} for '{}' already in use by '{}'", this->
data.next_string_id, str_name, this->data.strings[this->data.next_string_id]->name);
534 this->
data.Add(std::make_unique<LangString>(str_name, value, this->
data.next_string_id++, _strgen.
cur_line));
536 if (ent ==
nullptr) {
537 StrgenWarning(
"String name '{}' does not exist in master file", str_name);
541 if (!ent->
translated.empty() && !casep.has_value()) {
542 StrgenError(
"String name '{}' is used multiple times", str_name);
547 if (!CheckCommandsMatch(value, ent->
english, str_name))
return;
549 if (casep.has_value()) {
565 if (name ==
"plural") {
568 StrgenFatal(
"Invalid pluralform {}", lang.
plural_form);
571 StrgenFatal(
"unknown pragma '{}'", name);
577 _strgen.warnings = _strgen.errors = 0;
580 _strgen.file = this->
file;
585 _strgen.cur_line = 1;
586 while (this->
data.next_string_id < this->data.max_strings) {
587 std::optional<std::string> line = this->
ReadLine();
588 if (!line.has_value())
return;
594 if (this->
data.next_string_id == this->data.max_strings) {
595 StrgenError(
"Too many strings, maximum allowed is {}", this->
data.max_strings);
607 if (data.
strings[i] !=
nullptr) {
616static size_t TranslateArgumentIdx(
size_t argidx,
size_t offset)
618 if (argidx >= _cur_pcs.consuming_commands.max_size()) {
619 StrgenFatal(
"invalid argidx {}", argidx);
621 const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
622 if (cs !=
nullptr && cs->consumes <= offset) {
623 StrgenFatal(
"invalid argidx offset {}:{}", argidx, offset);
626 if (_cur_pcs.consuming_commands[argidx] ==
nullptr) {
627 StrgenFatal(
"no command for this argidx {}", argidx);
631 for (
size_t i = 0; i < argidx; i++) {
632 cs = _cur_pcs.consuming_commands[i];
634 if (cs ==
nullptr && sum > i)
continue;
636 sum += (cs !=
nullptr) ? cs->consumes : 1;
644 builder.
PutUtf8(SCC_ARG_INDEX);
645 builder.
PutUint8(
static_cast<uint8_t
>(TranslateArgumentIdx(_cur_argidx)));
648static std::string PutCommandString(std::string_view str)
660 auto cs = ParseCommandString(consumer);
662 if (cmd ==
nullptr)
break;
664 if (cs.casei.has_value()) {
670 if (cmd->consumes > 0) {
672 if (cs.argno.has_value() && *cs.argno != _cur_argidx) {
673 _cur_argidx = *cs.argno;
674 PutArgidxCommand(builder);
678 cmd = _cur_pcs.consuming_commands[_cur_argidx++];
679 if (cmd ==
nullptr) {
680 StrgenFatal(
"{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
684 cmd->proc(builder, cs.param, cmd->value);
697 if (length >= 0x4000) {
698 StrgenFatal(
"string too long");
701 if (length >= 0xC0) {
702 buffer[offs++] =
static_cast<char>(
static_cast<uint8_t
>((length >> 8) | 0xC0));
704 buffer[offs++] =
static_cast<char>(
static_cast<uint8_t
>(length & 0xFF));
705 this->
Write({buffer, offs});
714 std::vector<size_t> in_use;
715 for (
size_t tab = 0; tab < data.
tabs; tab++) {
719 _strgen.lang.offsets[tab] = TO_LE16(
static_cast<uint16_t
>(n));
721 for (
size_t j = 0; j != in_use[tab]; j++) {
723 if (ls !=
nullptr && ls->
translated.empty()) _strgen.lang.missing++;
728 _strgen.lang.version = TO_LE32(data.
Version());
729 _strgen.lang.missing = TO_LE16(_strgen.lang.missing);
730 _strgen.lang.winlangid = TO_LE16(_strgen.lang.winlangid);
734 for (
size_t tab = 0; tab < data.
tabs; tab++) {
735 for (
size_t j = 0; j != in_use[tab]; j++) {
746 _cur_ident = ls->
name;
747 _strgen.cur_line = ls->
line;
751 if (_strgen.show_warnings) {
752 StrgenWarning(
"'{}' is untranslated", ls->
name);
754 if (_strgen.annotate_todos) {
755 builder.
Put(
"<TODO> ");
760 _cur_pcs = ExtractCommandString(ls->
english,
false);
770 builder.
PutUtf8(SCC_SWITCH_CASE);
775 auto case_str = PutCommandString(c.
string);
777 builder.
PutUint16LE(
static_cast<uint16_t
>(case_str.size()));
778 builder.
Put(case_str);
783 if (!cmdp.empty()) def_str = PutCommandString(cmdp);
785 builder.
PutUint16LE(
static_cast<uint16_t
>(def_str.size()));
787 builder.
Put(def_str);
constexpr bool Test(Tvalue_type value) const
Test if the value-th bit is set.
void PutUtf8(char32_t c)
Append UTF-8 char.
void PutUint16LE(uint16_t value)
Append binary uint16 using little endian.
void Put(std::string_view str)
Append string.
void PutUint8(uint8_t value)
Append binary uint8.
Compose data into a growing std::string.
Parse data from a string / buffer.
bool ReadCharIf(char c)
Check whether the next 8-bit char matches 'c', and skip it.
std::optional< T > TryReadIntegerBase(int base, bool clamp=false)
Try to read and parse an integer in number 'base', and then advance the reader.
std::string_view ReadUntilChar(char c, SeparatorUsage sep)
Read data until the first occurrence of 8-bit char 'c', and advance reader.
void SkipUntilChar(char c, SeparatorUsage sep)
Skip data until the first occurrence of 8-bit char 'c'.
@ SKIP_ALL_SEPARATORS
Read and discard all consecutive separators, do not include any in the result.
@ KEEP_SEPARATOR
Keep the separator in the data as next value to be read.
bool AnyBytesLeft() const noexcept
Check whether there are any bytes left to read.
static const std::string_view WHITESPACE_OR_NEWLINE
ASCII whitespace characters, including new-line.
static const std::string_view WHITESPACE_NO_NEWLINE
ASCII whitespace characters, excluding new-line.
void SkipUntilCharNotIn(std::string_view chars)
Skip 8-bit chars, while they are in 'chars', until they are not.
std::string_view ReadUntilCharIn(std::string_view chars)
Read 8-bit chars, while they are not in 'chars', until they are; and advance reader.
T ReadIntegerBase(int base, T def=0, bool clamp=false)
Read and parse an integer in number 'base', and advance the reader.
std::string_view Read(size_type len)
Read the next 'len' bytes, and advance reader.
static constexpr size_type npos
Special value for "end of data".
Control codes that are embedded in the translation strings.
Functions to handle different endian machines.
Error reporting related functions.
static const uint8_t MAX_NUM_GENDERS
Maximum number of supported genders.
static const uint8_t MAX_NUM_CASES
Maximum number of supported cases.
constexpr bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is between a window started at some base point.
A number of safeguards to prevent using unsafe methods.
Definition of base types and functions in a cross-platform compatible way.
#define lengthof(array)
Return the length of a fixed size array.
Structures related to strgen.
static bool _translated
Whether the current language is not the master language.
static uint32_t VersionHashStr(uint32_t hash, std::string_view s)
Create a compound hash.
Tables of commands for strgen.
static const PluralForm _plural_forms[]
All plural forms used.
@ Gender
These commands support genders.
@ Case
These commands support cases.
@ DontCount
These commands aren't counted for comparison.
Compose strings from textual and binary data.
Functions related to low-level strings.
static const uint TAB_SIZE
Number of strings per StringTab.
Container for the different cases of a string.
Case(uint8_t caseidx, std::string_view string)
Create a new case.
uint8_t caseidx
The index of the case.
std::string string
The translation of the case.
Information about a single string.
size_t line
Line of string in source-file.
std::string english
English text.
std::vector< Case > translated_cases
Cases of the translation.
std::string translated
Translated text.
void FreeTranslation()
Free all data related to the translation.
std::string name
Name of the string.
size_t index
The index in the language file.
LangString(std::string_view name, std::string_view english, size_t index, size_t line)
Create a new string.
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
virtual void WriteLength(size_t length)
Write the length as a simple gamma.
virtual void Write(std::string_view buffer)=0
Write a number of bytes.
virtual void WriteLang(const StringData &data)
Actually write the language.
Global state shared between strgen.cpp, game_text.cpp and strgen_base.cpp.
LanguagePackHeader lang
Header information about a language.
size_t cur_line
The current line we're parsing in the input file.
Information about the currently known strings.
size_t tabs
The number of 'tabs' of strings.
void Add(std::shared_ptr< LangString > ls)
Add a newly created LangString.
size_t max_strings
The maximum number of strings.
size_t next_string_id
The next string ID to allocate.
void FreeTranslation()
Free all data related to the translation.
LangString * Find(std::string_view s)
Find a LangString based on the string name.
StringData(size_t tabs)
Create a new string data container.
std::unordered_map< std::string, std::shared_ptr< LangString >, StringHash, std::equal_to<> > name_to_string
Lookup table for the strings.
std::vector< std::shared_ptr< LangString > > strings
List of all known strings.
uint32_t Version() const
Make a hash of the file to get a unique "version number".
size_t CountInUse(size_t tab) const
Count the number of tab elements that are in use.
const std::string file
The file we are reading.
StringReader(StringData &data, const std::string &file, bool master, bool translation)
Prepare reading.
StringData & data
The data to fill during reading.
virtual void ParseFile()
Start parsing the file.
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
virtual void HandlePragma(std::string_view str, LanguagePackHeader &lang)
Handle the pragma of the file.
virtual std::optional< std::string > ReadLine()=0
Read a single line from the source of strings.
bool master
Are we reading the master file?