From e7e256e18713b3ec31d9b24c932a11233096103e Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 30 Sep 2024 18:11:40 -0700 Subject: [PATCH 01/37] WIP on function composition --- icu4c/source/i18n/messageformat2.cpp | 179 +++++---- .../source/i18n/messageformat2_evaluation.cpp | 47 ++- icu4c/source/i18n/messageformat2_evaluation.h | 5 + .../i18n/messageformat2_formattable.cpp | 153 ++++++-- .../i18n/messageformat2_function_registry.cpp | 244 +++++-------- ...essageformat2_function_registry_internal.h | 1 + icu4c/source/i18n/unicode/messageformat2.h | 43 ++- .../i18n/unicode/messageformat2_formattable.h | 323 +++++++++-------- .../source/test/intltest/messageformat2test.h | 33 +- .../intltest/messageformat2test_custom.cpp | 340 ++++++++++++++---- .../message2/icu4j/icu-test-functions.json | 2 +- testdata/message2/duplicate-declarations.json | 18 +- testdata/message2/more-functions.json | 29 ++ testdata/message2/tricky-declarations.json | 2 - 14 files changed, 876 insertions(+), 543 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 73f7fa45e69f..3448470bb9ec 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -39,19 +39,19 @@ static Formattable evalLiteral(const Literal& lit) { str += var; const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { - return (FormattedPlaceholder(*val, str)); + return (FormattedPlaceholder(*val, str, errorCode)); } } return {}; } // Returns the contents of the literal -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const { +[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit, UErrorCode& errorCode) const { // The fallback for a literal is itself. 
- return FormattedPlaceholder(evalLiteral(lit), lit.quoted()); + return FormattedPlaceholder(evalLiteral(lit), lit.quoted(), errorCode); } -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env, +[[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const { @@ -60,7 +60,7 @@ static Formattable evalLiteral(const Literal& lit) { } if (rand.isNull()) { - return FormattedPlaceholder(); + return InternalValue(); } if (rand.isVariable()) { // Check if it's local or global @@ -88,12 +88,12 @@ static Formattable evalLiteral(const Literal& lit) { // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution UnicodeString str(DOLLAR); str += var; - return FormattedPlaceholder(str); + return InternalValue(str); } - return result; + return InternalValue(std::move(result)); } else { U_ASSERT(rand.isLiteral()); - return formatLiteral(rand.asLiteral()); + return InternalValue(formatLiteral(rand.asLiteral(), status)); } } @@ -114,12 +114,15 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // Options are fully evaluated before calling the function // Format the operand - FormattedPlaceholder rhsVal = formatOperand(env, v, context, status); + InternalValue rhsVal = formatOperand(env, v, context, status); if (U_FAILURE(status)) { return {}; } if (!rhsVal.isFallback()) { - resolvedOpt.adoptInstead(create(ResolvedFunctionOption(k, rhsVal.asFormattable()), status)); + FormattedPlaceholder optVal = rhsVal.value(); + U_ASSERT(!optVal.isNullOperand()); // Option value can't be absent, syntactically + ResolvedFunctionOption opt(k, std::move(optVal), status); + resolvedOpt.adoptInstead(create(std::move(opt), status)); if (U_FAILURE(status)) { return {}; } @@ -131,20 +134,19 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } // Overload that dispatches on 
argument type. Syntax doesn't provide for options in this case. -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const { +[[nodiscard]] InternalValue MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { return {}; } - // These cases should have been checked for already - U_ASSERT(!argument.isFallback() && !argument.isNullOperand()); - - const Formattable& toFormat = argument.asFormattable(); - switch (toFormat.getType()) { + const Formattable* toFormat = argument.getSource(status); + // Null operand case should have been checked for already + U_ASSERT(U_SUCCESS(status)); + switch (toFormat->getType()) { case UFMT_OBJECT: { - const FormattableObject* obj = toFormat.getObject(status); + const FormattableObject* obj = toFormat->getObject(status); U_ASSERT(U_SUCCESS(status)); U_ASSERT(obj != nullptr); const UnicodeString& type = obj->tag(); @@ -167,25 +169,25 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } // No formatter for this type, or it's a primitive type (which will be formatted later) // -- just return the argument itself - return std::move(argument); + return InternalValue(std::move(argument)); } // Overload that dispatches on function name -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { +[[nodiscard]] InternalValue MessageFormatter::evalFormatterCall(const FunctionName& functionName, + FormattedPlaceholder&& argument, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { return {}; } DynamicErrors& errs = context.getErrors(); - UnicodeString fallback(COLON); - fallback += functionName; - if 
(!argument.isNullOperand()) { - fallback = argument.fallback; + UnicodeString fallback = argument.getFallback(); + if (argument.isNullOperand()) { + fallback = UnicodeString(COLON); + fallback += functionName; } if (isFormatter(functionName)) { @@ -194,12 +196,12 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O if (status == U_MF_FORMATTING_ERROR) { errs.setFormattingError(functionName, status); status = U_ZERO_ERROR; - return {}; + return InternalValue(fallback); } if (status == U_MF_UNKNOWN_FUNCTION_ERROR) { errs.setUnknownFunction(functionName, status); status = U_ZERO_ERROR; - return {}; + return InternalValue(fallback); } // Other errors are non-recoverable return {}; @@ -220,7 +222,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // as a formatting error, except for operand mismatch errors errs.setFormattingError(functionName, status); } - return FormattedPlaceholder(fallback); + return InternalValue(fallback); } else { // Ignore warnings status = savedStatus; @@ -228,9 +230,9 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } // Ignore the output if any errors occurred if (errs.hasFormattingError()) { - return FormattedPlaceholder(fallback); + return InternalValue(fallback); } - return result; + return InternalValue(std::move(result)); } // No formatter with this name -- set error if (isSelector(functionName)) { @@ -238,21 +240,21 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } else { errs.setUnknownFunction(functionName, status); } - return FormattedPlaceholder(fallback); + return InternalValue(fallback); } // Formats an expression using `globalEnv` for the values of variables -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { +[[nodiscard]] InternalValue 
MessageFormatter::formatExpression(const Environment& globalEnv, + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } const Operand& rand = expr.getOperand(); // Format the operand (formatOperand handles the case of a null operand) - FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status); + InternalValue randVal = formatOperand(globalEnv, rand, context, status); // Don't call the function on error values if (randVal.isFallback()) { @@ -261,7 +263,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O if (!expr.isFunctionCall()) { // Dispatch based on type of `randVal` - return evalFormatterCall(std::move(randVal), + return evalFormatterCall(randVal.value(), context, status); } else { @@ -273,16 +275,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); // Call the formatter function - // The fallback for a nullary function call is the function name - UnicodeString fallback; - if (rand.isNull()) { - fallback = UnicodeString(COLON); - fallback += functionName; - } else { - fallback = randVal.fallback; - } return evalFormatterCall(functionName, - std::move(randVal), + randVal.value(), std::move(resolvedOptions), context, status); @@ -301,19 +295,25 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // Markup is ignored } else { // Format the expression - FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status); - // Force full evaluation, e.g. applying default formatters to - // unformatted input (or formatting numbers as strings) - UnicodeString partResult = partVal.formatToString(locale, status); - result += partResult; - // Handle formatting errors. 
`formatToString()` can't take a context and thus can't - // register an error directly - if (status == U_MF_FORMATTING_ERROR) { - status = U_ZERO_ERROR; - // TODO: The name of the formatter that failed is unavailable. - // Not ideal, but it's hard for `formatToString()` - // to pass along more detailed diagnostics - context.getErrors().setFormattingError(status); + InternalValue partVal = formatExpression(globalEnv, part.contents(), context, status); + if (partVal.isFallback()) { + result += LEFT_CURLY_BRACE; + result += partVal.asFallback(); + result += RIGHT_CURLY_BRACE; + } else { + // Force full evaluation, e.g. applying default formatters to + // unformatted input (or formatting numbers as strings) + UnicodeString partResult = partVal.value().formatToString(locale, status); + result += partResult; + // Handle formatting errors. `formatToString()` can't take a context and thus can't + // register an error directly + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + // TODO: The name of the formatter that failed is unavailable. + // Not ideal, but it's hard for `formatToString()` + // to pass along more detailed diagnostics + context.getErrors().setFormattingError(status); + } } } } @@ -347,7 +347,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme #if U_DEBUG const DynamicErrors& err = context.getErrors(); U_ASSERT(err.hasError()); - U_ASSERT(rv.argument().isFallback()); + U_ASSERT(rv.isFallback()); #endif } // 2ii(a). Append rv as the last element of the list res. @@ -616,7 +616,7 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons } if (rand.isLiteral()) { - return ResolvedSelector(formatLiteral(rand.asLiteral())); + return ResolvedSelector(formatLiteral(rand.asLiteral(), status)); } // Must be variable @@ -635,7 +635,7 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, cons status = U_ZERO_ERROR; // Unresolved variable -- could be a previous warning. 
Nothing to resolve U_ASSERT(context.getErrors().hasUnresolvedVariableError()); - return ResolvedSelector(FormattedPlaceholder(var)); + return ResolvedSelector(var); } // Pass through other errors return ResolvedSelector(std::move(val)); @@ -661,9 +661,12 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, auto selector = getSelector(context, selectorName, status); if (U_SUCCESS(status)) { FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status); - // Operand may be the null argument, but resolveVariables() handles that - FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status); - return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument)); + InternalValue argument = formatOperand(env, expr.getOperand(), context, status); + if (argument.isFallback()) { + return ResolvedSelector(argument.asFallback()); + } else { + return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), argument.value()); + } } } else if (isFormatter(selectorName)) { context.getErrors().setSelectorError(selectorName, status); @@ -674,9 +677,14 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, UnicodeString fallback(COLON); fallback += selectorName; if (!expr.getOperand().isNull()) { - fallback = formatOperand(env, expr.getOperand(), context, status).fallback; + InternalValue randVal = formatOperand(env, expr.getOperand(), context, status); + if (randVal.isFallback()) { + fallback = randVal.asFallback(); + } else { + fallback = randVal.value().getFallback(); + } } - return ResolvedSelector(FormattedPlaceholder(fallback)); + return ResolvedSelector(fallback); } else { // Might be a variable reference, so expand one more level of variable return resolveVariables(env, expr.getOperand(), context, status); @@ -695,23 +703,12 @@ ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& g // If there is a 
selector, then `resolveVariables()` recorded it in the context if (exprResult.hasSelector()) { - // Check if there was an error - if (exprResult.argument().isFallback()) { - // Use a null expression if it's a syntax or data model warning; - // create a valid (non-fallback) formatted placeholder from the - // fallback string otherwise - if (err.hasSyntaxError() || err.hasDataModelError()) { - return ResolvedSelector(FormattedPlaceholder()); // Null operand - } else { - return ResolvedSelector(exprResult.takeArgument()); - } - } return exprResult; } // No selector was found; error should already have been set - U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError()); - return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback)); + U_ASSERT(err.count() > 0); + return ResolvedSelector(exprResult.getFallback()); } void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { @@ -770,19 +767,17 @@ UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments checkDeclarations(context, env, status); LocalPointer globalEnv(env); + DynamicErrors& err = context.getErrors(); UnicodeString result; - if (dataModel.hasPattern()) { - formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); - } else { - // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value - // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection - const DynamicErrors& err = context.getErrors(); - if (err.hasSyntaxError() || err.hasDataModelError()) { - result += REPLACEMENT; + + if (!(err.hasSyntaxError() || err.hasDataModelError())) { + if (dataModel.hasPattern()) { + formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); } else { formatSelectors(context, *globalEnv, status, result); } } + // Update status according to 
all errors seen while formatting if (signalErrors) { context.checkErrors(status); diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 41e4c9a8020a..e795c46434d2 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -28,6 +28,19 @@ ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { value = std::move(other.value); } +ResolvedFunctionOption::ResolvedFunctionOption(const UnicodeString& n, + FormattedPlaceholder&& v, + UErrorCode& status) { + CHECK_ERROR(status); + + name = n; + LocalPointer + temp(create(std::move(v), status)); + if (U_SUCCESS(status)) { + value.adoptInstead(temp.orphan()); + } +} + ResolvedFunctionOption::~ResolvedFunctionOption() {} @@ -44,27 +57,31 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { options = moveVectorToArray(optionsVector, status); } -UBool FunctionOptions::getFunctionOption(const UnicodeString& key, Formattable& option) const { +const FormattedPlaceholder* +FunctionOptions::getFunctionOption(const UnicodeString& key, + UErrorCode& status) const { if (options == nullptr) { U_ASSERT(functionOptionsLen == 0); } for (int32_t i = 0; i < functionOptionsLen; i++) { const ResolvedFunctionOption& opt = options[i]; if (opt.getName() == key) { - option = opt.getValue(); - return true; + return opt.getValue(); } } - return false; + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; } UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const { - Formattable option; - if (getFunctionOption(key, option)) { - if (option.getType() == UFMT_STRING) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = option.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); + UErrorCode localStatus = U_ZERO_ERROR; + const FormattedPlaceholder* option = getFunctionOption(key, localStatus); + if (U_SUCCESS(localStatus)) 
{ + const Formattable* source = option->getSource(localStatus); + // Null operand should never appear as an option value + U_ASSERT(U_SUCCESS(localStatus)); + UnicodeString val = source->getString(localStatus); + if (U_SUCCESS(localStatus)) { return val; } } @@ -94,6 +111,8 @@ FunctionOptions::~FunctionOptions() { // ResolvedSelector // ---------------- +ResolvedSelector::ResolvedSelector(const UnicodeString& fb) : selector(nullptr), fallback(fb) {} + ResolvedSelector::ResolvedSelector(const FunctionName& fn, Selector* sel, FunctionOptions&& opts, @@ -106,9 +125,15 @@ ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept { selectorName = std::move(other.selectorName); - selector.adoptInstead(other.selector.orphan()); + if (other.selector.isValid()) { + selector.adoptInstead(other.selector.orphan()); + other.selector.adoptInstead(nullptr); + } else { + selector.adoptInstead(nullptr); + } options = std::move(other.options); value = std::move(other.value); + fallback = std::move(other.fallback); return *this; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index b8ae0242367d..82655cf7149b 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -74,6 +74,8 @@ namespace message2 { FormattedPlaceholder&& value); // Used either for errors, or when selector isn't yet known explicit ResolvedSelector(FormattedPlaceholder&& value); + // Used for fallback values + explicit ResolvedSelector(const UnicodeString& fb); bool hasSelector() const { return selector.isValid(); } const FormattedPlaceholder& argument() const { return value; } FormattedPlaceholder&& takeArgument() { return std::move(value); } @@ -88,11 +90,14 @@ namespace message2 { virtual ~ResolvedSelector(); ResolvedSelector& operator=(ResolvedSelector&&) noexcept; 
ResolvedSelector(ResolvedSelector&&); + bool isFallback() const { return !fallback.isEmpty(); } + const UnicodeString& getFallback() const { return fallback; } private: FunctionName selectorName; // For error reporting LocalPointer selector; FunctionOptions options; FormattedPlaceholder value; + UnicodeString fallback; // Non-empty if this is a fallback }; // class ResolvedSelector // Closures and environments diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 3152ccb44fd8..12d2284c11b5 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -9,6 +9,7 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/smpdtfmt.h" +#include "messageformat2_allocation.h" #include "messageformat2_macros.h" #include "limits.h" @@ -17,17 +18,6 @@ U_NAMESPACE_BEGIN namespace message2 { - // Fallback values are enclosed in curly braces; - // see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#formatting-fallback-values - - static UnicodeString fallbackToString(const UnicodeString& s) { - UnicodeString result; - result += LEFT_CURLY_BRACE; - result += s; - result += RIGHT_CURLY_BRACE; - return result; - } - Formattable& Formattable::operator=(Formattable other) noexcept { swap(*this, other); return *this; @@ -179,19 +169,113 @@ namespace message2 { FormattedValue::~FormattedValue() {} + // FormattedPlaceholder + // ------------------- + + FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept { type = other.type; source = other.source; if (type == kEvaluated) { formatted = std::move(other.formatted); - previousOptions = std::move(other.previousOptions); + } + if (other.previousOptions != nullptr) { + previousOptions = other.previousOptions; + other.previousOptions = nullptr; + } else { + previousOptions = nullptr; } fallback = other.fallback; return *this; } - const 
Formattable& FormattedPlaceholder::asFormattable() const { - return source; + const Formattable* FormattedPlaceholder::getSource(UErrorCode& errorCode) const { + if (U_SUCCESS(errorCode)) { + if (type != kNull) { + return &source; + } else { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + } + return nullptr; + } + + FormattedPlaceholder FormattedPlaceholder::withResult(FormattedValue&& result) { + formatted = std::move(result); + type = kEvaluated; + return std::move(*this); + } + + FormattedPlaceholder FormattedPlaceholder::withResultAndOptions(FormattedValue&& result, + FunctionOptions&& opts, + UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + formatted = std::move(result); + type = kEvaluated; + delete previousOptions; + previousOptions = create(std::move(opts), status); + if (U_FAILURE(status)) { + previousOptions = nullptr; + return {}; + } + return std::move(*this); + } + + FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, + FunctionOptions&& opts, + FormattedValue&& output, + UErrorCode& status) + : fallback(input.fallback), + source(input.source), + formatted(std::move(output)), + previousOptions(nullptr), + type(kEvaluated) { + CHECK_ERROR(status); + + LocalPointer temp(create(std::move(opts), status)); + CHECK_ERROR(status); + previousOptions = temp.orphan(); + } + + FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, + FormattedValue&& output, + UErrorCode& status) + : fallback(input.fallback), + source(input.source), + formatted(std::move(output)), + type(kEvaluated) { + initOptions(status); + } + + FormattedPlaceholder::FormattedPlaceholder(const Formattable& input, + const UnicodeString& fb, + UErrorCode& status) + : fallback(fb), source(input), type(kUnevaluated) { + initOptions(status); + } + + FormattedPlaceholder::FormattedPlaceholder() : type(kNull) { + previousOptions = nullptr; + } + + void FormattedPlaceholder::initOptions(UErrorCode& status) { + LocalPointer 
temp(create(FunctionOptions(), status)); + CHECK_ERROR(status); + previousOptions = temp.orphan(); + } + + const message2::FunctionOptions& FormattedPlaceholder::getOptions() const { + U_ASSERT(previousOptions != nullptr); + return *previousOptions; + } + + FormattedPlaceholder::~FormattedPlaceholder() { + if (previousOptions != nullptr) { + delete previousOptions; + previousOptions = nullptr; + } } // Default formatters @@ -239,50 +323,53 @@ namespace message2 { return {}; } - const Formattable& toFormat = input.asFormattable(); + const Formattable* toFormat = input.getSource(status); + U_ASSERT(U_SUCCESS(status)); // Shouldn't get called on a null argument // Try as decimal number first - if (toFormat.isNumeric()) { + if (toFormat->isNumeric()) { // Note: the ICU Formattable has to be created here since the StringPiece // refers to state inside the Formattable; so otherwise we'll have a reference // to a temporary object - icu::Formattable icuFormattable = toFormat.asICUFormattable(status); + icu::Formattable icuFormattable = toFormat->asICUFormattable(status); StringPiece asDecimal = icuFormattable.getDecimalNumber(status); if (U_FAILURE(status)) { return {}; } if (asDecimal != nullptr) { - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, asDecimal, status))); + return FormattedPlaceholder(input, + FormattedValue(formatNumberWithDefaults(locale, asDecimal, status)), + status); } } - UFormattableType type = toFormat.getType(); + UFormattableType type = toFormat->getType(); switch (type) { case UFMT_DATE: { UnicodeString result; - UDate d = toFormat.getDate(status); + UDate d = toFormat->getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, d, result, status); - return FormattedPlaceholder(input, FormattedValue(std::move(result))); + return FormattedPlaceholder(input, FormattedValue(std::move(result)), status); } case UFMT_DOUBLE: { - double d = toFormat.getDouble(status); + double d = 
toFormat->getDouble(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status))); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status)), status); } case UFMT_LONG: { - int32_t l = toFormat.getLong(status); + int32_t l = toFormat->getLong(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status))); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status)), status); } case UFMT_INT64: { - int64_t i = toFormat.getInt64Value(status); + int64_t i = toFormat->getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status))); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status)), status); } case UFMT_STRING: { - const UnicodeString& s = toFormat.getString(status); + const UnicodeString& s = toFormat->getString(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(UnicodeString(s))); + return FormattedPlaceholder(input, FormattedValue(UnicodeString(s)), status); } default: { // No default formatters for other types; use fallback @@ -290,7 +377,7 @@ namespace message2 { // Note: it would be better to set an internal formatting error so that a string // (e.g. the type tag) can be provided. 
However, this method is called by the // public method formatToString() and thus can't take a MessageContext - return FormattedPlaceholder(input.getFallback()); + return {}; } } } @@ -302,9 +389,6 @@ namespace message2 { if (U_FAILURE(status)) { return {}; } - if (isFallback() || isNullOperand()) { - return fallbackToString(fallback); - } // Evaluated value: either just return the string, or format the number // as a string and return it @@ -319,8 +403,7 @@ namespace message2 { UErrorCode savedStatus = status; FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status); if (status == U_MF_FORMATTING_ERROR) { - U_ASSERT(evaluated.isFallback()); - return evaluated.getFallback(); + return {}; } // Ignore U_USING_DEFAULT_WARNING if (status == U_USING_DEFAULT_WARNING) { diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 17955760ecfb..224c06df841f 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -250,6 +250,23 @@ MFFunctionRegistry::~MFFunctionRegistry() { cleanup(); } +/* static */ UnicodeString +StandardFunctions::getStringOption(const FunctionOptions& opts, + const UnicodeString& key, + UErrorCode& status) { + const FormattedPlaceholder* optionVal = opts.getFunctionOption(key, status); + EMPTY_ON_ERROR(status); + + const Formattable* optionSrc = optionVal->getSource(status); + // Null operand should never appear as an option value + U_ASSERT(U_SUCCESS(status)); + + const UnicodeString& result = optionSrc->getString(status); + EMPTY_ON_ERROR(status); + + return result; +} + // Specific formatter implementations // --------- Number @@ -344,7 +361,9 @@ MFFunctionRegistry::~MFFunctionRegistry() { // All other options apply to both `:number` and `:integer` int32_t minIntegerDigits = number.minimumIntegerDigits(opts); - nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + if 
(minIntegerDigits != -1) { + nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + } // signDisplay UnicodeString sd = opts.getStringFunctionOption(UnicodeString("signDisplay")); @@ -420,19 +439,11 @@ Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& loca StandardFunctions::IntegerFactory::~IntegerFactory() {} -static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { - return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); +static FormattedPlaceholder notANumber(const FormattedPlaceholder& input, UErrorCode& status) { + return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")), status); } -static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return {}; - } - - // Copying string to avoid GCC dangling-reference warning - // (although the reference is safe) - UnicodeString inputStr = input.asFormattable().getString(errorCode); - // Precondition: `input`'s source Formattable has type string +static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } @@ -463,10 +474,13 @@ static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& return result; } -static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) { +static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, + FormattedPlaceholder&& arg, + const UnicodeString& input, + UErrorCode& errorCode) { double numberValue = parseNumberLiteral(input, errorCode); if (U_FAILURE(errorCode)) { - return notANumber(input); + return {}; } UErrorCode savedStatus = errorCode; @@ -475,20 +489,19 @@ static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumbe if (errorCode == U_USING_DEFAULT_WARNING) { errorCode = savedStatus; } - 
return FormattedPlaceholder(input, FormattedValue(std::move(result))); + return arg.withResult(FormattedValue(std::move(result))); } -int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (isInteger) { - return 0; - } - - if (opts.getFunctionOption(UnicodeString("maximumFractionDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { +int32_t StandardFunctions::Number::digitSizeOption(const FunctionOptions& opts, + const UnicodeString& k) const { + UErrorCode localStatus = U_ZERO_ERROR; + const FormattedPlaceholder* opt = opts.getFunctionOption(k, + localStatus); + if (U_SUCCESS(localStatus)) { + const Formattable* src = opt->getSource(localStatus); + U_ASSERT(U_SUCCESS(localStatus)); // null shouldn't appear as an option value + int64_t val = getInt64Value(locale, *src, localStatus); + if (U_SUCCESS(localStatus)) { return static_cast(val); } } @@ -498,81 +511,43 @@ int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& return -1; } +int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { + if (isInteger) { + return 0; + } + + return digitSizeOption(opts, UnicodeString("maximumFractionDigits")); +} + int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const { Formattable opt; - if (!isInteger) { - if (opts.getFunctionOption(UnicodeString("minimumFractionDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minFraction() - // is an error. 
- return -1; + return digitSizeOption(opts, UnicodeString("minimumFractionDigits")); } int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(UnicodeString("minimumIntegerDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - return 0; + return digitSizeOption(opts, UnicodeString("minimumIntegerDigits")); } int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (!isInteger) { - if (opts.getFunctionOption(UnicodeString("minimumSignificantDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minSignificantDigits() - // is an error. - return -1; + return digitSizeOption(opts, UnicodeString("minimumSignificantDigits")); } int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(UnicodeString("maximumSignificantDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::maxSignificantDigits() - // is an error. 
- return -1; // Not a valid value for Precision; has to be checked + return digitSizeOption(opts, UnicodeString("maximumSignificantDigits")); } bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { - Formattable opt; - if (isInteger - || !opts.getFunctionOption(UnicodeString("style"), opt) - || opt.getType() != UFMT_STRING) { + const UnicodeString& style = opts.getStringFunctionOption(UnicodeString("style")); + if (isInteger || style.length() == 0) { return false; } - UErrorCode localErrorCode = U_ZERO_ERROR; - const UnicodeString& style = opt.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); return (style == UnicodeString("percent")); } @@ -585,51 +560,52 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar return {}; } - // No argument => return "NaN" - if (!arg.canFormat()) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); - } - number::LocalizedNumberFormatter realFormatter; realFormatter = formatterForOptions(*this, opts, errorCode); number::FormattedNumber numberResult; if (U_SUCCESS(errorCode)) { - // Already checked that contents can be formatted - const Formattable& toFormat = arg.asFormattable(); - switch (toFormat.getType()) { + const Formattable* toFormat = arg.getSource(errorCode); + if (U_FAILURE(errorCode)) { + // number must take an argument + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; + } + switch (toFormat->getType()) { case UFMT_DOUBLE: { - double d = toFormat.getDouble(errorCode); + double d = toFormat->getDouble(errorCode); U_ASSERT(U_SUCCESS(errorCode)); numberResult = realFormatter.formatDouble(d, errorCode); break; } case UFMT_LONG: { - int32_t l = toFormat.getLong(errorCode); + int32_t l = toFormat->getLong(errorCode); U_ASSERT(U_SUCCESS(errorCode)); numberResult = realFormatter.formatInt(l, errorCode); break; } case UFMT_INT64: { - int64_t i = toFormat.getInt64(errorCode); + int64_t i = toFormat->getInt64(errorCode); 
U_ASSERT(U_SUCCESS(errorCode)); numberResult = realFormatter.formatInt(i, errorCode); break; } case UFMT_STRING: { // Try to parse the string as a number - return tryParsingNumberLiteral(realFormatter, arg, errorCode); + return tryParsingNumberLiteral(realFormatter, + std::move(arg), + toFormat->getString(errorCode), + errorCode); } default: { // Other types can't be parsed as a number errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + return notANumber(arg, errorCode); } } } - return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult))); + return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult)), errorCode); } StandardFunctions::Number::~Number() {} @@ -639,18 +615,14 @@ StandardFunctions::NumberFactory::~NumberFactory() {} StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { - Formattable opt; + const UnicodeString& select = opts.getStringFunctionOption(UnicodeString("select")); - if (opts.getFunctionOption(UnicodeString("select"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = opt.getString(localErrorCode); - if (U_SUCCESS(localErrorCode)) { - if (val == UnicodeString("ordinal")) { - return PluralType::PLURAL_ORDINAL; - } - if (val == UnicodeString("exact")) { - return PluralType::PLURAL_EXACT; - } + if (select.length() > 0) { + if (select == UnicodeString("ordinal")) { + return PluralType::PLURAL_ORDINAL; + } + if (select == UnicodeString("exact")) { + return PluralType::PLURAL_EXACT; } } return PluralType::PLURAL_CARDINAL; @@ -681,12 +653,6 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, UErrorCode& errorCode) const { CHECK_ERROR(errorCode); - // No argument => return "NaN" - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } - // Handle any formatting options PluralType type = pluralType(opts); FormattedPlaceholder resolvedSelector = 
numberFormatter->format(std::move(toFormat), @@ -800,21 +766,6 @@ StandardFunctions::PluralFactory::~PluralFactory() {} // --------- DateTimeFactory -/* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - Formattable opt; - if (opts.getFunctionOption(optionName, opt)) { - return opt.getString(errorCode); // In case it's not a string, error code will be set - } else { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - } - // Default is empty string - return {}; -} - // Date/time options only static UnicodeString defaultForOption(const UnicodeString& optionName) { if (optionName == UnicodeString("dateStyle") @@ -845,7 +796,7 @@ UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlac } // Next try the set of options used to construct `toFormat` localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); + s = getStringOption(toFormat.getOptions(), optionName, localErrorCode); if (U_SUCCESS(localErrorCode)) { return s; } @@ -869,7 +820,7 @@ UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlac } // Next try the set of options used to construct `toFormat` localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); + s = getStringOption(toFormat.getOptions(), optionName, localErrorCode); if (U_SUCCESS(localErrorCode)) { return s; } @@ -947,11 +898,11 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& if (U_FAILURE(errorCode)) { return {}; } - - // Argument must be present - if (!toFormat.canFormat()) { + const Formattable* source = toFormat.getSource(errorCode); + // Function requires an operand + if (U_FAILURE(errorCode)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return std::move(toFormat); + return {}; } LocalPointer df; @@ -964,8 +915,10 @@ FormattedPlaceholder 
StandardFunctions::DateTime::format(FormattedPlaceholder&& UnicodeString timeStyleName("timeStyle"); UnicodeString styleName("style"); - bool hasDateStyleOption = opts.getFunctionOption(dateStyleName, opt); - bool hasTimeStyleOption = opts.getFunctionOption(timeStyleName, opt); + UnicodeString dateStyleOption = opts.getStringFunctionOption(dateStyleName); + UnicodeString timeStyleOption = opts.getStringFunctionOption(timeStyleName); + bool hasDateStyleOption = dateStyleOption.length() > 0; + bool hasTimeStyleOption = timeStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime @@ -1124,10 +1077,9 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } UnicodeString result; - const Formattable& source = toFormat.asFormattable(); - switch (source.getType()) { + switch (source->getType()) { case UFMT_STRING: { - const UnicodeString& sourceStr = source.getString(errorCode); + const UnicodeString& sourceStr = source->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); // Pattern for ISO 8601 format - datetime UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss"); @@ -1155,13 +1107,13 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback()); + toFormat.getFallback(), errorCode); df->format(d, result, 0, errorCode); } break; } case UFMT_DATE: { - df->format(source.asICUFormattable(errorCode), result, 0, errorCode); + df->format(source->asICUFormattable(errorCode), result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; @@ -1178,7 +1130,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& if (U_FAILURE(errorCode)) { return {}; } - return
FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); + return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result)), errorCode); } StandardFunctions::DateTimeFactory::~DateTimeFactory() {} @@ -1209,12 +1161,6 @@ void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, // Just compares the key and value as strings - // Argument must be present - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } - prefsLen = 0; // Convert to string diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 733fc5e945d5..8c3d29a1f06e 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -122,6 +122,7 @@ namespace message2 { static Number integer(const Locale& loc); // These options have their own accessor methods, since they have different default values. 
+ int32_t digitSizeOption(const FunctionOptions&, const UnicodeString&) const; int32_t maximumFractionDigits(const FunctionOptions& options) const; int32_t minimumFractionDigits(const FunctionOptions& options) const; int32_t minimumSignificantDigits(const FunctionOptions& options) const; diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index c5459f042f40..0e2c9e0ccdaf 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -33,6 +33,27 @@ namespace message2 { class ResolvedSelector; class StaticErrors; + // Internal use only + // None = null operand + // String = fallback value + // FormattedPlaceholder = non-error value + class InternalValue : public UObject { + public: + bool isFallback() const { return !fallbackString.isEmpty(); } + InternalValue() : fallbackString("") {} + // Fallback constructor + explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} + // Regular value constructor + explicit InternalValue(FormattedPlaceholder&& f) + : fallbackString(""), val(std::move(f)) {} + FormattedPlaceholder value() { return std::move(val); } + UnicodeString asFallback() const { return fallbackString; } + private: + UnicodeString fallbackString; // Non-empty if fallback + // Otherwise, assumed to be a FormattedPlaceholder + FormattedPlaceholder val; + }; // class InternalValue + /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. * @@ -352,25 +373,25 @@ namespace message2 { void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods - [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; + [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; // Formats a call to a formatting function // Dispatches on argument type - [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const; + [[nodiscard]] InternalValue evalFormatterCall(FormattedPlaceholder&& argument, + MessageContext& context, + UErrorCode& status) const; // Dispatches on function name - [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const; + [[nodiscard]] InternalValue evalFormatterCall(const FunctionName& functionName, + FormattedPlaceholder&& argument, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const; // Formats an expression that appears as a selector ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const; // Formats an expression that appears in a pattern or as the definition of a local variable - [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) 
const; - [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 8a779adb9ab3..ed396dd87fa6 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -443,22 +443,31 @@ namespace message2 { * a single named function option. It pairs the given name with the `Formattable` * value resulting from evaluating the option's value. * - * `ResolvedFunctionOption` is immutable and is not copyable or movable. + * `ResolvedFunctionOption` is immutable and movable. It is not copyable. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ #ifndef U_IN_DOXYGEN +class FormattedPlaceholder; +/* + TODO: It would be better not to include null operands or fallback values + in an options map. 
+ Even better would be to handle them differently and not include them in + a FormattedPlaceholder (use a type like std::variant + in the formatter) +*/ class U_I18N_API ResolvedFunctionOption : public UObject { private: /* const */ UnicodeString name; - /* const */ Formattable value; + /* const */ LocalPointer value; public: const UnicodeString& getName() const { return name; } - const Formattable& getValue() const { return value; } - ResolvedFunctionOption(const UnicodeString& n, const Formattable& f) : name(n), value(f) {} + const FormattedPlaceholder* getValue() const { return value.getAlias(); } + FormattedPlaceholder* takeValue() { return value.orphan(); } + ResolvedFunctionOption(const UnicodeString& n, FormattedPlaceholder&& f, UErrorCode& status); ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { @@ -466,107 +475,12 @@ class U_I18N_API ResolvedFunctionOption : public UObject { value = std::move(other.value); return *this; } + ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = delete; + ResolvedFunctionOption(const ResolvedFunctionOption&) = delete; virtual ~ResolvedFunctionOption(); }; // class ResolvedFunctionOption #endif -/** - * Mapping from option names to `message2::Formattable` objects, obtained - * by calling `getOptions()` on a `FunctionOptions` object. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ -using FunctionOptionsMap = std::map; - -/** - * Structure encapsulating named options passed to a custom selector or formatter. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ -class U_I18N_API FunctionOptions : public UObject { - public: - /** - * Returns a map of all name-value pairs provided as options to this function. - * The syntactic order of options is not guaranteed to - * be preserved. 
- * - * This class is immutable and movable but not copyable. - * - * @return A map from strings to `message2::Formattable` objects representing - * the results of resolving each option value. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptionsMap getOptions() const { - int32_t len; - const ResolvedFunctionOption* resolvedOptions = getResolvedFunctionOptions(len); - FunctionOptionsMap result; - for (int32_t i = 0; i < len; i++) { - const ResolvedFunctionOption& opt = resolvedOptions[i]; - result[opt.getName()] = opt.getValue(); - } - return result; - } - /** - * Default constructor. - * Returns an empty mapping. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions() { options = nullptr; } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FunctionOptions(); - /** - * Move assignment operator: - * The source FunctionOptions will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions& operator=(FunctionOptions&&) noexcept; - /** - * Move constructor: - * The source FunctionOptions will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions(FunctionOptions&&); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FunctionOptions& operator=(const FunctionOptions&) = delete; - private: - friend class MessageFormatter; - friend class StandardFunctions; - - explicit FunctionOptions(UVector&&, UErrorCode&); - - const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; - UBool getFunctionOption(const UnicodeString&, Formattable&) const; - // Returns empty string if option doesn't exist - UnicodeString getStringFunctionOption(const UnicodeString&) const; - int32_t optionsCount() const { return functionOptionsLen; } - - // Named options passed to functions - // This is not a Hashtable in order to make it possible for code in a public header file - // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put - // that code in the header because it would have to call internal Hashtable methods. - ResolvedFunctionOption* options; - int32_t functionOptionsLen = 0; -}; // class FunctionOptions // TODO doc comments @@ -671,6 +585,8 @@ class U_I18N_API FunctionOptions : public UObject { number::FormattedNumber numberOutput; }; // class FormattedValue + class FunctionOptions; + /** * A `FormattablePlaceholder` encapsulates an input value (a `message2::Formattable`) * together with an optional output value (a `message2::FormattedValue`). @@ -685,17 +601,6 @@ class U_I18N_API FunctionOptions : public UObject { */ class U_I18N_API FormattedPlaceholder : public UObject { public: - /** - * Fallback constructor. Constructs a value that represents a formatting error, - * without recording an input `Formattable` as the source. - * - * @param s An error string. (See the MessageFormat specification for details - * on fallback strings.) - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedPlaceholder(const UnicodeString& s) : fallback(s), type(kFallback) {} /** * Constructor for fully formatted placeholders. 
* @@ -703,13 +608,13 @@ class U_I18N_API FunctionOptions : public UObject { * `Formattable` used to construct the formatted value. * @param output A `FormattedValue` representing the formatted output of `input`. * Passed by move. + * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(FunctionOptions()), type(kEvaluated) {} + FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output, + UErrorCode& errorCode); /** * Constructor for fully formatted placeholders with options. * @@ -718,24 +623,26 @@ class U_I18N_API FunctionOptions : public UObject { * @param opts Function options that were used to construct `output`. May be the empty map. * @param output A `FormattedValue` representing the formatted output of `input`. * Passed by move. + * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder(const FormattedPlaceholder& input, FunctionOptions&& opts, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(std::move(opts)), type(kEvaluated) {} + FormattedPlaceholder(const FormattedPlaceholder& input, + FunctionOptions&& opts, + FormattedValue&& output, + UErrorCode& errorCode); /** * Constructor for unformatted placeholders. * * @param input A `Formattable` object. * @param fb Fallback string to use if an error occurs while formatting the input. + * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. 
*/ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb) - : fallback(fb), source(input), type(kUnevaluated) {} + FormattedPlaceholder(const Formattable& input, const UnicodeString& fb, UErrorCode& status); /** * Default constructor. Leaves the FormattedPlaceholder in a * valid but undefined state. @@ -743,56 +650,64 @@ class U_I18N_API FunctionOptions : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder() : type(kNull) {} + FormattedPlaceholder(); + // TODO + UBool isNullOperand() const { return type == kNull; } /** - * Returns the source `Formattable` value for this placeholder. - * The result is undefined if this is a null operand. + * Returns a pointer to + * the source Formattable value for this placeholder. + * Sets the error code to failure for a null or fallback placeholder. * * @return A message2::Formattable value. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - const message2::Formattable& asFormattable() const; + const message2::Formattable* getSource(UErrorCode&) const; /** - * Returns true iff this is a fallback placeholder. + * Returns a reference to the option map for this placeholder. * - * @return True if and only if this placeholder was constructed from a fallback string, - * with no `Formattable` source or formatting output. + * @return The options map for this placeholder. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - bool isFallback() const { return type == kFallback; } + const message2::FunctionOptions& getOptions() const; /** - * Returns true iff this is a null placeholder. + * Returns a FormattedPlaceholder with `result` as the result value + * and everything else kept the same. + * `this` cannot be used after calling this method. 
* - * @return True if and only if this placeholder represents the absent argument to a formatter - * that was invoked without an argument. + * @param result FormattedValue to use as the result + * @return A FormattedPlaceholder * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. */ - bool isNullOperand() const { return type == kNull; } + FormattedPlaceholder withResult(FormattedValue&& result); /** - * Returns true iff this has formatting output. + * Returns a FormattedPlaceholder with `result` as the result value + * and `options` as the option map + * and everything else kept the same. + * `this` cannot be used after calling this method. * - * @return True if and only if this was constructed from both an input `Formattable` and - * output `FormattedValue`. + * @param result FormattedValue to use as the result + * @param options FunctionOptionsMap to use as the options + * @param errorCode Input/output error code + * @return A FormattedPlaceholder * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. */ - bool isEvaluated() const { return (type == kEvaluated); } + FormattedPlaceholder withResultAndOptions(FormattedValue&& result, + FunctionOptions&& options, + UErrorCode& errorCode); /** - * Returns true iff this represents a valid argument to the formatter. + * Returns true iff this has formatting output. * - * @return True if and only if this is neither the null argument nor a fallback placeholder. + * @return True if and only if this was constructed from both an input `Formattable` and + * output `FormattedValue`. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - bool canFormat() const { return !(isFallback() || isNullOperand()); } + bool isEvaluated() const { return (type == kEvaluated); } /** * Gets the fallback value of this placeholder, to be used in its place if an error occurs while * formatting it. 
@@ -801,16 +716,6 @@ class U_I18N_API FunctionOptions : public UObject { * @deprecated This API is for technology preview only. */ const UnicodeString& getFallback() const { return fallback; } - /** - * Returns the options of this placeholder. The result is the empty map if !isEvaluated(). - * @return A reference to an option map, capturing the options that were used - * in producing the output of this `FormattedPlaceholder` - * (or empty if there is no output) - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const FunctionOptions& options() const { return previousOptions; } - /** * Returns the formatted output of this placeholder. The result is undefined if !isEvaluated(). * @return A fully formatted `FormattedPlaceholder`. @@ -851,23 +756,123 @@ class U_I18N_API FunctionOptions : public UObject { */ UnicodeString formatToString(const Locale& locale, UErrorCode& status) const; - + // TODO + virtual ~FormattedPlaceholder(); private: friend class MessageFormatter; enum Type { - kFallback, // Represents the result of formatting that encountered an error - kNull, // Represents the absence of both an output and an input (not necessarily an error) + kNull, // Represents the operand of an expression with no syntactic operand + // (Functions can be nullary in MF2 but the C++ representations must + // take an argument, so this is how that's reconciled) kUnevaluated, // `source` should be valid, but there's no result yet kEvaluated, // `formatted` exists }; UnicodeString fallback; Formattable source; FormattedValue formatted; - FunctionOptions previousOptions; // Ignored unless type is kEvaluated + void initOptions(UErrorCode&); + // Can be null if this was default-constructed + FunctionOptions* previousOptions = nullptr; // Ignored unless type is kEvaluated Type type; }; // class FormattedPlaceholder +/** + * Mapping from option names to `message2::Formattable` objects, obtained + * by calling `getOptions()` on a 
`FunctionOptions` object. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ +using FunctionOptionsMap = std::map; + +/** + * Structure encapsulating named options passed to a custom selector or formatter. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ +class U_I18N_API FunctionOptions : public UObject { + public: + /** + * Returns a map of all name-value pairs provided as options to this function. + * The syntactic order of options is not guaranteed to + * be preserved. + * + * This class is immutable and movable but not copyable. + * + * @return A map from strings to FormattedPlaceholder objects representing + * the results of resolving each option value. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptionsMap getOptions() const { + FunctionOptionsMap result; + for (int32_t i = 0; i < functionOptionsLen; i++) { + const ResolvedFunctionOption& opt = options[i]; + result[opt.getName()] = opt.getValue(); + } + return result; + } + /** + * Default constructor. + * Returns an empty mapping. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions() { options = nullptr; } + /** + * Destructor. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~FunctionOptions(); + /** + * Move assignment operator: + * The source FunctionOptions will be left in a valid but undefined state. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions& operator=(FunctionOptions&&) noexcept; + /** + * Move constructor: + * The source FunctionOptions will be left in a valid but undefined state. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ + FunctionOptions(FunctionOptions&&); + /** + * Copy assignment operator (deleted): FunctionOptions is movable but not copyable. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions& operator=(const FunctionOptions&) = delete; + private: + friend class MessageFormatter; + friend class StandardFunctions; + + explicit FunctionOptions(UVector&&, UErrorCode&); + + const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; + const FormattedPlaceholder* getFunctionOption(const UnicodeString&, UErrorCode&) const; + // Returns empty string if option doesn't exist + UnicodeString getStringFunctionOption(const UnicodeString&) const; + int32_t optionsCount() const { return functionOptionsLen; } + + // Named options passed to functions + // This is not a Hashtable in order to make it possible for code in a public header file + // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put + // that code in the header because it would have to call internal Hashtable methods. + ResolvedFunctionOption* options; + int32_t functionOptionsLen = 0; +}; // class FunctionOptions + /** * Not yet implemented: The result of a message formatting operation. Based on * ICU4J's FormattedMessage.java.
diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 71dfb3916c9b..0cdc25633248 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -61,7 +61,8 @@ class TestMessageFormat2: public IntlTest { void testCustomFunctionsComplexMessage(IcuTestErrorCode&); void testGrammarCasesFormatter(IcuTestErrorCode&); void testListFormatter(IcuTestErrorCode&); - void testMessageRefFormatter(IcuTestErrorCode&); + // void testMessageRefFormatter(IcuTestErrorCode&); + void testComplexOptions(IcuTestErrorCode&); // Feature tests void testEmptyMessage(message2::TestCase::Builder&, IcuTestErrorCode&); @@ -161,6 +162,7 @@ class ListFormatter : public Formatter { ListFormatter(const Locale& loc) : locale(loc) {} }; +/* class ResourceManagerFactory : public FormatterFactory { public: Formatter* createFormatter(const Locale&, UErrorCode&) override; @@ -179,6 +181,35 @@ class ResourceManager : public Formatter { ResourceManager(const Locale& loc) : locale(loc) {} const Locale& locale; }; +*/ + +class NounFormatterFactory : public FormatterFactory { + + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; +}; + +class AdjectiveFormatterFactory : public FormatterFactory { + + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; +}; + +class NounFormatter : public Formatter { + public: + FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + private: + friend class NounFormatterFactory; + NounFormatter() { } +}; + +class AdjectiveFormatter : public Formatter { + public: + FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + private: + friend class AdjectiveFormatterFactory; + AdjectiveFormatter() { } +}; } // namespace message2 U_NAMESPACE_END diff --git 
a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index b498be791ca9..a4f353b04884 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -89,7 +89,9 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { .setExpected("Hello Mr. Doe") .setExpectSuccess() .build(); + TestUtils::runTestCase(*this, test, errorCode); + } void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& errorCode) { @@ -182,6 +184,47 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err TestUtils::runTestCase(*this, test, errorCode); } +void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + .adoptFormatter(FunctionName("noun"), new NounFormatterFactory(), errorCode) + .adoptFormatter(FunctionName("adjective"), new AdjectiveFormatterFactory(), errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testComplexOptions"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + + // Test that options can be values with their own resolved + // options attached + TestCase test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + .local $colorMatchingGrammaticalNumberGenderCase = {$color :adjective accord=$item} \ + {{{$colorMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setExpected("red balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + + // Test that the same noun can be used multiple times + test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + 
.local $colorMatchingGrammaticalNumberGenderCase = {$color :adjective accord=$item} \ + .local $sizeMatchingGrammaticalNumberGenderCase = {$size :adjective accord=$item} \ + {{{$colorMatchingGrammaticalNumberGenderCase}, {$sizeMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setArgument(UnicodeString("size"), UnicodeString("huge")) + .setExpected("red balloon (accusative, singular adjective), \ +huge balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + +} + void TestMessageFormat2::testCustomFunctions() { IcuTestErrorCode errorCode(*this, "testCustomFunctions"); @@ -189,7 +232,8 @@ void TestMessageFormat2::testCustomFunctions() { testCustomFunctionsComplexMessage(errorCode); testGrammarCasesFormatter(errorCode); testListFormatter(errorCode); - testMessageRefFormatter(errorCode); + // testMessageRefFormatter(errorCode); + testComplexOptions(errorCode); } @@ -210,30 +254,56 @@ Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UEr return result; } +static UnicodeString getStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k) { + if (opt.count(k) == 0) { + return {}; + } + UErrorCode localErrorCode = U_ZERO_ERROR; + const message2::Formattable* optVal = opt.at(k)->getSource(localErrorCode); + if (U_FAILURE(localErrorCode)) { + return {}; + } + const UnicodeString& val = optVal->getString(localErrorCode); + if (U_SUCCESS(localErrorCode)) { + return val; + } + return {}; +} + +static bool hasStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k, const UnicodeString& v) { + return getStringOption(opt, k) == v; +} + message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { if (U_FAILURE(errorCode)) { return {}; } - message2::FormattedPlaceholder errorVal = 
message2::FormattedPlaceholder("not a person"); - - if (!arg.canFormat() || arg.asFormattable().getType() != UFMT_OBJECT) { - return errorVal; + const Formattable* toFormat = arg.getSource(errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; } - const Formattable& toFormat = arg.asFormattable(); FunctionOptionsMap opt = options.getOptions(); - bool hasFormality = opt.count("formality") > 0 && opt["formality"].getType() == UFMT_STRING; - bool hasLength = opt.count("length") > 0 && opt["length"].getType() == UFMT_STRING; - bool useFormal = hasFormality && opt["formality"].getString(errorCode) == "formal"; - UnicodeString length = hasLength ? opt["length"].getString(errorCode) : "short"; + bool useFormal = hasStringOption(opt, "formality", "formal"); + UnicodeString length = getStringOption(opt, "length"); + if (length.length() == 0) { + length = "short"; + } - const FormattableObject* fp = toFormat.getObject(errorCode); - U_ASSERT(U_SUCCESS(errorCode)); + const FormattableObject* fp = toFormat->getObject(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return {}; + } if (fp == nullptr || fp->tag() != u"person") { - return errorVal; + errorCode = U_MF_FORMATTING_ERROR; + return {}; } const Person* p = static_cast(fp); @@ -267,7 +337,8 @@ message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder& result += firstName; } - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); + FormattedPlaceholder res = arg.withResult(FormattedValue(std::move(result))); + return res; } FormattableProperties::~FormattableProperties() {} @@ -321,37 +392,44 @@ message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholde return {}; } - // Argument must be present - if (!arg.canFormat()) { + const Formattable* toFormat = arg.getSource(errorCode); + // Check for null operand + if (U_FAILURE(errorCode)) { errorCode = U_MF_FORMATTING_ERROR; - 
return message2::FormattedPlaceholder("grammarBB"); + return {}; } - // Assumes the argument is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); UnicodeString result; - - FunctionOptionsMap opt = options.getOptions(); - switch (toFormat.getType()) { + const FunctionOptionsMap opt = options.getOptions(); + switch (toFormat->getType()) { case UFMT_STRING: { - const UnicodeString& in = toFormat.getString(errorCode); + const UnicodeString& in = toFormat->getString(errorCode); bool hasCase = opt.count("case") > 0; - bool caseIsString = opt["case"].getType() == UFMT_STRING; - if (hasCase && caseIsString && (opt["case"].getString(errorCode) == "dative" || opt["case"].getString(errorCode) == "genitive")) { - getDativeAndGenitive(in, result); - } else { - result += in; + const Formattable* caseAsFormattable = opt.at("case")->getSource(errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_MF_FORMATTING_ERROR; + return {}; + } + bool caseIsString = caseAsFormattable->getType() == UFMT_STRING; + if (hasCase && caseIsString) { + const UnicodeString& caseOpt = caseAsFormattable->getString(errorCode); + if (caseOpt == "dative" || caseOpt == "genitive") { + getDativeAndGenitive(in, result); + } + else { + result += in; + } } U_ASSERT(U_SUCCESS(errorCode)); break; } default: { - result += toFormat.getString(errorCode); + result += toFormat->getString(errorCode); break; } } - return message2::FormattedPlaceholder(arg, FormattedValue(std::move(result))); + return arg.withResult(FormattedValue(std::move(result))); } void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) { @@ -430,36 +508,26 @@ message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlacehol return {}; } - message2::FormattedPlaceholder errorVal = FormattedPlaceholder("listformat"); - - // Argument must be present - if (!arg.canFormat()) { - errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + const Formattable* toFormat = arg.getSource(errorCode); + 
if (U_FAILURE(errorCode)) { + // Must have an argument + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; } - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); FunctionOptionsMap opt = options.getOptions(); - bool hasType = opt.count("type") > 0 && opt["type"].getType() == UFMT_STRING; UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND; - if (hasType) { - if (opt["type"].getString(errorCode) == "OR") { - type = UListFormatterType::ULISTFMT_TYPE_OR; - } else if (opt["type"].getString(errorCode) == "UNITS") { - type = UListFormatterType::ULISTFMT_TYPE_UNITS; - } + if (hasStringOption(opt, "type", "OR")) { + type = UListFormatterType::ULISTFMT_TYPE_OR; + } else if (hasStringOption(opt, "type", "UNITS")) { + type = UListFormatterType::ULISTFMT_TYPE_UNITS; } - bool hasWidth = opt.count("width") > 0 && opt["width"].getType() == UFMT_STRING; UListFormatterWidth width = UListFormatterWidth::ULISTFMT_WIDTH_WIDE; - if (hasWidth) { - if (opt["width"].getString(errorCode) == "SHORT") { - width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; - } else if (opt["width"].getString(errorCode) == "NARROW") { - width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; - } + if (hasStringOption(opt, "width", "SHORT")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; + } else if (hasStringOption(opt, "width", "NARROW")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; } - U_ASSERT(U_SUCCESS(errorCode)); LocalPointer lf(icu::ListFormatter::createInstance(locale, type, width, errorCode)); if (U_FAILURE(errorCode)) { return {}; @@ -467,13 +535,13 @@ message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlacehol UnicodeString result; - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_ARRAY: { int32_t n_items; - const Formattable* objs = toFormat.getArray(n_items, errorCode); + const Formattable* objs = toFormat->getArray(n_items, errorCode); if (U_FAILURE(errorCode)) { errorCode = 
U_MF_FORMATTING_ERROR; - return errorVal; + return {}; } UnicodeString* parts = new UnicodeString[n_items]; if (parts == nullptr) { @@ -489,13 +557,13 @@ message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlacehol break; } default: { - result += toFormat.getString(errorCode); + result += toFormat->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); break; } } - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); + return arg.withResult(FormattedValue(std::move(result))); } void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { @@ -535,6 +603,7 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { See ICU4J: CustomFormatterMessageRefTest.java */ +#if false /* static */ Hashtable* message2::ResourceManager::properties(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -582,11 +651,19 @@ Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorC using Arguments = MessageArguments; +// TODO: The next test is commented out because we need to write code +// to convert an options map to a MessageArguments (mapping FormattedPlaceholder +// back to Formattable) + static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { if (U_FAILURE(status)) { return {}; } - return MessageArguments(opts, status); + std::map result; + for (auto iter = opts.cbegin(); iter != opts.cend(); ++iter) { + result[iter->first] = iter->second->getSource(status); + } + return MessageArguments(result, status); } message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { @@ -596,18 +673,16 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("msgref"); - // Argument must be present - if (!arg.canFormat()) { + const Formattable* toFormat = arg.getSource(errorCode); + // Check for null or 
fallback + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_FORMATTING_ERROR; return errorVal; } - - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); UnicodeString in; - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_STRING: { - in = toFormat.getString(errorCode); + in = toFormat->getString(errorCode); break; } default: { @@ -615,13 +690,13 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar return errorVal; } } - FunctionOptionsMap opt = options.getOptions(); - bool hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getType() == UFMT_OBJECT && opt["resbundle"].getObject(errorCode)->tag() == u"properties"; + FunctionOptionsMap opt = FunctionOptions::getOptions(std::move(options)); + bool hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getValue().getType() == UFMT_OBJECT && opt["resbundle"].getValue().getObject(errorCode)->tag() == u"properties"; // If properties were provided, look up the given string in the properties, // yielding a message if (hasProperties) { - const FormattableProperties* properties = reinterpret_cast(opt["resbundle"].getObject(errorCode)); + const FormattableProperties* properties = reinterpret_cast(opt["resbundle"].getValue().getObject(errorCode)); U_ASSERT(U_SUCCESS(errorCode)); UnicodeString* msg = static_cast(properties->properties->get(in)); if (msg == nullptr) { @@ -646,7 +721,7 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar if (U_FAILURE(errorCode)) { errorCode = savedStatus; } - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); + return arg.withOutput(FormattedValue(std::move(result)), errorCode); } else { // Properties must be provided errorCode = U_MF_FORMATTING_ERROR; @@ -726,6 +801,131 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { .build(); TestUtils::runTestCase(*this, test, errorCode); } +#endif + 
+Formatter* NounFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + // Locale not used + (void) locale; + + Formatter* result = new NounFormatter(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +Formatter* AdjectiveFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + // Locale not used + (void) locale; + + Formatter* result = new AdjectiveFormatter(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +message2::FormattedPlaceholder NounFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return {}; + } + + const Formattable* toFormat = arg.getSource(errorCode); + // Must have an argument + if (U_FAILURE(errorCode)) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; + } + FunctionOptionsMap opt = options.getOptions(); + + // very simplified example + bool useAccusative = hasStringOption(opt, "case", "accusative"); + bool useSingular = hasStringOption(opt, "count", "1"); + const UnicodeString& noun = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + return {}; + } + + UnicodeString result; + if (useAccusative) { + if (useSingular) { + result = noun + " accusative, singular noun"; + } else { + result = noun + " accusative, plural noun"; + } + } else { + if (useSingular) { + result = noun + " dative, singular noun"; + } else { + result = noun + " dative, plural noun"; + } + } + + return arg.withResultAndOptions(FormattedValue(result), std::move(options), errorCode); +} + +message2::FormattedPlaceholder AdjectiveFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return {}; + } + + const Formattable* toFormat = 
arg.getSource(errorCode); + // Must have an argument + if (U_FAILURE(errorCode)) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; + } + + const FunctionOptionsMap opt = options.getOptions(); + // Return empty string if no accord is provided + if (opt.count("accord") <= 0) { + return {}; + } + + const FormattedPlaceholder& accordOpt = *opt.at("accord"); + // Fail if no accord is provided, as this is a simplified example + const Formattable* accordSrc = accordOpt.getSource(errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + UnicodeString accord = accordSrc->getString(errorCode); + const UnicodeString& adjective = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + return {}; + } + + UnicodeString result = adjective + " " + accord; + // very simplified example + const FunctionOptionsMap accordOptionsMap = accordOpt.getOptions().getOptions(); + bool accordIsAccusative = hasStringOption(accordOptionsMap, "case", "accusative"); + bool accordIsSingular = hasStringOption(accordOptionsMap, "count", "1"); + if (accordIsAccusative) { + if (accordIsSingular) { + result += " (accusative, singular adjective)"; + } else { + result += " (accusative, plural adjective)"; + } + } else { + if (accordIsSingular) { + result += " (dative, singular adjective)"; + } else { + result += " (dative, plural adjective)"; + } + } + + return arg.withResultAndOptions(FormattedValue(std::move(result)), + std::move(options), + errorCode); +} + #endif /* #if !UCONFIG_NO_MF2 */ diff --git a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json index 6d78ffe4f04d..4a4d6eb1529d 100644 --- a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json +++ b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json @@ -129,7 +129,7 @@ ], "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, 
August 3, 2024 at 9:43 PM'?", "params": {"exp": { "date": 1722746637000 }, "user": "John", "tsOver" : "long" }, - "ignoreTest": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + "ignoreTest": "timeStyle=long should print 'PDT', not 'Pacific Daylight Time'?" }, { "srcs": [ diff --git a/testdata/message2/duplicate-declarations.json b/testdata/message2/duplicate-declarations.json index cd3acc1576d3..b744365f51f1 100644 --- a/testdata/message2/duplicate-declarations.json +++ b/testdata/message2/duplicate-declarations.json @@ -12,32 +12,26 @@ "tests": [ { "src": ".local $foo = {$foo} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {42} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {:unknown} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { - "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}", - "exp": "13 42" + "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}" }, { "src": ".local $foo = {$foo} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar foo" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {$bar} .local $bar = {$baz} {{bar {$foo}}}", - "params": [{ "name": "baz", "value": "foo" }], - "exp": "bar {$bar}" + "params": [{ "name": "baz", "value": "foo" }] } ] } diff --git a/testdata/message2/more-functions.json b/testdata/message2/more-functions.json index b34803635ce9..093678905b0c 100644 --- a/testdata/message2/more-functions.json +++ b/testdata/message2/more-functions.json @@ -112,6 +112,35 @@ "exp": "Default 
number: 1.234.567.890.123.456.789,987654!", "locale": "ro", "params": [{ "name": "val", "value": {"decimal": "1234567890123456789.987654321"} }] + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=2} .local $y = {$x :number minimumFractionDigits=5} {{{$x} {$y}}}", + "exp": "42.00 42.00000", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumFractionDigits=2} {{{$x} {$y}}}", + "exp": "42.00000 42.00", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumIntegerDigits=3} {{{$x} {$y}}}", + "exp": "42.00000 042.00000", + "locale": "en" + }, + { + "comment": "Modified from icu4j test", + "srcs": [ + ".input {$exp :datetime timeStyle=short}\n", + ".input {$user :string}\n", + ".local $longExp = {$exp :datetime dateStyle=long}\n", + ".local $zooExp = {$exp :datetime dateStyle=short timeStyle=$tsOver}\n", + "{{Hello John, you want '{$exp}', '{$longExp}', or '{$zooExp}' or even '{$exp :datetime dateStyle=full}'?}}" + ], + "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}, + {"name": "user", "value": "John"}, + {"name": "tsOver", "value" : "full" }] } ] } diff --git a/testdata/message2/tricky-declarations.json b/testdata/message2/tricky-declarations.json index 3fded666e633..eb7b5ac90add 100644 --- a/testdata/message2/tricky-declarations.json +++ b/testdata/message2/tricky-declarations.json @@ -6,12 +6,10 @@ }, "tests": [ { "src": ".input {$var :number minimumFractionDigits=$var2} .input {$var2 :number minimumFractionDigits=5} {{{$var} {$var2}}}", - "exp": "1.000 3.00000", "params": [{ "name": "var", "value": 1}, {"name": "var2", "value": 3 }], "expErrors": [{ "type": "duplicate-declaration" }] }, { "src": ".local $var = {$var2} .local $var2 = {1} {{{$var} 
{$var2}}}", - "exp": "5 1", "params": [{ "name": "var2", "value": 5 }], "expErrors": [{ "type": "duplicate-declaration" }] } From 8cae40c9b92fa05193b8df034c1fa0192b7fad79 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 30 Sep 2024 18:19:49 -0700 Subject: [PATCH 02/37] Fix warnings --- icu4c/source/i18n/messageformat2_formattable.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 12d2284c11b5..54900beb2bc7 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -234,7 +234,7 @@ namespace message2 { type(kEvaluated) { CHECK_ERROR(status); - LocalPointer temp(create(std::move(opts), status)); + LocalPointer temp(create(std::move(opts), status)); CHECK_ERROR(status); previousOptions = temp.orphan(); } @@ -261,7 +261,7 @@ namespace message2 { } void FormattedPlaceholder::initOptions(UErrorCode& status) { - LocalPointer temp(create(FunctionOptions(), status)); + LocalPointer temp(create(FunctionOptions(), status)); CHECK_ERROR(status); previousOptions = temp.orphan(); } From 71209d9a9784da56163081395c2810e2dfe104cd Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 1 Oct 2024 13:33:38 -0700 Subject: [PATCH 03/37] Fix leaks --- icu4c/source/i18n/messageformat2.cpp | 19 +++++++++++++++++-- .../source/i18n/messageformat2_evaluation.cpp | 1 + .../i18n/messageformat2_formattable.cpp | 3 +++ icu4c/source/i18n/unicode/messageformat2.h | 3 +++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 3448470bb9ec..d300764dc6f9 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -51,6 +51,18 @@ static Formattable evalLiteral(const Literal& lit) { return FormattedPlaceholder(evalLiteral(lit), lit.quoted(), errorCode); } + 
+InternalValue::~InternalValue() {} +InternalValue& InternalValue::operator=(InternalValue&& other) { + fallbackString = other.fallbackString; + val = std::move(other.val); + return *this; +} + +InternalValue::InternalValue(InternalValue&& other) { + *this = std::move(other); +} + [[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, const Operand& rand, MessageContext& context, @@ -658,14 +670,17 @@ ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, // Already checked that rator is non-reserved const FunctionName& selectorName = rator->getFunctionName(); if (isSelector(selectorName)) { - auto selector = getSelector(context, selectorName, status); + LocalPointer selector(getSelector(context, selectorName, status)); if (U_SUCCESS(status)) { FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status); InternalValue argument = formatOperand(env, expr.getOperand(), context, status); if (argument.isFallback()) { return ResolvedSelector(argument.asFallback()); } else { - return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), argument.value()); + return ResolvedSelector(selectorName, + selector.orphan(), + std::move(resolvedOptions), + argument.value()); } } } else if (isFormatter(selectorName)) { diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index e795c46434d2..111d90183d13 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -106,6 +106,7 @@ FunctionOptions::FunctionOptions(FunctionOptions&& other) { FunctionOptions::~FunctionOptions() { if (options != nullptr) { delete[] options; + options = nullptr; } } // ResolvedSelector diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 54900beb2bc7..7c418223f4e9 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp 
+++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -179,6 +179,9 @@ namespace message2 { if (type == kEvaluated) { formatted = std::move(other.formatted); } + if (previousOptions != nullptr) { + delete previousOptions; + } if (other.previousOptions != nullptr) { previousOptions = other.previousOptions; other.previousOptions = nullptr; diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index 0e2c9e0ccdaf..c875f762d2ed 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -48,6 +48,9 @@ namespace message2 { : fallbackString(""), val(std::move(f)) {} FormattedPlaceholder value() { return std::move(val); } UnicodeString asFallback() const { return fallbackString; } + virtual ~InternalValue(); + InternalValue& operator=(InternalValue&&); + InternalValue(InternalValue&&); private: UnicodeString fallbackString; // Non-empty if fallback // Otherwise, assumed to be a FormattedPlaceholder From c8d28b39af0e6484750836ce4a4264ed5258d451 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 1 Oct 2024 13:38:49 -0700 Subject: [PATCH 04/37] Fix doc comments and move InternalValue to a private header file --- icu4c/source/i18n/messageformat2_evaluation.h | 22 ++++++++++++++++ icu4c/source/i18n/unicode/messageformat2.h | 25 +------------------ .../i18n/unicode/messageformat2_formattable.h | 12 +++++++-- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 82655cf7149b..bed6c197dca5 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,6 +32,28 @@ namespace message2 { using namespace data_model; + // InternalValue + // Encodes an "either a fallback string or a FormattedPlaceholder" + class InternalValue : public UObject { + public: + bool isFallback() const { return !fallbackString.isEmpty(); } + 
InternalValue() : fallbackString("") {} + // Fallback constructor + explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} + // Regular value constructor + explicit InternalValue(FormattedPlaceholder&& f) + : fallbackString(""), val(std::move(f)) {} + FormattedPlaceholder value() { return std::move(val); } + UnicodeString asFallback() const { return fallbackString; } + virtual ~InternalValue(); + InternalValue& operator=(InternalValue&&); + InternalValue(InternalValue&&); + private: + UnicodeString fallbackString; // Non-empty if fallback + // Otherwise, assumed to be a FormattedPlaceholder + FormattedPlaceholder val; + }; // class InternalValue + // PrioritizedVariant // For how this class is used, see the references to (integer, variant) tuples diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index c875f762d2ed..7a306d8b96b8 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -32,30 +32,7 @@ namespace message2 { class MessageContext; class ResolvedSelector; class StaticErrors; - - // Internal use only - // None = null operand - // String = fallback value - // FormattedPlaceholder = non-error value - class InternalValue : public UObject { - public: - bool isFallback() const { return !fallbackString.isEmpty(); } - InternalValue() : fallbackString("") {} - // Fallback constructor - explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} - // Regular value constructor - explicit InternalValue(FormattedPlaceholder&& f) - : fallbackString(""), val(std::move(f)) {} - FormattedPlaceholder value() { return std::move(val); } - UnicodeString asFallback() const { return fallbackString; } - virtual ~InternalValue(); - InternalValue& operator=(InternalValue&&); - InternalValue(InternalValue&&); - private: - UnicodeString fallbackString; // Non-empty if fallback - // Otherwise, assumed to be a FormattedPlaceholder - FormattedPlaceholder val; - }; // 
class InternalValue + class InternalValue; /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index ed396dd87fa6..ab748d7c61d8 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -642,7 +642,7 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb, UErrorCode& status); + FormattedPlaceholder(const Formattable& input, const UnicodeString& fb, UErrorCode& errorCode); /** * Default constructor. Leaves the FormattedPlaceholder in a * valid but undefined state. @@ -651,7 +651,15 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @deprecated This API is for technology preview only. */ FormattedPlaceholder(); - // TODO + /** + * Returns true iff this FormattedPlaceholder represents a null operand + * (the absence of an operand). + * + * @return A boolean indicating whether this is a null operand. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ UBool isNullOperand() const { return type == kNull; } /** * Returns a pointer to From a9d3547826e6fd4a2b866202fdd94f550e485375 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 1 Oct 2024 15:51:46 -0700 Subject: [PATCH 05/37] Extend InternalValue and eliminate ResolvedSelector --- icu4c/source/i18n/messageformat2.cpp | 313 ++++++++---------- .../source/i18n/messageformat2_evaluation.cpp | 87 +++-- icu4c/source/i18n/messageformat2_evaluation.h | 80 ++--- icu4c/source/i18n/unicode/messageformat2.h | 28 +- 4 files changed, 244 insertions(+), 264 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index d300764dc6f9..1337391b2a20 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -51,18 +51,6 @@ static Formattable evalLiteral(const Literal& lit) { return FormattedPlaceholder(evalLiteral(lit), lit.quoted(), errorCode); } - -InternalValue::~InternalValue() {} -InternalValue& InternalValue::operator=(InternalValue&& other) { - fallbackString = other.fallbackString; - val = std::move(other.val); - return *this; -} - -InternalValue::InternalValue(InternalValue&& other) { - *this = std::move(other); -} - [[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, const Operand& rand, MessageContext& context, @@ -130,25 +118,34 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O if (U_FAILURE(status)) { return {}; } - if (!rhsVal.isFallback()) { - FormattedPlaceholder optVal = rhsVal.value(); - U_ASSERT(!optVal.isNullOperand()); // Option value can't be absent, syntactically - ResolvedFunctionOption opt(k, std::move(optVal), status); - resolvedOpt.adoptInstead(create(std::move(opt), status)); - if (U_FAILURE(status)) { - return {}; + // Force evaluation in order to extract a FormattedPlaceholder + // from `rhsVal` (which might be a suspension) + FormattedPlaceholder optVal = rhsVal.takeValue(status); + if 
(U_FAILURE(status)) { + status = U_ZERO_ERROR; + InternalValue applied = eval(context, std::move(rhsVal), status); + if (applied.isFallback()) { + // Skip this option + continue; } - optionsVector->adoptElement(resolvedOpt.orphan(), status); + optVal = applied.takeValue(status); + U_ASSERT(U_SUCCESS(status)); } + U_ASSERT(!optVal.isNullOperand()); // Option value can't be absent, syntactically + + // The option is resolved; add it to the vector + ResolvedFunctionOption resolvedOpt(k, std::move(optVal), status); + LocalPointer p(create(std::move(resolvedOpt), status)); + EMPTY_ON_ERROR(status); + optionsVector->adoptElement(p.orphan(), status); } return FunctionOptions(std::move(*optionsVector), status); } // Overload that dispatches on argument type. Syntax doesn't provide for options in this case. -[[nodiscard]] InternalValue MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const { +[[nodiscard]] FunctionName MessageFormatter::getFormatterNameByType(const FormattedPlaceholder& argument, + UErrorCode& status) const { if (U_FAILURE(status)) { return {}; } @@ -167,11 +164,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // No formatter for this type -- follow default behavior break; } - return evalFormatterCall(functionName, - std::move(argument), - FunctionOptions(), - context, - status); + return functionName; } default: { // TODO: The array case isn't handled yet; not sure whether it's desirable @@ -180,16 +173,39 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } } // No formatter for this type, or it's a primitive type (which will be formatted later) - // -- just return the argument itself - return InternalValue(std::move(argument)); + return {}; +} + +// Takes a possibly-suspended value and returns a fully-evaluated one; +// the result is either fully evaluated or an error value +[[nodiscard]] InternalValue 
MessageFormatter::eval(MessageContext& context, + InternalValue suspension, + UErrorCode& status) const { + EMPTY_ON_ERROR(status); + + // If we already have an error value or fully-evaluated value, + // just return it + if (!suspension.isSuspension()) { + return suspension; + } + + // Get the function name, operand, and options + // (which should succeed, since we already did the check) + FunctionName functionName = suspension.getFunctionName(status); + FormattedPlaceholder operand = suspension.takeOperand(status); + FunctionOptions options = suspension.takeOptions(status); + U_ASSERT(U_SUCCESS(status)); + + // Call the function with the operand and options + return apply(functionName, std::move(operand), std::move(options), context, status); } // Overload that dispatches on function name -[[nodiscard]] InternalValue MessageFormatter::evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { +[[nodiscard]] InternalValue MessageFormatter::apply(const FunctionName& functionName, + FormattedPlaceholder&& argument, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { return {}; } @@ -268,17 +284,32 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // Format the operand (formatOperand handles the case of a null operand) InternalValue randVal = formatOperand(globalEnv, rand, context, status); - // Don't call the function on error values - if (randVal.isFallback()) { - return randVal; - } - if (!expr.isFunctionCall()) { - // Dispatch based on type of `randVal` - return evalFormatterCall(randVal.value(), - context, - status); + if (randVal.isSuspension() || randVal.isFallback()) { + return randVal; + } + FormattedPlaceholder operand = randVal.takeValue(status); + U_ASSERT(U_SUCCESS(status)); + // In this case, the operand is an unannotated literal or argument + // 
Dispatch based on type of `operand` + FunctionName defaultFormatterName = getFormatterNameByType(operand, status); + if (defaultFormatterName.isEmpty()) { + return InternalValue(std::move(operand)); + } + return InternalValue(defaultFormatterName, + FunctionOptions(), + std::move(operand), + status); } else { + randVal = eval(context, std::move(randVal), status); + // Don't call the function on error values + + if (randVal.isFallback()) { + return randVal; + } + FormattedPlaceholder operand = randVal.takeValue(status); + U_ASSERT(U_SUCCESS(status)); + const Operator* rator = expr.getOperator(status); U_ASSERT(U_SUCCESS(status)); const FunctionName& functionName = rator->getFunctionName(); @@ -287,11 +318,10 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); // Call the formatter function - return evalFormatterCall(functionName, - randVal.value(), - std::move(resolvedOptions), - context, - status); + return InternalValue(functionName, + std::move(resolvedOptions), + std::move(operand), + status); } } @@ -308,15 +338,15 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& } else { // Format the expression InternalValue partVal = formatExpression(globalEnv, part.contents(), context, status); - if (partVal.isFallback()) { + // Force full evaluation + InternalValue partResult = eval(context, std::move(partVal), status); + if (partResult.isFallback()) { result += LEFT_CURLY_BRACE; - result += partVal.asFallback(); + result += partResult.asFallback(); result += RIGHT_CURLY_BRACE; } else { - // Force full evaluation, e.g. applying default formatters to - // unformatted input (or formatting numbers as strings) - UnicodeString partResult = partVal.value().formatToString(locale, status); - result += partResult; + // Do final formatting (e.g. 
formatting numbers as strings) + result += partResult.takeValue(status).formatToString(locale, status); // Handle formatting errors. `formatToString()` can't take a context and thus can't // register an error directly if (status == U_MF_FORMATTING_ERROR) { @@ -334,6 +364,32 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // ------------------------------------------------------ // Selection +bool MessageFormatter::isSelectable(const InternalValue& rv) const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionName& selectorName = rv.getFunctionName(localStatus); + if (U_FAILURE(localStatus)) { + return false; + } + return isSelector(selectorName); +} + +void MessageFormatter::setNotSelectableError(MessageContext& context, + const InternalValue& rv, + UErrorCode& status) const { + CHECK_ERROR(status); + if (rv.isFallback()) { + context.getErrors().setSelectorError({}, status); + return; + } + const FunctionName& functionName = rv.getFunctionName(status); + U_ASSERT(U_SUCCESS(status)); + if (isFormatter(functionName)) { + context.getErrors().setSelectorError(functionName, status); + } else { + context.getErrors().setUnknownFunction(functionName, status); + } +} + // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors // `res` is a vector of ResolvedSelectors void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { @@ -346,8 +402,8 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status); - if (rv.hasSelector()) { + InternalValue rv = formatExpression(env, selectors[i], context, status); + if (isSelectable(rv)) { // 2ii. 
If selection is supported for rv: // (True if this code has been reached) } else { @@ -356,15 +412,11 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // Append nomatch as the last element of the list res. // Emit a Selection Error. // (Note: in this case, rv, being a fallback, serves as `nomatch`) - #if U_DEBUG - const DynamicErrors& err = context.getErrors(); - U_ASSERT(err.hasError()); - U_ASSERT(rv.isFallback()); - #endif + setNotSelectableError(context, rv, status); } // 2ii(a). Append rv as the last element of the list res. // (Also fulfills 2iii) - LocalPointer v(create(std::move(rv), status)); + LocalPointer v(create(std::move(rv), status)); CHECK_ERROR(status); res.adoptElement(v.orphan(), status); } @@ -374,17 +426,25 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // `keys` and `matches` are vectors of strings void MessageFormatter::matchSelectorKeys(const UVector& keys, MessageContext& context, - ResolvedSelector&& rv, + InternalValue&& rv, UVector& keysOut, UErrorCode& status) const { CHECK_ERROR(status); - if (!rv.hasSelector()) { - // Return an empty list of matches + if (!isSelectable(rv)) { return; } - auto selectorImpl = rv.getSelector(); + const FunctionName& selectorName = rv.getFunctionName(status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + // Return an empty list of matches + return; + } + auto selectorImpl = getSelector(context, selectorName, status); + if (U_FAILURE(status)) { + return; + } U_ASSERT(selectorImpl != nullptr); UErrorCode savedStatus = status; @@ -412,7 +472,10 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, int32_t prefsLen = 0; // Call the selector - selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(), + FormattedPlaceholder rand = rv.takeOperand(status); + FunctionOptions opts = rv.takeOptions(status); + U_ASSERT(U_SUCCESS(status)); // Did this check earlier + selectorImpl->selectKey(std::move(rand), std::move(opts), 
adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, status); @@ -420,7 +483,7 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, if (savedStatus != status) { if (U_FAILURE(status)) { status = U_ZERO_ERROR; - context.getErrors().setSelectorError(rv.getSelectorName(), status); + context.getErrors().setSelectorError(selectorName, status); } else { // Ignore warnings status = savedStatus; @@ -483,7 +546,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, } // 2iii. Let `rv` be the resolved value at index `i` of `res`. U_ASSERT(i < res.size()); - ResolvedSelector rv = std::move(*(static_cast(res[i]))); + InternalValue rv = std::move(*(static_cast(res[i]))); // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) LocalPointer matches(createUVector(status)); matchSelectorKeys(*keys, context, std::move(rv), *matches, status); @@ -616,116 +679,6 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo // 7. Select the pattern of `var` } - -// Evaluate the operand -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - if (rand.isNull()) { - return ResolvedSelector(FormattedPlaceholder()); - } - - if (rand.isLiteral()) { - return ResolvedSelector(formatLiteral(rand.asLiteral(), status)); - } - - // Must be variable - const VariableName& var = rand.asVariable(); - // Resolve the variable - if (env.has(var)) { - const Closure& referent = env.lookup(var); - // Resolve the referent - return resolveVariables(referent.getEnv(), referent.getExpr(), context, status); - } - // Either this is a global var or an unbound var -- - // either way, it can't be bound to a function call. 
- // Check globals - FormattedPlaceholder val = evalArgument(var, context, status); - if (status == U_ILLEGAL_ARGUMENT_ERROR) { - status = U_ZERO_ERROR; - // Unresolved variable -- could be a previous warning. Nothing to resolve - U_ASSERT(context.getErrors().hasUnresolvedVariableError()); - return ResolvedSelector(var); - } - // Pass through other errors - return ResolvedSelector(std::move(val)); -} - -// Evaluate the expression except for not performing the top-level function call -// (which is expected to be a selector, but may not be, in error cases) -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Function call -- resolve the operand and options - if (expr.isFunctionCall()) { - const Operator* rator = expr.getOperator(status); - U_ASSERT(U_SUCCESS(status)); - // Already checked that rator is non-reserved - const FunctionName& selectorName = rator->getFunctionName(); - if (isSelector(selectorName)) { - LocalPointer selector(getSelector(context, selectorName, status)); - if (U_SUCCESS(status)) { - FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status); - InternalValue argument = formatOperand(env, expr.getOperand(), context, status); - if (argument.isFallback()) { - return ResolvedSelector(argument.asFallback()); - } else { - return ResolvedSelector(selectorName, - selector.orphan(), - std::move(resolvedOptions), - argument.value()); - } - } - } else if (isFormatter(selectorName)) { - context.getErrors().setSelectorError(selectorName, status); - } else { - context.getErrors().setUnknownFunction(selectorName, status); - } - // Non-selector used as selector; an error would have been recorded earlier - UnicodeString fallback(COLON); - fallback += selectorName; - if (!expr.getOperand().isNull()) { - InternalValue randVal = formatOperand(env, expr.getOperand(), 
context, status); - if (randVal.isFallback()) { - fallback = randVal.asFallback(); - } else { - fallback = randVal.value().getFallback(); - } - } - return ResolvedSelector(fallback); - } else { - // Might be a variable reference, so expand one more level of variable - return resolveVariables(env, expr.getOperand(), context, status); - } -} - -ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Resolve expression to determine if it's a function call - ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status); - - DynamicErrors& err = context.getErrors(); - - // If there is a selector, then `resolveVariables()` recorded it in the context - if (exprResult.hasSelector()) { - return exprResult; - } - - // No selector was found; error should already have been set - U_ASSERT(err.count() > 0); - return ResolvedSelector(exprResult.getFallback()); -} - void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 111d90183d13..97c2b7366f09 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -109,40 +109,79 @@ FunctionOptions::~FunctionOptions() { options = nullptr; } } -// ResolvedSelector -// ---------------- -ResolvedSelector::ResolvedSelector(const UnicodeString& fb) : selector(nullptr), fallback(fb) {} +// InternalValue +// ------------- + -ResolvedSelector::ResolvedSelector(const FunctionName& fn, - Selector* sel, - FunctionOptions&& opts, - FormattedPlaceholder&& val) - : selectorName(fn), selector(sel), options(std::move(opts)), value(std::move(val)) { - U_ASSERT(sel != nullptr); 
+InternalValue::~InternalValue() {} +InternalValue& InternalValue::operator=(InternalValue&& other) { + fallbackString = other.fallbackString; + functionName = other.functionName; + resolvedOptions = std::move(other.resolvedOptions); + operand = std::move(other.operand); + return *this; } -ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move(val)) {} +InternalValue::InternalValue(InternalValue&& other) { + *this = std::move(other); +} -ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept { - selectorName = std::move(other.selectorName); - if (other.selector.isValid()) { - selector.adoptInstead(other.selector.orphan()); - other.selector.adoptInstead(nullptr); - } else { - selector.adoptInstead(nullptr); +InternalValue::InternalValue(const FunctionName& name, + FunctionOptions&& options, + FormattedPlaceholder&& rand, + UErrorCode& status) : fallbackString(""), functionName(name) { + if (U_FAILURE(status)) { + return; } - options = std::move(other.options); - value = std::move(other.value); - fallback = std::move(other.fallback); - return *this; + resolvedOptions.adoptInstead(create(std::move(options), status)); + operand = std::move(rand); } -ResolvedSelector::ResolvedSelector(ResolvedSelector&& other) { - *this = std::move(other); +FormattedPlaceholder InternalValue::takeValue(UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + if (!functionName.isEmpty() || !fallbackString.isEmpty()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + return std::move(operand); +} +// Only works if not fully evaluated +FormattedPlaceholder InternalValue::takeOperand(UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + if (functionName.isEmpty()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + return std::move(operand); +} +// Only works if not fully evaluated +FunctionOptions InternalValue::takeOptions(UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + if 
(!resolvedOptions.isValid()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + return std::move(*resolvedOptions.orphan()); +} +// Only works if not fully evaluated +FunctionName InternalValue::getFunctionName(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; + } + if (functionName.isEmpty()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + return functionName; } -ResolvedSelector::~ResolvedSelector() {} // PrioritizedVariant // ------------------ diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index bed6c197dca5..7a6c023f19bf 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,26 +32,51 @@ namespace message2 { using namespace data_model; - // InternalValue - // Encodes an "either a fallback string or a FormattedPlaceholder" + // InternalValue represents an intermediate value in the message + // formatter. An InternalValue can either be a fallback value (representing + // an error that occurred during formatting); a "suspension", meaning a function + // call that has yet to be fully resolved; or a fully-resolved FormattedPlaceholder. + // The "suspension" state is used in implementing selection; in a message like: + // .local $x = {1 :number} + // .match $x + // [...] + // $x can't be bound to a fully formatted value; the annotation needs to be + // preserved until the .match is evaluated. Moreover, any given function could + // be both a formatter and a selector, and it's ambiguous which one it's intended + // to be until the body of the message is processed. 
class InternalValue : public UObject { - public: + public: bool isFallback() const { return !fallbackString.isEmpty(); } + bool isSuspension() const { return !functionName.isEmpty(); } InternalValue() : fallbackString("") {} // Fallback constructor explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} - // Regular value constructor + // Fully-evaluated value constructor explicit InternalValue(FormattedPlaceholder&& f) - : fallbackString(""), val(std::move(f)) {} - FormattedPlaceholder value() { return std::move(val); } + : fallbackString(""), functionName(""), resolvedOptions(nullptr), + operand(std::move(f)) {} + // Suspension constructor + InternalValue(const FunctionName& name, + FunctionOptions&& options, + FormattedPlaceholder&& rand, + UErrorCode& status); + // Error code is set if this isn't fully evaluated + FormattedPlaceholder takeValue(UErrorCode& status); + // Error code is set if this is not a suspension + FormattedPlaceholder takeOperand(UErrorCode& status); + // Error code is set if this is not a suspension + FunctionOptions takeOptions(UErrorCode& status); + // Error code is set if this is not a suspension + FunctionName getFunctionName(UErrorCode& status) const; UnicodeString asFallback() const { return fallbackString; } virtual ~InternalValue(); InternalValue& operator=(InternalValue&&); InternalValue(InternalValue&&); - private: + private: UnicodeString fallbackString; // Non-empty if fallback - // Otherwise, assumed to be a FormattedPlaceholder - FormattedPlaceholder val; + FunctionName functionName; // Non-empty if this is a suspension + LocalPointer resolvedOptions; // Valid iff this is a suspension + FormattedPlaceholder operand; }; // class InternalValue // PrioritizedVariant @@ -85,43 +110,6 @@ namespace message2 { return 1; } - // Encapsulates a value to be scrutinized by a `match` with its resolved - // options and the name of the selector - class ResolvedSelector : public UObject { - public: - ResolvedSelector() {} - 
ResolvedSelector(const FunctionName& fn, - Selector* selector, - FunctionOptions&& options, - FormattedPlaceholder&& value); - // Used either for errors, or when selector isn't yet known - explicit ResolvedSelector(FormattedPlaceholder&& value); - // Used for fallback values - explicit ResolvedSelector(const UnicodeString& fb); - bool hasSelector() const { return selector.isValid(); } - const FormattedPlaceholder& argument() const { return value; } - FormattedPlaceholder&& takeArgument() { return std::move(value); } - const Selector* getSelector() { - U_ASSERT(selector.isValid()); - return selector.getAlias(); - } - FunctionOptions&& takeOptions() { - return std::move(options); - } - const FunctionName& getSelectorName() const { return selectorName; } - virtual ~ResolvedSelector(); - ResolvedSelector& operator=(ResolvedSelector&&) noexcept; - ResolvedSelector(ResolvedSelector&&); - bool isFallback() const { return !fallback.isEmpty(); } - const UnicodeString& getFallback() const { return fallback; } - private: - FunctionName selectorName; // For error reporting - LocalPointer selector; - FunctionOptions options; - FormattedPlaceholder value; - UnicodeString fallback; // Non-empty if this is a fallback - }; // class ResolvedSelector - // Closures and environments // ------------------------- diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index 7a306d8b96b8..b1170e0b6f50 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -30,7 +30,6 @@ namespace message2 { class Environment; class MessageContext; - class ResolvedSelector; class StaticErrors; class InternalValue; @@ -335,11 +334,14 @@ namespace message2 { // Do not define default assignment operator const MessageFormatter &operator=(const MessageFormatter &) = delete; +/* ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const; 
ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const; +*/ // Selection methods + bool isSelectable(const InternalValue&) const; // Takes a vector of FormattedPlaceholders void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) @@ -347,7 +349,7 @@ namespace message2 { // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) void sortVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of strings (input) and a vector of strings (output) - void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const; + void matchSelectorKeys(const UVector&, MessageContext&, InternalValue&& rv, UVector&, UErrorCode&) const; // Takes a vector of FormattedPlaceholders (input), // and a vector of vectors of strings (output) void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; @@ -355,20 +357,17 @@ namespace message2 { // Formatting methods [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; - // Formats a call to a formatting function + [[nodiscard]] InternalValue eval(MessageContext&, InternalValue, UErrorCode&) const; // Dispatches on argument type - [[nodiscard]] InternalValue evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const; + [[nodiscard]] FunctionName getFormatterNameByType(const FormattedPlaceholder& argument, + UErrorCode& status) const; + // Formats a call to a formatting function // Dispatches on function name - [[nodiscard]] InternalValue evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - 
FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const; - // Formats an expression that appears as a selector - ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const; - // Formats an expression that appears in a pattern or as the definition of a local variable + [[nodiscard]] InternalValue apply(const FunctionName& functionName, + FormattedPlaceholder&& argument, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const; [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; @@ -393,6 +392,7 @@ namespace message2 { const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } + void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; From 135fec0a77e6885b7b4b593f0b0ace737ff6da2b Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 1 Oct 2024 16:14:03 -0700 Subject: [PATCH 06/37] Comments; renaming fields to be more informative --- icu4c/source/i18n/messageformat2.cpp | 8 +- icu4c/source/i18n/messageformat2_evaluation.h | 6 + .../i18n/messageformat2_formattable.cpp | 95 ++----- .../i18n/messageformat2_function_registry.cpp | 15 +- icu4c/source/i18n/unicode/messageformat2.h | 2 +- 
.../i18n/unicode/messageformat2_formattable.h | 237 +++++++++--------- 6 files changed, 160 insertions(+), 203 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 1337391b2a20..c293224f7555 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -39,16 +39,16 @@ static Formattable evalLiteral(const Literal& lit) { str += var; const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { - return (FormattedPlaceholder(*val, str, errorCode)); + return FormattedPlaceholder(*val, str); } } return {}; } // Returns the contents of the literal -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit, UErrorCode& errorCode) const { +[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const { // The fallback for a literal is itself. - return FormattedPlaceholder(evalLiteral(lit), lit.quoted(), errorCode); + return FormattedPlaceholder(evalLiteral(lit), lit.quoted()); } [[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, @@ -93,7 +93,7 @@ static Formattable evalLiteral(const Literal& lit) { return InternalValue(std::move(result)); } else { U_ASSERT(rand.isLiteral()); - return InternalValue(formatLiteral(rand.asLiteral(), status)); + return InternalValue(formatLiteral(rand.asLiteral())); } } diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 7a6c023f19bf..4fa3dcb803b0 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,6 +32,12 @@ namespace message2 { using namespace data_model; + // InternalValue tracks a value along with, possibly, a function that needs + // to be applied to it in the future (once the value is required + // (by a .match or pattern)); + // while FormattedPlaceholder tracks a value and how it was constructed in the + 
// past (by a function, or from a literal or argument). + // InternalValue represents an intermediate value in the message // formatter. An InternalValue can either be a fallback value (representing // an error that occurred during formatting); a "suspension", meaning a function diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 7c418223f4e9..694a480592ef 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -174,27 +174,17 @@ namespace message2 { FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept { - type = other.type; + origin = other.origin; source = other.source; - if (type == kEvaluated) { - formatted = std::move(other.formatted); - } - if (previousOptions != nullptr) { - delete previousOptions; - } - if (other.previousOptions != nullptr) { - previousOptions = other.previousOptions; - other.previousOptions = nullptr; - } else { - previousOptions = nullptr; - } + formatted = std::move(other.formatted); + previousOptions = std::move(other.previousOptions); fallback = other.fallback; return *this; } const Formattable* FormattedPlaceholder::getSource(UErrorCode& errorCode) const { if (U_SUCCESS(errorCode)) { - if (type != kNull) { + if (origin != kNull) { return &source; } else { errorCode = U_ILLEGAL_ARGUMENT_ERROR; @@ -205,7 +195,7 @@ namespace message2 { FormattedPlaceholder FormattedPlaceholder::withResult(FormattedValue&& result) { formatted = std::move(result); - type = kEvaluated; + origin = kFunctionResult; return std::move(*this); } @@ -216,70 +206,38 @@ namespace message2 { return {}; } formatted = std::move(result); - type = kEvaluated; - delete previousOptions; - previousOptions = create(std::move(opts), status); - if (U_FAILURE(status)) { - previousOptions = nullptr; - return {}; - } + origin = kFunctionResult; + previousOptions = std::move(opts); return std::move(*this); } 
FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, FunctionOptions&& opts, - FormattedValue&& output, - UErrorCode& status) + FormattedValue&& output) : fallback(input.fallback), source(input.source), formatted(std::move(output)), - previousOptions(nullptr), - type(kEvaluated) { - CHECK_ERROR(status); - - LocalPointer temp(create(std::move(opts), status)); - CHECK_ERROR(status); - previousOptions = temp.orphan(); - } + previousOptions(std::move(opts)), + origin(kFunctionResult) {} FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, - FormattedValue&& output, - UErrorCode& status) + FormattedValue&& output) : fallback(input.fallback), source(input.source), formatted(std::move(output)), - type(kEvaluated) { - initOptions(status); - } + origin(kFunctionResult) {} FormattedPlaceholder::FormattedPlaceholder(const Formattable& input, - const UnicodeString& fb, - UErrorCode& status) - : fallback(fb), source(input), type(kUnevaluated) { - initOptions(status); - } - - FormattedPlaceholder::FormattedPlaceholder() : type(kNull) { - previousOptions = nullptr; - } + const UnicodeString& fb) + : fallback(fb), source(input), origin(kArgumentOrLiteral) {} - void FormattedPlaceholder::initOptions(UErrorCode& status) { - LocalPointer temp(create(FunctionOptions(), status)); - CHECK_ERROR(status); - previousOptions = temp.orphan(); - } + FormattedPlaceholder::FormattedPlaceholder() : origin(kNull) {} const message2::FunctionOptions& FormattedPlaceholder::getOptions() const { - U_ASSERT(previousOptions != nullptr); - return *previousOptions; + return previousOptions; } - FormattedPlaceholder::~FormattedPlaceholder() { - if (previousOptions != nullptr) { - delete previousOptions; - previousOptions = nullptr; - } - } + FormattedPlaceholder::~FormattedPlaceholder() {} // Default formatters // ------------------ @@ -340,8 +298,7 @@ namespace message2 { } if (asDecimal != nullptr) { return FormattedPlaceholder(input, - 
FormattedValue(formatNumberWithDefaults(locale, asDecimal, status)), - status); + FormattedValue(formatNumberWithDefaults(locale, asDecimal, status))); } } @@ -352,27 +309,27 @@ namespace message2 { UDate d = toFormat->getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, d, result, status); - return FormattedPlaceholder(input, FormattedValue(std::move(result)), status); + return FormattedPlaceholder(input, FormattedValue(std::move(result))); } case UFMT_DOUBLE: { double d = toFormat->getDouble(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status)), status); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status))); } case UFMT_LONG: { int32_t l = toFormat->getLong(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status)), status); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status))); } case UFMT_INT64: { int64_t i = toFormat->getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status)), status); + return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status))); } case UFMT_STRING: { const UnicodeString& s = toFormat->getString(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(UnicodeString(s)), status); + return FormattedPlaceholder(input, FormattedValue(UnicodeString(s))); } default: { // No default formatters for other types; use fallback @@ -393,16 +350,16 @@ namespace message2 { return {}; } - // Evaluated value: either just return the string, or format the number + // Function result: either just return the string, or format the number // as a string and return it - if (isEvaluated()) { + if (isFunctionResult()) { if (formatted.isString()) { 
return formatted.getString(); } else { return formatted.getNumber().toString(status); } } - // Unevaluated value: first evaluate it fully, then format + // Unannotated value: apply default formatters UErrorCode savedStatus = status; FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status); if (status == U_MF_FORMATTING_ERROR) { diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 224c06df841f..2efc54febdc3 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -439,8 +439,8 @@ Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& loca StandardFunctions::IntegerFactory::~IntegerFactory() {} -static FormattedPlaceholder notANumber(const FormattedPlaceholder& input, UErrorCode& status) { - return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")), status); +static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { + return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); } static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { @@ -600,12 +600,12 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar default: { // Other types can't be parsed as a number errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg, errorCode); + return notANumber(arg); } } } - return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult)), errorCode); + return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult))); } StandardFunctions::Number::~Number() {} @@ -660,7 +660,8 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, errorCode); CHECK_ERROR(errorCode); - U_ASSERT(resolvedSelector.isEvaluated() && resolvedSelector.output().isNumber()); + U_ASSERT(resolvedSelector.isFunctionResult() + && resolvedSelector.output().isNumber()); // 
See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection // 1. Let exact be the JSON string representation of the numeric value of resolvedSelector @@ -1107,7 +1108,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback(), errorCode); + toFormat.getFallback()); df->format(d, result, 0, errorCode); } break; @@ -1130,7 +1131,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& if (U_FAILURE(errorCode)) { return {}; } - return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result)), errorCode); + return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); } StandardFunctions::DateTimeFactory::~DateTimeFactory() {} diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index b1170e0b6f50..ff4ad461ab49 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -355,7 +355,7 @@ namespace message2 { void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods - [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&, UErrorCode&) const; + [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; [[nodiscard]] InternalValue eval(MessageContext&, InternalValue, UErrorCode&) const; // Dispatches on argument type diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index ab748d7c61d8..f5a5e5ca25e5 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ 
b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -450,17 +450,12 @@ namespace message2 { */ #ifndef U_IN_DOXYGEN class FormattedPlaceholder; -/* - TODO: It would be better not to include null operands or fallback values - in an options map. - Even better would be to handle them differently and not include them in - a FormattedPlaceholder (use a type like std::variant - in the formatter) -*/ class U_I18N_API ResolvedFunctionOption : public UObject { private: /* const */ UnicodeString name; + // This is a pointer because FormattedPlaceholder and ResolvedFunctionOption + // are mutually recursive /* const */ LocalPointer value; public: @@ -585,7 +580,101 @@ class U_I18N_API ResolvedFunctionOption : public UObject { number::FormattedNumber numberOutput; }; // class FormattedValue - class FunctionOptions; +/** + * Mapping from option names to `message2::Formattable` objects, obtained + * by calling `getOptions()` on a `FunctionOptions` object. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ +using FunctionOptionsMap = std::map; + +/** + * Structure encapsulating named options passed to a custom selector or formatter. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ +class U_I18N_API FunctionOptions : public UObject { + public: + /** + * Returns a map of all name-value pairs provided as options to this function. + * The syntactic order of options is not guaranteed to + * be preserved. + * + * This class is immutable and movable but not copyable. + * + * @return A map from strings to FormattedPlaceholder objects representing + * the results of resolving each option value. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ + FunctionOptionsMap getOptions() const { + FunctionOptionsMap result; + for (int32_t i = 0; i < functionOptionsLen; i++) { + const ResolvedFunctionOption& opt = options[i]; + result[opt.getName()] = opt.getValue(); + } + return result; + } + /** + * Default constructor. + * Returns an empty mapping. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions() { options = nullptr; } + /** + * Destructor. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~FunctionOptions(); + /** + * Move assignment operator: + * The source FunctionOptions will be left in a valid but undefined state. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions& operator=(FunctionOptions&&) noexcept; + /** + * Move constructor: + * The source FunctionOptions will be left in a valid but undefined state. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions(FunctionOptions&&); + /** + * Copy constructor. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ + FunctionOptions& operator=(const FunctionOptions&) = delete; + private: + friend class MessageFormatter; + friend class StandardFunctions; + + explicit FunctionOptions(UVector&&, UErrorCode&); + + const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; + const FormattedPlaceholder* getFunctionOption(const UnicodeString&, UErrorCode&) const; + // Returns empty string if option doesn't exist + UnicodeString getStringFunctionOption(const UnicodeString&) const; + int32_t optionsCount() const { return functionOptionsLen; } + + // Named options passed to functions + // This is not a Hashtable in order to make it possible for code in a public header file + // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put + // that code in the header because it would have to call internal Hashtable methods. + ResolvedFunctionOption* options; + int32_t functionOptionsLen = 0; +}; // class FunctionOptions /** * A `FormattablePlaceholder` encapsulates an input value (a `message2::Formattable`) @@ -608,13 +697,11 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * `Formattable` used to construct the formatted value. * @param output A `FormattedValue` representing the formatted output of `input`. * Passed by move. - * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output, - UErrorCode& errorCode); + FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output); /** * Constructor for fully formatted placeholders with options. * @@ -623,26 +710,23 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @param opts Function options that were used to construct `output`. May be the empty map. * @param output A `FormattedValue` representing the formatted output of `input`. * Passed by move. 
- * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ FormattedPlaceholder(const FormattedPlaceholder& input, FunctionOptions&& opts, - FormattedValue&& output, - UErrorCode& errorCode); + FormattedValue&& output); /** * Constructor for unformatted placeholders. * * @param input A `Formattable` object. * @param fb Fallback string to use if an error occurs while formatting the input. - * @param errorCode Input/output error code * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb, UErrorCode& errorCode); + FormattedPlaceholder(const Formattable& input, const UnicodeString& fb); /** * Default constructor. Leaves the FormattedPlaceholder in a * valid but undefined state. @@ -660,7 +744,7 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - UBool isNullOperand() const { return type == kNull; } + UBool isNullOperand() const { return origin == kNull; } /** * Returns a pointer to * the source Formattable value for this placeholder. @@ -715,7 +799,7 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - bool isEvaluated() const { return (type == kEvaluated); } + bool isFunctionResult() const { return origin == kFunctionResult; } /** * Gets the fallback value of this placeholder, to be used in its place if an error occurs while * formatting it. @@ -764,123 +848,32 @@ class U_I18N_API ResolvedFunctionOption : public UObject { */ UnicodeString formatToString(const Locale& locale, UErrorCode& status) const; - // TODO + /** + * Destructor. + * + * @internal ICU 77 technology preview + * @deprecated This API is for ICU internal use only. 
+ */ virtual ~FormattedPlaceholder(); private: friend class MessageFormatter; - enum Type { + enum Origin { kNull, // Represents the operand of an expression with no syntactic operand // (Functions can be nullary in MF2 but the C++ representations must - // take an argument, so this is how that's reconciled) - kUnevaluated, // `source` should be valid, but there's no result yet - kEvaluated, // `formatted` exists + // take an argument, hence we need a representation for "no argument") + kArgumentOrLiteral, // Represents a `Formattable` originating from an argument or literal + kFunctionResult, // Represents the result of applying a function to another + // FormattedPlaceholder }; UnicodeString fallback; Formattable source; FormattedValue formatted; void initOptions(UErrorCode&); - // Can be null if this was default-constructed - FunctionOptions* previousOptions = nullptr; // Ignored unless type is kEvaluated - Type type; + FunctionOptions previousOptions; // Ignored unless type is kEvaluated + Origin origin; }; // class FormattedPlaceholder -/** - * Mapping from option names to `message2::Formattable` objects, obtained - * by calling `getOptions()` on a `FunctionOptions` object. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ -using FunctionOptionsMap = std::map; - -/** - * Structure encapsulating named options passed to a custom selector or formatter. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ -class U_I18N_API FunctionOptions : public UObject { - public: - /** - * Returns a map of all name-value pairs provided as options to this function. - * The syntactic order of options is not guaranteed to - * be preserved. - * - * This class is immutable and movable but not copyable. - * - * @return A map from strings to FormattedPlaceholder objects representing - * the results of resolving each option value. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptionsMap getOptions() const { - FunctionOptionsMap result; - for (int32_t i = 0; i < functionOptionsLen; i++) { - const ResolvedFunctionOption& opt = options[i]; - result[opt.getName()] = opt.getValue(); - } - return result; - } - /** - * Default constructor. - * Returns an empty mapping. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions() { options = nullptr; } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FunctionOptions(); - /** - * Move assignment operator: - * The source FunctionOptions will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions& operator=(FunctionOptions&&) noexcept; - /** - * Move constructor: - * The source FunctionOptions will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FunctionOptions(FunctionOptions&&); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FunctionOptions& operator=(const FunctionOptions&) = delete; - private: - friend class MessageFormatter; - friend class StandardFunctions; - - explicit FunctionOptions(UVector&&, UErrorCode&); - - const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; - const FormattedPlaceholder* getFunctionOption(const UnicodeString&, UErrorCode&) const; - // Returns empty string if option doesn't exist - UnicodeString getStringFunctionOption(const UnicodeString&) const; - int32_t optionsCount() const { return functionOptionsLen; } - - // Named options passed to functions - // This is not a Hashtable in order to make it possible for code in a public header file - // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put - // that code in the header because it would have to call internal Hashtable methods. - ResolvedFunctionOption* options; - int32_t functionOptionsLen = 0; -}; // class FunctionOptions - /** * Not yet implemented: The result of a message formatting operation. Based on * ICU4J's FormattedMessage.java. 
From 04b9cf2dfae4026a5e20ff438305e3ea049eb3b2 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 1 Oct 2024 16:18:07 -0700 Subject: [PATCH 07/37] Remove commented-out code --- icu4c/source/i18n/unicode/messageformat2.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index ff4ad461ab49..a8c28f5deb1a 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -334,11 +334,6 @@ namespace message2 { // Do not define default assignment operator const MessageFormatter &operator=(const MessageFormatter &) = delete; -/* - ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const; - ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const; -*/ - // Selection methods bool isSelectable(const InternalValue&) const; From c04831311343f0d6fc053539db5fb16b96b776e5 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 2 Oct 2024 11:37:15 -0700 Subject: [PATCH 08/37] Fix leaks --- icu4c/source/i18n/messageformat2.cpp | 8 +++----- icu4c/source/i18n/messageformat2_evaluation.cpp | 17 ++++++----------- icu4c/source/i18n/messageformat2_evaluation.h | 8 +++----- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index c293224f7555..b56b5e281d1f 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -298,8 +298,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } return InternalValue(defaultFormatterName, FunctionOptions(), - std::move(operand), - status); + std::move(operand)); } else { randVal = eval(context, std::move(randVal), status); // Don't call the function on error values @@ -320,8 +319,7 @@ FunctionOptions 
MessageFormatter::resolveOptions(const Environment& env, const O // Call the formatter function return InternalValue(functionName, std::move(resolvedOptions), - std::move(operand), - status); + std::move(operand)); } } @@ -441,7 +439,7 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Return an empty list of matches return; } - auto selectorImpl = getSelector(context, selectorName, status); + LocalPointer selectorImpl(getSelector(context, selectorName, status)); if (U_FAILURE(status)) { return; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 97c2b7366f09..5818e7f24097 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -128,15 +128,10 @@ InternalValue::InternalValue(InternalValue&& other) { } InternalValue::InternalValue(const FunctionName& name, - FunctionOptions&& options, - FormattedPlaceholder&& rand, - UErrorCode& status) : fallbackString(""), functionName(name) { - if (U_FAILURE(status)) { - return; - } - resolvedOptions.adoptInstead(create(std::move(options), status)); - operand = std::move(rand); -} + FunctionOptions&& options, + FormattedPlaceholder&& rand) + : fallbackString(""), functionName(name), + resolvedOptions(std::move(options)), operand(std::move(rand)) {} FormattedPlaceholder InternalValue::takeValue(UErrorCode& status) { if (U_FAILURE(status)) { @@ -164,11 +159,11 @@ FunctionOptions InternalValue::takeOptions(UErrorCode& status) { if (U_FAILURE(status)) { return {}; } - if (!resolvedOptions.isValid()) { + if (!isSuspension()) { status = U_ILLEGAL_ARGUMENT_ERROR; return {}; } - return std::move(*resolvedOptions.orphan()); + return std::move(resolvedOptions); } // Only works if not fully evaluated FunctionName InternalValue::getFunctionName(UErrorCode& status) const { diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 
4fa3dcb803b0..3d605fc22155 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -59,13 +59,11 @@ namespace message2 { explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} // Fully-evaluated value constructor explicit InternalValue(FormattedPlaceholder&& f) - : fallbackString(""), functionName(""), resolvedOptions(nullptr), - operand(std::move(f)) {} + : fallbackString(""), functionName(""), operand(std::move(f)) {} // Suspension constructor InternalValue(const FunctionName& name, FunctionOptions&& options, - FormattedPlaceholder&& rand, - UErrorCode& status); + FormattedPlaceholder&& rand); // Error code is set if this isn't fully evaluated FormattedPlaceholder takeValue(UErrorCode& status); // Error code is set if this is not a suspension @@ -81,7 +79,7 @@ namespace message2 { private: UnicodeString fallbackString; // Non-empty if fallback FunctionName functionName; // Non-empty if this is a suspension - LocalPointer resolvedOptions; // Valid iff this is a suspension + FunctionOptions resolvedOptions; // Ignored unless this is a suspension FormattedPlaceholder operand; }; // class InternalValue From fe9f6350c27b6a88610d22476cc434f44db154c8 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 2 Oct 2024 11:41:30 -0700 Subject: [PATCH 09/37] Remove unused method --- icu4c/source/i18n/unicode/messageformat2_formattable.h | 1 - 1 file changed, 1 deletion(-) diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index f5a5e5ca25e5..87dd00a2caf5 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -869,7 +869,6 @@ class U_I18N_API FunctionOptions : public UObject { UnicodeString fallback; Formattable source; FormattedValue formatted; - void initOptions(UErrorCode&); FunctionOptions previousOptions; // Ignored unless type is kEvaluated Origin origin; 
}; // class FormattedPlaceholder From 2c4705ad9c9532f96552874d4b03da1716ff8ebb Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 2 Oct 2024 12:03:46 -0700 Subject: [PATCH 10/37] Fix warning --- icu4c/source/i18n/messageformat2.cpp | 3 ++- icu4c/source/i18n/messageformat2_evaluation.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index b56b5e281d1f..30ab1b28457b 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -135,7 +135,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // The option is resolved; add it to the vector ResolvedFunctionOption resolvedOpt(k, std::move(optVal), status); - LocalPointer p(create(std::move(resolvedOpt), status)); + LocalPointer + p(create(std::move(resolvedOpt), status)); EMPTY_ON_ERROR(status); optionsVector->adoptElement(p.orphan(), status); } diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 3d605fc22155..f4387fbde419 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -34,7 +34,7 @@ namespace message2 { // InternalValue tracks a value along with, possibly, a function that needs // to be applied to it in the future (once the value is required - // (by a .match or pattern)); + // (by a .match or pattern, or another function)); // while FormattedPlaceholder tracks a value and how it was constructed in the // past (by a function, or from a literal or argument). 
From bb2ecdbfd2e76ed31e1ab5c514fb1b56166eb5d3 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 3 Oct 2024 13:12:49 -0700 Subject: [PATCH 11/37] Tests pass --- icu4c/source/i18n/messageformat2.cpp | 293 ++------ .../source/i18n/messageformat2_evaluation.cpp | 190 ++++-- icu4c/source/i18n/messageformat2_evaluation.h | 52 +- .../i18n/messageformat2_formattable.cpp | 42 +- .../source/i18n/messageformat2_formatter.cpp | 145 +--- .../i18n/messageformat2_function_registry.cpp | 638 +++++++----------- ...essageformat2_function_registry_internal.h | 206 +++--- icu4c/source/i18n/unicode/messageformat2.h | 23 +- .../i18n/unicode/messageformat2_formattable.h | 33 +- .../messageformat2_function_registry.h | 297 ++------ .../test/intltest/messageformat2test.cpp | 8 +- .../source/test/intltest/messageformat2test.h | 125 ++-- .../intltest/messageformat2test_custom.cpp | 356 +++++----- .../intltest/messageformat2test_read_json.cpp | 1 - testdata/message2/runtime-errors.json | 27 - 15 files changed, 968 insertions(+), 1468 deletions(-) delete mode 100644 testdata/message2/runtime-errors.json diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 30ab1b28457b..8ed9685c4dbf 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -32,23 +32,33 @@ static Formattable evalLiteral(const Literal& lit) { } // Assumes that `var` is a message argument; returns the argument's value. -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const { +[[nodiscard]] InternalValue MessageFormatter::evalArgument(const VariableName& var, + MessageContext& context, + UErrorCode& errorCode) const { if (U_SUCCESS(errorCode)) { // The fallback for a variable name is itself. 
UnicodeString str(DOLLAR); str += var; const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { - return FormattedPlaceholder(*val, str); + LocalPointer result(BaseValue::create(locale, *val, errorCode)); + if (U_SUCCESS(errorCode)) { + return InternalValue(result.orphan(), str); + } } } return {}; } // Returns the contents of the literal -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const { +[[nodiscard]] InternalValue MessageFormatter::formatLiteral(const Literal& lit, + UErrorCode& errorCode) const { // The fallback for a literal is itself. - return FormattedPlaceholder(evalLiteral(lit), lit.quoted()); + LocalPointer val(BaseValue::create(locale, evalLiteral(lit), errorCode)); + if (U_SUCCESS(errorCode)) { + return InternalValue(val.orphan(), lit.quoted()); + } + return {}; } [[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, @@ -79,7 +89,7 @@ static Formattable evalLiteral(const Literal& lit) { return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status); } // Variable wasn't found in locals -- check if it's global - FormattedPlaceholder result = evalArgument(var, context, status); + InternalValue result = evalArgument(var, context, status); if (status == U_ILLEGAL_ARGUMENT_ERROR) { status = U_ZERO_ERROR; // Unbound variable -- set a resolution error @@ -90,15 +100,18 @@ static Formattable evalLiteral(const Literal& lit) { str += var; return InternalValue(str); } - return InternalValue(std::move(result)); + return result; } else { U_ASSERT(rand.isLiteral()); - return InternalValue(formatLiteral(rand.asLiteral())); + return formatLiteral(rand.asLiteral(), status); } } // Resolves a function's options -FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { +FunctionOptions MessageFormatter::resolveOptions(const Environment& env, + const OptionMap& 
options, + MessageContext& context, + UErrorCode& status) const { LocalPointer optionsVector(createUVector(status)); if (U_FAILURE(status)) { return {}; @@ -120,21 +133,10 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } // Force evaluation in order to extract a FormattedPlaceholder // from `rhsVal` (which might be a suspension) - FormattedPlaceholder optVal = rhsVal.takeValue(status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - InternalValue applied = eval(context, std::move(rhsVal), status); - if (applied.isFallback()) { - // Skip this option - continue; - } - optVal = applied.takeValue(status); - U_ASSERT(U_SUCCESS(status)); - } - U_ASSERT(!optVal.isNullOperand()); // Option value can't be absent, syntactically + FunctionValue* optVal = rhsVal.takeValue(status); // The option is resolved; add it to the vector - ResolvedFunctionOption resolvedOpt(k, std::move(optVal), status); + ResolvedFunctionOption resolvedOpt(k, optVal); LocalPointer p(create(std::move(resolvedOpt), status)); EMPTY_ON_ERROR(status); @@ -144,134 +146,6 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O return FunctionOptions(std::move(*optionsVector), status); } -// Overload that dispatches on argument type. Syntax doesn't provide for options in this case. 
-[[nodiscard]] FunctionName MessageFormatter::getFormatterNameByType(const FormattedPlaceholder& argument, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; - } - - const Formattable* toFormat = argument.getSource(status); - // Null operand case should have been checked for already - U_ASSERT(U_SUCCESS(status)); - switch (toFormat->getType()) { - case UFMT_OBJECT: { - const FormattableObject* obj = toFormat->getObject(status); - U_ASSERT(U_SUCCESS(status)); - U_ASSERT(obj != nullptr); - const UnicodeString& type = obj->tag(); - FunctionName functionName; - if (!getDefaultFormatterNameByType(type, functionName)) { - // No formatter for this type -- follow default behavior - break; - } - return functionName; - } - default: { - // TODO: The array case isn't handled yet; not sure whether it's desirable - // to have a default list formatter - break; - } - } - // No formatter for this type, or it's a primitive type (which will be formatted later) - return {}; -} - -// Takes a possibly-suspended value and returns a fully-evaluated one; -// the result is either fully evaluated or an error value -[[nodiscard]] InternalValue MessageFormatter::eval(MessageContext& context, - InternalValue suspension, - UErrorCode& status) const { - EMPTY_ON_ERROR(status); - - // If we already have an error value or fully-evaluated value, - // just return it - if (!suspension.isSuspension()) { - return suspension; - } - - // Get the function name, operand, and options - // (which should succeed, since we already did the check) - FunctionName functionName = suspension.getFunctionName(status); - FormattedPlaceholder operand = suspension.takeOperand(status); - FunctionOptions options = suspension.takeOptions(status); - U_ASSERT(U_SUCCESS(status)); - - // Call the function with the operand and options - return apply(functionName, std::move(operand), std::move(options), context, status); -} - -// Overload that dispatches on function name -[[nodiscard]] InternalValue 
MessageFormatter::apply(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; - } - - DynamicErrors& errs = context.getErrors(); - - UnicodeString fallback = argument.getFallback(); - if (argument.isNullOperand()) { - fallback = UnicodeString(COLON); - fallback += functionName; - } - - if (isFormatter(functionName)) { - LocalPointer formatterImpl(getFormatter(functionName, status)); - if (U_FAILURE(status)) { - if (status == U_MF_FORMATTING_ERROR) { - errs.setFormattingError(functionName, status); - status = U_ZERO_ERROR; - return InternalValue(fallback); - } - if (status == U_MF_UNKNOWN_FUNCTION_ERROR) { - errs.setUnknownFunction(functionName, status); - status = U_ZERO_ERROR; - return InternalValue(fallback); - } - // Other errors are non-recoverable - return {}; - } - U_ASSERT(formatterImpl != nullptr); - - UErrorCode savedStatus = status; - FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status); - // Update errors - if (savedStatus != status) { - if (U_FAILURE(status)) { - if (status == U_MF_OPERAND_MISMATCH_ERROR) { - status = U_ZERO_ERROR; - errs.setOperandMismatchError(functionName, status); - } else { - status = U_ZERO_ERROR; - // Convey any error generated by the formatter - // as a formatting error, except for operand mismatch errors - errs.setFormattingError(functionName, status); - } - return InternalValue(fallback); - } else { - // Ignore warnings - status = savedStatus; - } - } - // Ignore the output if any errors occurred - if (errs.hasFormattingError()) { - return InternalValue(fallback); - } - return InternalValue(std::move(result)); - } - // No formatter with this name -- set error - if (isSelector(functionName)) { - errs.setFormattingError(functionName, status); - } else { - errs.setUnknownFunction(functionName, status); - } - return InternalValue(fallback); -} - 
// Formats an expression using `globalEnv` for the values of variables [[nodiscard]] InternalValue MessageFormatter::formatExpression(const Environment& globalEnv, const Expression& expr, @@ -286,30 +160,12 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O InternalValue randVal = formatOperand(globalEnv, rand, context, status); if (!expr.isFunctionCall()) { - if (randVal.isSuspension() || randVal.isFallback()) { - return randVal; - } - FormattedPlaceholder operand = randVal.takeValue(status); - U_ASSERT(U_SUCCESS(status)); - // In this case, the operand is an unannotated literal or argument - // Dispatch based on type of `operand` - FunctionName defaultFormatterName = getFormatterNameByType(operand, status); - if (defaultFormatterName.isEmpty()) { - return InternalValue(std::move(operand)); - } - return InternalValue(defaultFormatterName, - FunctionOptions(), - std::move(operand)); + return randVal; } else { - randVal = eval(context, std::move(randVal), status); // Don't call the function on error values - if (randVal.isFallback()) { return randVal; } - FormattedPlaceholder operand = randVal.takeValue(status); - U_ASSERT(U_SUCCESS(status)); - const Operator* rator = expr.getOperator(status); U_ASSERT(U_SUCCESS(status)); const FunctionName& functionName = rator->getFunctionName(); @@ -317,10 +173,38 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // Resolve the options FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); - // Call the formatter function - return InternalValue(functionName, - std::move(resolvedOptions), - std::move(operand)); + UnicodeString fallbackStr; + if (rand.isNull()) { + fallbackStr = UnicodeString(COLON); + fallbackStr += functionName; + } else { + fallbackStr = randVal.asFallback(); + } + + // Call the function + Function* function = lookupFunction(functionName, status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + 
context.getErrors().setUnknownFunction(functionName, status); + return InternalValue(fallbackStr); + } + // Calling takeValue() won't error out because we already checked the fallback case + // Nullptr represents an absent argument + FunctionValue* functionArg = randVal.isNullOperand() ? nullptr : randVal.takeValue(status); + U_ASSERT(U_SUCCESS(status)); + auto result = function->call(functionArg, + std::move(resolvedOptions), status); + if (status == U_MF_OPERAND_MISMATCH_ERROR) { + status = U_ZERO_ERROR; + context.getErrors().setOperandMismatchError(functionName, status); + return InternalValue(fallbackStr); + } + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + context.getErrors().setFormattingError(functionName, status); + return InternalValue(fallbackStr); + } + return InternalValue(result, fallbackStr); } } @@ -337,15 +221,16 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& } else { // Format the expression InternalValue partVal = formatExpression(globalEnv, part.contents(), context, status); - // Force full evaluation - InternalValue partResult = eval(context, std::move(partVal), status); - if (partResult.isFallback()) { + if (partVal.isFallback()) { result += LEFT_CURLY_BRACE; - result += partResult.asFallback(); + result += partVal.asFallback(); result += RIGHT_CURLY_BRACE; } else { // Do final formatting (e.g. formatting numbers as strings) - result += partResult.takeValue(status).formatToString(locale, status); + const FunctionValue* val = partVal.takeValue(status); + // Shouldn't be null or a fallback + U_ASSERT(U_SUCCESS(status)); + result += val->formatToString(status); // Handle formatting errors. 
`formatToString()` can't take a context and thus can't // register an error directly if (status == U_MF_FORMATTING_ERROR) { @@ -363,32 +248,6 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // ------------------------------------------------------ // Selection -bool MessageFormatter::isSelectable(const InternalValue& rv) const { - UErrorCode localStatus = U_ZERO_ERROR; - const FunctionName& selectorName = rv.getFunctionName(localStatus); - if (U_FAILURE(localStatus)) { - return false; - } - return isSelector(selectorName); -} - -void MessageFormatter::setNotSelectableError(MessageContext& context, - const InternalValue& rv, - UErrorCode& status) const { - CHECK_ERROR(status); - if (rv.isFallback()) { - context.getErrors().setSelectorError({}, status); - return; - } - const FunctionName& functionName = rv.getFunctionName(status); - U_ASSERT(U_SUCCESS(status)); - if (isFormatter(functionName)) { - context.getErrors().setSelectorError(functionName, status); - } else { - context.getErrors().setUnknownFunction(functionName, status); - } -} - // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors // `res` is a vector of ResolvedSelectors void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { @@ -402,7 +261,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. InternalValue rv = formatExpression(env, selectors[i], context, status); - if (isSelectable(rv)) { + if (rv.isSelectable()) { // 2ii. If selection is supported for rv: // (True if this code has been reached) } else { @@ -411,7 +270,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // Append nomatch as the last element of the list res. // Emit a Selection Error. 
// (Note: in this case, rv, being a fallback, serves as `nomatch`) - setNotSelectableError(context, rv, status); + context.getErrors().setSelectorError({}, status); } // 2ii(a). Append rv as the last element of the list res. // (Also fulfills 2iii) @@ -430,21 +289,10 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, UErrorCode& status) const { CHECK_ERROR(status); - if (!isSelectable(rv)) { + if (!rv.isSelectable()) { return; } - const FunctionName& selectorName = rv.getFunctionName(status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - // Return an empty list of matches - return; - } - LocalPointer selectorImpl(getSelector(context, selectorName, status)); - if (U_FAILURE(status)) { - return; - } - U_ASSERT(selectorImpl != nullptr); UErrorCode savedStatus = status; // Convert `keys` to an array @@ -471,18 +319,15 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, int32_t prefsLen = 0; // Call the selector - FormattedPlaceholder rand = rv.takeOperand(status); - FunctionOptions opts = rv.takeOptions(status); - U_ASSERT(U_SUCCESS(status)); // Did this check earlier - selectorImpl->selectKey(std::move(rand), std::move(opts), - adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, - status); + // Already checked for fallback, so it's safe to call takeValue() + rv.takeValue(status)->selectKeys(adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, + status); // Update errors if (savedStatus != status) { if (U_FAILURE(status)) { status = U_ZERO_ERROR; - context.getErrors().setSelectorError(selectorName, status); + context.getErrors().setSelectorError({}, status); } else { // Ignore warnings status = savedStatus; diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 5818e7f24097..de0b515df0ab 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -20,29 +20,57 @@ namespace message2 { 
using namespace data_model; +// BaseValue +// --------- + +BaseValue::BaseValue(const Locale& loc, const Formattable& source) + : locale(loc) { + operand = source; +} + +/* static */ BaseValue* BaseValue::create(const Locale& locale, + const Formattable& source, + UErrorCode& errorCode) { + return message2::create(BaseValue(locale, source), errorCode); +} + +extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); + +UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const { + return formattableToString(locale, operand, errorCode); +} + +BaseValue& BaseValue::operator=(BaseValue&& other) noexcept { + operand = std::move(other.operand); + opts = std::move(other.opts); + locale = other.locale; + + return *this; +} + +BaseValue::BaseValue(BaseValue&& other) { + *this = std::move(other); +} + // Functions // ------------- ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { - name = std::move(other.name); - value = std::move(other.value); + *this = std::move(other); } ResolvedFunctionOption::ResolvedFunctionOption(const UnicodeString& n, - FormattedPlaceholder&& v, - UErrorCode& status) { - CHECK_ERROR(status); + FunctionValue* f) : name(n), value(f) { + U_ASSERT(f != nullptr); +} - name = n; - LocalPointer - temp(create(std::move(v), status)); - if (U_SUCCESS(status)) { - value.adoptInstead(temp.orphan()); +ResolvedFunctionOption::~ResolvedFunctionOption() { + if (value != nullptr) { + delete value; + value = nullptr; } } -ResolvedFunctionOption::~ResolvedFunctionOption() {} - const ResolvedFunctionOption* FunctionOptions::getResolvedFunctionOptions(int32_t& len) const { len = functionOptionsLen; @@ -57,7 +85,7 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { options = moveVectorToArray(optionsVector, status); } -const FormattedPlaceholder* +const FunctionValue* FunctionOptions::getFunctionOption(const UnicodeString& key, UErrorCode& status) const { if (options 
== nullptr) { @@ -73,24 +101,29 @@ FunctionOptions::getFunctionOption(const UnicodeString& key, return nullptr; } -UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const { - UErrorCode localStatus = U_ZERO_ERROR; - const FormattedPlaceholder* option = getFunctionOption(key, localStatus); - if (U_SUCCESS(localStatus)) { - const Formattable* source = option->getSource(localStatus); - // Null operand should never appear as an option value - U_ASSERT(U_SUCCESS(localStatus)); - UnicodeString val = source->getString(localStatus); - if (U_SUCCESS(localStatus)) { - return val; + +UnicodeString +FunctionOptions::getStringFunctionOption(const UnicodeString& k, UErrorCode& errorCode) const { + const FunctionValue* option = getFunctionOption(k, errorCode); + if (U_SUCCESS(errorCode)) { + UnicodeString result = option->formatToString(errorCode); + if (U_SUCCESS(errorCode)) { + return result; } } - // For anything else, including non-string values, return "". - // Alternately, could try to stringify the non-string option. - // (Currently, no tests require that.) 
return {}; } +UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const { + UErrorCode localStatus = U_ZERO_ERROR; + + UnicodeString result = getStringFunctionOption(key, localStatus); + if (U_FAILURE(localStatus)) { + return {}; + } + return result; +} + FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept { functionOptionsLen = other.functionOptionsLen; options = other.options; @@ -110,16 +143,62 @@ FunctionOptions::~FunctionOptions() { } } +static bool containsOption(const UVector& opts, const ResolvedFunctionOption& opt) { + for (int32_t i = 0; i < opts.size(); i++) { + if (static_cast(opts[i])->getName() + == opt.getName()) { + return true; + } + } + return false; +} + +// Options in `this` take precedence +// `this` can't be used after mergeOptions is called +FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, + UErrorCode& status) { + UVector mergedOptions(status); + mergedOptions.setDeleter(uprv_deleteUObject); + + if (U_FAILURE(status)) { + return {}; + } + + // Create a new vector consisting of the options from this `FunctionOptions` + for (int32_t i = 0; i < functionOptionsLen; i++) { + mergedOptions.adoptElement(create(std::move(options[i]), status), + status); + } + + // Add each option from `other` that doesn't appear in this `FunctionOptions` + for (int i = 0; i < other.functionOptionsLen; i++) { + // Note: this is quadratic in the length of `options` + if (!containsOption(mergedOptions, other.options[i])) { + mergedOptions.adoptElement(create(std::move(other.options[i]), + status), + status); + } + } + + delete[] options; + options = nullptr; + functionOptionsLen = 0; + + return FunctionOptions(std::move(mergedOptions), status); +} + // InternalValue // ------------- InternalValue::~InternalValue() {} InternalValue& InternalValue::operator=(InternalValue&& other) { + isFallbackValue = other.isFallbackValue; fallbackString = other.fallbackString; - functionName = 
other.functionName; - resolvedOptions = std::move(other.resolvedOptions); - operand = std::move(other.operand); + if (!isFallbackValue) { + U_ASSERT(other.val.isValid()); + val.adoptInstead(other.val.orphan()); + } return *this; } @@ -127,57 +206,30 @@ InternalValue::InternalValue(InternalValue&& other) { *this = std::move(other); } -InternalValue::InternalValue(const FunctionName& name, - FunctionOptions&& options, - FormattedPlaceholder&& rand) - : fallbackString(""), functionName(name), - resolvedOptions(std::move(options)), operand(std::move(rand)) {} - -FormattedPlaceholder InternalValue::takeValue(UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - if (!functionName.isEmpty() || !fallbackString.isEmpty()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return {}; - } - return std::move(operand); -} -// Only works if not fully evaluated -FormattedPlaceholder InternalValue::takeOperand(UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - if (functionName.isEmpty()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return {}; - } - return std::move(operand); +InternalValue::InternalValue(FunctionValue* v, const UnicodeString& fb) + : fallbackString(fb), val(v) { + U_ASSERT(v != nullptr); } -// Only works if not fully evaluated -FunctionOptions InternalValue::takeOptions(UErrorCode& status) { + +FunctionValue* InternalValue::takeValue(UErrorCode& status) { if (U_FAILURE(status)) { return {}; } - if (!isSuspension()) { + if (isFallback() || isNullOperand()) { status = U_ILLEGAL_ARGUMENT_ERROR; return {}; } - return std::move(resolvedOptions); + U_ASSERT(val.isValid()); + return val.orphan(); } -// Only works if not fully evaluated -FunctionName InternalValue::getFunctionName(UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; - } - if (functionName.isEmpty()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return {}; + +bool InternalValue::isSelectable() const { + if (isFallbackValue) { + return false; } - return functionName; + return 
val->isSelectable(); } - // PrioritizedVariant // ------------------ diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index f4387fbde419..6ad0b264fe8d 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -52,37 +52,45 @@ namespace message2 { // to be until the body of the message is processed. class InternalValue : public UObject { public: - bool isFallback() const { return !fallbackString.isEmpty(); } - bool isSuspension() const { return !functionName.isEmpty(); } - InternalValue() : fallbackString("") {} + bool isFallback() const { return isFallbackValue; } + bool isNullOperand() const { return isNull; } + bool isSelectable() const; + // Null operand constructor + InternalValue() : isFallbackValue(false), isNull(true), fallbackString("") {} // Fallback constructor - explicit InternalValue(UnicodeString fb) : fallbackString(fb) {} + explicit InternalValue(const UnicodeString& fb) + : isFallbackValue(true), fallbackString(fb) {} // Fully-evaluated value constructor - explicit InternalValue(FormattedPlaceholder&& f) - : fallbackString(""), functionName(""), operand(std::move(f)) {} - // Suspension constructor - InternalValue(const FunctionName& name, - FunctionOptions&& options, - FormattedPlaceholder&& rand); - // Error code is set if this isn't fully evaluated - FormattedPlaceholder takeValue(UErrorCode& status); - // Error code is set if this is not a suspension - FormattedPlaceholder takeOperand(UErrorCode& status); - // Error code is set if this is not a suspension - FunctionOptions takeOptions(UErrorCode& status); - // Error code is set if this is not a suspension - FunctionName getFunctionName(UErrorCode& status) const; + explicit InternalValue(FunctionValue* v, const UnicodeString& fb); + // Error code is set if this is a fallback or null + FunctionValue* takeValue(UErrorCode& status); UnicodeString asFallback() const { return 
fallbackString; } virtual ~InternalValue(); InternalValue& operator=(InternalValue&&); InternalValue(InternalValue&&); private: - UnicodeString fallbackString; // Non-empty if fallback - FunctionName functionName; // Non-empty if this is a suspension - FunctionOptions resolvedOptions; // Ignored unless this is a suspension - FormattedPlaceholder operand; + bool isFallbackValue = false; + bool isNull = false; + UnicodeString fallbackString; + LocalPointer val; }; // class InternalValue +// Used for arguments and literals + class BaseValue : public FunctionValue { + public: + static BaseValue* create(const Locale&, const Formattable&, UErrorCode&); + // Apply default formatters to the argument value + UnicodeString formatToString(UErrorCode&) const override; + UBool isSelectable() const override { return true; } + BaseValue() {} + BaseValue(BaseValue&&); + BaseValue& operator=(BaseValue&&) noexcept; + private: + Locale locale; + + BaseValue(const Locale&, const Formattable&); + }; // class BaseValue + // PrioritizedVariant // For how this class is used, see the references to (integer, variant) tuples diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 694a480592ef..ee889d41a3cb 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -276,6 +276,7 @@ namespace message2 { df->format(date, result, 0, errorCode); } +#if false // Called when output is required and the contents are an unevaluated `Formattable`; // formats the source `Formattable` to a string with defaults, if it can be // formatted with a default formatter @@ -286,50 +287,59 @@ namespace message2 { const Formattable* toFormat = input.getSource(status); U_ASSERT(U_SUCCESS(status)); // Shouldn't get called on a null argument + + return formattableToString(locale, *toFormat, status); + } +#endif + + UnicodeString formattableToString(const Locale& locale, + const Formattable& 
toFormat, + UErrorCode& status) { + EMPTY_ON_ERROR(status); + // Try as decimal number first - if (toFormat->isNumeric()) { + if (toFormat.isNumeric()) { // Note: the ICU Formattable has to be created here since the StringPiece // refers to state inside the Formattable; so otherwise we'll have a reference // to a temporary object - icu::Formattable icuFormattable = toFormat->asICUFormattable(status); + icu::Formattable icuFormattable = toFormat.asICUFormattable(status); StringPiece asDecimal = icuFormattable.getDecimalNumber(status); if (U_FAILURE(status)) { return {}; } if (asDecimal != nullptr) { - return FormattedPlaceholder(input, - FormattedValue(formatNumberWithDefaults(locale, asDecimal, status))); + return formatNumberWithDefaults(locale, asDecimal, status).toString(status); } } - UFormattableType type = toFormat->getType(); + UFormattableType type = toFormat.getType(); switch (type) { case UFMT_DATE: { UnicodeString result; - UDate d = toFormat->getDate(status); + UDate d = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, d, result, status); - return FormattedPlaceholder(input, FormattedValue(std::move(result))); + return result; } case UFMT_DOUBLE: { - double d = toFormat->getDouble(status); + double d = toFormat.getDouble(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status))); + return formatNumberWithDefaults(locale, d, status).toString(status); } case UFMT_LONG: { - int32_t l = toFormat->getLong(status); + int32_t l = toFormat.getLong(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status))); + return formatNumberWithDefaults(locale, l, status).toString(status); } case UFMT_INT64: { - int64_t i = toFormat->getInt64Value(status); + int64_t i = toFormat.getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, 
FormattedValue(formatNumberWithDefaults(locale, i, status))); + return formatNumberWithDefaults(locale, i, status).toString(status); } case UFMT_STRING: { - const UnicodeString& s = toFormat->getString(status); + const UnicodeString& s = toFormat.getString(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(UnicodeString(s))); + return s; } default: { // No default formatters for other types; use fallback @@ -342,6 +352,7 @@ namespace message2 { } } +#if false // Called when string output is required; forces output to be produced // if none is present (including formatting number output as a string) UnicodeString FormattedPlaceholder::formatToString(const Locale& locale, @@ -371,6 +382,7 @@ namespace message2 { } return evaluated.formatToString(locale, status); } +#endif } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 8d17ae49b99a..819ff4a6a353 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -122,19 +122,18 @@ namespace message2 { // Set up the standard function registry MFFunctionRegistry::Builder standardFunctionsBuilder(success); - FormatterFactory* dateTime = StandardFunctions::DateTimeFactory::dateTime(success); - FormatterFactory* date = StandardFunctions::DateTimeFactory::date(success); - FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success); - FormatterFactory* number = new StandardFunctions::NumberFactory(); - FormatterFactory* integer = new StandardFunctions::IntegerFactory(); - standardFunctionsBuilder.adoptFormatter(FunctionName(UnicodeString("datetime")), dateTime, success) - .adoptFormatter(FunctionName(UnicodeString("date")), date, success) - .adoptFormatter(FunctionName(UnicodeString("time")), time, success) - .adoptFormatter(FunctionName(UnicodeString("number")), number, success) - 
.adoptFormatter(FunctionName(UnicodeString("integer")), integer, success) - .adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success) - .adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success) - .adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success); + Function* dateTime = StandardFunctions::DateTime::dateTime(locale, success); + Function* date = StandardFunctions::DateTime::date(locale, success); + Function* time = StandardFunctions::DateTime::time(locale, success); + standardFunctionsBuilder.adoptFunction(FunctionName(UnicodeString("datetime")), dateTime, success) + .adoptFunction(FunctionName(UnicodeString("date")), date, success) + .adoptFunction(FunctionName(UnicodeString("time")), time, success) + .adoptFunction(FunctionName(UnicodeString("number")), + StandardFunctions::Number::number(locale, success), success) + .adoptFunction(FunctionName(UnicodeString("integer")), + StandardFunctions::Number::integer(locale, success), success) + .adoptFunction(FunctionName(UnicodeString("string")), + StandardFunctions::String::string(locale, success), success); CHECK_ERROR(success); standardMFFunctionRegistry = standardFunctionsBuilder.build(); CHECK_ERROR(success); @@ -214,118 +213,25 @@ namespace message2 { cleanup(); } - // Selector and formatter lookup - // ----------------------------- - - // Postcondition: selector != nullptr || U_FAILURE(status) - Selector* MessageFormatter::getSelector(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - U_ASSERT(isSelector(functionName)); - - const SelectorFactory* selectorFactory = lookupSelectorFactory(context, functionName, status); - NULL_ON_ERROR(status); - if (selectorFactory == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - // Create a 
specific instance of the selector - auto result = selectorFactory->createSelector(getLocale(), status); - NULL_ON_ERROR(status); - return result; - } - - // Returns an owned pointer - Formatter* MessageFormatter::getFormatter(const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - - // Create the formatter - - // First, look up the formatter factory for this function - FormatterFactory* formatterFactory = lookupFormatterFactory(functionName, status); - NULL_ON_ERROR(status); - - U_ASSERT(formatterFactory != nullptr); - - // Create a specific instance of the formatter - Formatter* formatter = formatterFactory->createFormatter(locale, status); - NULL_ON_ERROR(status); - if (formatter == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return formatter; - } - - bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(hasCustomMFFunctionRegistry()); - const MFFunctionRegistry& reg = getCustomMFFunctionRegistry(); - return reg.getDefaultFormatterNameByType(type, name); - } - // --------------------------------------------------- // Function registry - bool MessageFormatter::isBuiltInSelector(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasSelector(functionName); + bool MessageFormatter::isBuiltInFunction(const FunctionName& functionName) const { + return standardMFFunctionRegistry.hasFunction(functionName); } - bool MessageFormatter::isBuiltInFormatter(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasFormatter(functionName); - } - - // https://github.com/unicode-org/message-format-wg/issues/409 - // Unknown function = unknown function error - // Formatter used as selector = selector error - // Selector used as formatter = formatting error - const SelectorFactory* MessageFormatter::lookupSelectorFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - 
DynamicErrors& err = context.getErrors(); - - if (isBuiltInSelector(functionName)) { - return standardMFFunctionRegistry.getSelector(functionName); - } - if (isBuiltInFormatter(functionName)) { - err.setSelectorError(functionName, status); - return nullptr; - } - if (hasCustomMFFunctionRegistry()) { - const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - const SelectorFactory* selectorFactory = customMFFunctionRegistry.getSelector(functionName); - if (selectorFactory != nullptr) { - return selectorFactory; - } - if (customMFFunctionRegistry.getFormatter(functionName) != nullptr) { - err.setSelectorError(functionName, status); - return nullptr; - } - } - // Either there is no custom function registry and the function - // isn't built-in, or the function doesn't exist in either the built-in - // or custom registry. - // Unknown function error - err.setUnknownFunction(functionName, status); - return nullptr; - } - - FormatterFactory* MessageFormatter::lookupFormatterFactory(const FunctionName& functionName, - UErrorCode& status) const { + Function* MessageFormatter::lookupFunction(const FunctionName& functionName, + UErrorCode& status) const { NULL_ON_ERROR(status); - if (isBuiltInFormatter(functionName)) { - return standardMFFunctionRegistry.getFormatter(functionName); - } - if (isBuiltInSelector(functionName)) { - status = U_MF_FORMATTING_ERROR; - return nullptr; + if (isBuiltInFunction(functionName)) { + return standardMFFunctionRegistry.getFunction(functionName); } if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - FormatterFactory* formatterFactory = customMFFunctionRegistry.getFormatter(functionName); - if (formatterFactory != nullptr) { - return formatterFactory; - } - if (customMFFunctionRegistry.getSelector(functionName) != nullptr) { - status = U_MF_FORMATTING_ERROR; - return nullptr; + Function* function = 
customMFFunctionRegistry.getFunction(functionName); + if (function != nullptr) { + return function; } } // Either there is no custom function registry and the function @@ -336,13 +242,8 @@ namespace message2 { return nullptr; } - bool MessageFormatter::isCustomFormatter(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFormatter(fn) != nullptr; - } - - - bool MessageFormatter::isCustomSelector(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getSelector(fn) != nullptr; + bool MessageFormatter::isCustomFunction(const FunctionName& fn) const { + return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFunction(fn) != nullptr; } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 2efc54febdc3..a8c4fd8064d8 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -38,41 +38,22 @@ namespace message2 { // Function registry implementation -Formatter::~Formatter() {} -Selector::~Selector() {} -FormatterFactory::~FormatterFactory() {} -SelectorFactory::~SelectorFactory() {} +Function::~Function() {} +FunctionValue::~FunctionValue() {} MFFunctionRegistry MFFunctionRegistry::Builder::build() { - U_ASSERT(formatters != nullptr && selectors != nullptr && formattersByType != nullptr); - MFFunctionRegistry result = MFFunctionRegistry(formatters, selectors, formattersByType); - formatters = nullptr; - selectors = nullptr; - formattersByType = nullptr; + U_ASSERT(functions != nullptr); + MFFunctionRegistry result = MFFunctionRegistry(functions); + functions = nullptr; return result; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) { +MFFunctionRegistry::Builder& 
MFFunctionRegistry::Builder::adoptFunction(const FunctionName& functionName, + Function* function, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { - U_ASSERT(selectors != nullptr); - selectors->put(selectorName, selectorFactory, errorCode); - } - return *this; -} - -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - U_ASSERT(formatters != nullptr); - formatters->put(formatterName, formatterFactory, errorCode); - } - return *this; -} - -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, const FunctionName& functionName, UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - U_ASSERT(formattersByType != nullptr); - FunctionName* f = create(FunctionName(functionName), errorCode); - formattersByType->put(type, f, errorCode); + U_ASSERT(functions != nullptr); + functions->put(functionName, function, errorCode); } return *this; } @@ -80,90 +61,50 @@ MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNam MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) { CHECK_ERROR(errorCode); - formatters = new Hashtable(); - selectors = new Hashtable(); - formattersByType = new Hashtable(); - if (!(formatters != nullptr && selectors != nullptr && formattersByType != nullptr)) { + functions = new Hashtable(); + if (functions == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - formatters->setValueDeleter(uprv_deleteUObject); - selectors->setValueDeleter(uprv_deleteUObject); - formattersByType->setValueDeleter(uprv_deleteUObject); + functions->setValueDeleter(uprv_deleteUObject); } MFFunctionRegistry::Builder::~Builder() { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; - } - if (formattersByType != nullptr) { - delete formattersByType; + if (functions != nullptr) { + 
delete functions; + functions = nullptr; } } // Returns non-owned pointer. Returns pointer rather than reference because it can fail. -// Returns non-const because FormatterFactory is mutable. -// TODO: This is unsafe because of the cached-formatters map -// (the caller could delete the resulting pointer) -FormatterFactory* MFFunctionRegistry::getFormatter(const FunctionName& formatterName) const { - U_ASSERT(formatters != nullptr); - return static_cast(formatters->get(formatterName)); -} - -UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(formatters != nullptr); - const FunctionName* f = static_cast(formattersByType->get(type)); - if (f != nullptr) { - name = *f; - return true; - } - return false; -} - -const SelectorFactory* MFFunctionRegistry::getSelector(const FunctionName& selectorName) const { - U_ASSERT(selectors != nullptr); - return static_cast(selectors->get(selectorName)); +// Returns non-const because Function is mutable. 
+Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { + U_ASSERT(functions != nullptr); + return static_cast(functions->get(functionName)); } -bool MFFunctionRegistry::hasFormatter(const FunctionName& f) const { - return getFormatter(f) != nullptr; +bool MFFunctionRegistry::hasFunction(const FunctionName& f) const { + return getFunction(f) != nullptr; } -bool MFFunctionRegistry::hasSelector(const FunctionName& s) const { - return getSelector(s) != nullptr; -} - -void MFFunctionRegistry::checkFormatter(const char* s) const { +void MFFunctionRegistry::checkFunction(const char* s) const { #if U_DEBUG - U_ASSERT(hasFormatter(FunctionName(UnicodeString(s)))); + U_ASSERT(hasFunction(FunctionName(UnicodeString(s)))); #else (void) s; #endif } -void MFFunctionRegistry::checkSelector(const char* s) const { -#if U_DEBUG - U_ASSERT(hasSelector(FunctionName(UnicodeString(s)))); -#else - (void) s; -#endif -} - // Debugging void MFFunctionRegistry::checkStandard() const { - checkFormatter("datetime"); - checkFormatter("date"); - checkFormatter("time"); - checkFormatter("number"); - checkFormatter("integer"); - checkSelector("number"); - checkSelector("integer"); - checkSelector("string"); + checkFunction("datetime"); + checkFunction("date"); + checkFunction("time"); + checkFunction("number"); + checkFunction("integer"); + checkFunction("string"); } -// Formatter/selector helpers +// Function/selector helpers // Converts `s` to a double, indicating failure via `errorCode` static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) { @@ -215,33 +156,24 @@ static int64_t getInt64Value(const Locale& locale, const Formattable& value, UEr return 0; } -// Adopts its arguments -MFFunctionRegistry::MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType) : formatters(f), selectors(s), formattersByType(byType) { - U_ASSERT(f != nullptr && s != nullptr && byType != nullptr); +// Adopts its argument 
+MFFunctionRegistry::MFFunctionRegistry(FunctionMap* f) : functions(f) { + U_ASSERT(f != nullptr); } MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept { cleanup(); - formatters = other.formatters; - selectors = other.selectors; - formattersByType = other.formattersByType; - other.formatters = nullptr; - other.selectors = nullptr; - other.formattersByType = nullptr; + functions = other.functions; + other.functions = nullptr; return *this; } void MFFunctionRegistry::cleanup() noexcept { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; - } - if (formattersByType != nullptr) { - delete formattersByType; + if (functions != nullptr) { + delete functions; + functions = nullptr; } } @@ -250,26 +182,43 @@ MFFunctionRegistry::~MFFunctionRegistry() { cleanup(); } -/* static */ UnicodeString -StandardFunctions::getStringOption(const FunctionOptions& opts, - const UnicodeString& key, - UErrorCode& status) { - const FormattedPlaceholder* optionVal = opts.getFunctionOption(key, status); - EMPTY_ON_ERROR(status); +// Specific function implementations - const Formattable* optionSrc = optionVal->getSource(status); - // Null operand should never appear as an option value - U_ASSERT(U_SUCCESS(status)); +// --------- Number - const UnicodeString& result = optionSrc->getString(status); - EMPTY_ON_ERROR(status); +/* static */ StandardFunctions::Number* +StandardFunctions::Number::integer(const Locale& loc, UErrorCode& success) { + return create(loc, true, success); +} - return result; +/* static */ StandardFunctions::Number* +StandardFunctions::Number::number(const Locale& loc, UErrorCode& success) { + return create(loc, false, success); } -// Specific formatter implementations +/* static */ StandardFunctions::Number* +StandardFunctions::Number::create(const Locale& loc, bool isInteger, UErrorCode& success) { + NULL_ON_ERROR(success); -// --------- Number + LocalPointer result(new Number(loc, 
isInteger)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +FunctionValue* StandardFunctions::Number::call(FunctionValue* operand, + FunctionOptions&& options, + UErrorCode& errorCode) { + LocalPointer + val(new NumberValue(*this, locale, operand, std::move(options), errorCode)); + if (val.isValid()) { + return val.orphan(); + } + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; +} /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, const FunctionOptions& opts, @@ -417,32 +366,6 @@ StandardFunctions::getStringOption(const FunctionOptions& opts, return nf.locale(number.locale); } -Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(Number::integer(locale)); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -StandardFunctions::IntegerFactory::~IntegerFactory() {} - -static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { - return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); -} - static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; @@ -474,10 +397,9 @@ static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& erro return result; } -static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, - FormattedPlaceholder&& arg, - const UnicodeString& input, - UErrorCode& errorCode) { +static number::FormattedNumber tryParsingNumberLiteral(const 
number::LocalizedNumberFormatter& nf, + const UnicodeString& input, + UErrorCode& errorCode) { double numberValue = parseNumberLiteral(input, errorCode); if (U_FAILURE(errorCode)) { return {}; @@ -489,18 +411,26 @@ static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumbe if (errorCode == U_USING_DEFAULT_WARNING) { errorCode = savedStatus; } - return arg.withResult(FormattedValue(std::move(result))); + return result; } int32_t StandardFunctions::Number::digitSizeOption(const FunctionOptions& opts, const UnicodeString& k) const { UErrorCode localStatus = U_ZERO_ERROR; - const FormattedPlaceholder* opt = opts.getFunctionOption(k, - localStatus); + const FunctionValue* opt = opts.getFunctionOption(k, + localStatus); if (U_SUCCESS(localStatus)) { - const Formattable* src = opt->getSource(localStatus); - U_ASSERT(U_SUCCESS(localStatus)); // null shouldn't appear as an option value - int64_t val = getInt64Value(locale, *src, localStatus); + // First try the formatted value + UnicodeString formatted = opt->formatToString(localStatus); + int64_t val = 0; + if (U_SUCCESS(localStatus)) { + val = getInt64Value(locale, Formattable(formatted), localStatus); + } + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + } + // Next try the operand + val = getInt64Value(locale, opt->getOperand(), localStatus); if (U_SUCCESS(localStatus)) { return static_cast(val); } @@ -551,70 +481,73 @@ bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { return (style == UnicodeString("percent")); } -/* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) { - return StandardFunctions::Number(loc, true); -} - -FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& arg, FunctionOptions&& opts, UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; +StandardFunctions::NumberValue::NumberValue(const Number& parent, + const Locale& loc, + FunctionValue* arg, + 
FunctionOptions&& options, + UErrorCode& errorCode) : locale(loc) { + CHECK_ERROR(errorCode); + // Must have an argument + if (arg == nullptr) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } + opts = options.mergeOptions(arg->getResolvedOptions(), errorCode); + operand = arg->getOperand(); + number::LocalizedNumberFormatter realFormatter; - realFormatter = formatterForOptions(*this, opts, errorCode); + realFormatter = formatterForOptions(parent, opts, errorCode); - number::FormattedNumber numberResult; if (U_SUCCESS(errorCode)) { - const Formattable* toFormat = arg.getSource(errorCode); - if (U_FAILURE(errorCode)) { - // number must take an argument - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; - } - switch (toFormat->getType()) { + switch (operand.getType()) { case UFMT_DOUBLE: { - double d = toFormat->getDouble(errorCode); + double d = operand.getDouble(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatDouble(d, errorCode); + formattedNumber = realFormatter.formatDouble(d, errorCode); break; } case UFMT_LONG: { - int32_t l = toFormat->getLong(errorCode); + int32_t l = operand.getLong(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(l, errorCode); + formattedNumber = realFormatter.formatInt(l, errorCode); break; } case UFMT_INT64: { - int64_t i = toFormat->getInt64(errorCode); + int64_t i = operand.getInt64(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(i, errorCode); + formattedNumber = realFormatter.formatInt(i, errorCode); break; } case UFMT_STRING: { // Try to parse the string as a number - return tryParsingNumberLiteral(realFormatter, - std::move(arg), - toFormat->getString(errorCode), - errorCode); + formattedNumber = tryParsingNumberLiteral(realFormatter, + operand.getString(errorCode), + errorCode); + break; } default: { // Other types can't be parsed as a number errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + 
break; } } } - - return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult))); } -StandardFunctions::Number::~Number() {} -StandardFunctions::NumberFactory::~NumberFactory() {} +UnicodeString StandardFunctions::NumberValue::formatToString(UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return {}; + } -// --------- PluralFactory + return formattedNumber.toString(errorCode); +} +StandardFunctions::Number::~Number() {} +StandardFunctions::NumberValue::~NumberValue() {} -StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { +/* static */ StandardFunctions::Number::PluralType +StandardFunctions::Number::pluralType(const FunctionOptions& opts) { const UnicodeString& select = opts.getStringFunctionOption(UnicodeString("select")); if (select.length() > 0) { @@ -628,44 +561,19 @@ StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(cons return PluralType::PLURAL_CARDINAL; } -Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - - Selector* result; - if (isInteger) { - result = new Plural(Plural::integer(locale, errorCode)); - } else { - result = new Plural(locale, errorCode); - } - NULL_ON_ERROR(errorCode); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) const { +void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) { CHECK_ERROR(errorCode); - // Handle any formatting options - PluralType type = pluralType(opts); - FormattedPlaceholder resolvedSelector = numberFormatter->format(std::move(toFormat), - 
std::move(opts), - errorCode); - CHECK_ERROR(errorCode); + Number::PluralType type = Number::pluralType(opts); - U_ASSERT(resolvedSelector.isFunctionResult() - && resolvedSelector.output().isNumber()); + // (resolvedSelector is `this`) // See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection // 1. Let exact be the JSON string representation of the numeric value of resolvedSelector - const number::FormattedNumber& formattedNumber = resolvedSelector.output().getNumber(); UnicodeString exact = formattedNumber.toString(errorCode); if (U_FAILURE(errorCode)) { @@ -677,8 +585,8 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // Step 2. Let keyword be a string which is the result of rule selection on resolvedSelector. // If the option select is set to exact, rule-based selection is not used. Return the empty string. UnicodeString keyword; - if (type != PluralType::PLURAL_EXACT) { - UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; + if (type != Number::PluralType::PLURAL_EXACT) { + UPluralType t = type == Number::PluralType::PLURAL_ORDINAL ? 
UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; // Look up plural rules by locale and type LocalPointer rules(PluralRules::forLocale(locale, t, errorCode)); CHECK_ERROR(errorCode); @@ -713,7 +621,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, } // Return immediately if exact matching was requested - if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) { + if (prefsLen == keysLen || type == Number::PluralType::PLURAL_EXACT) { return; } @@ -738,35 +646,9 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // (Implicit, since `prefs` is an out-parameter) } -StandardFunctions::Plural::Plural(const Locale& loc, UErrorCode& status) : locale(loc) { - CHECK_ERROR(status); - - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } -} - -StandardFunctions::Plural::Plural(const Locale& loc, bool isInt, UErrorCode& status) : locale(loc), isInteger(isInt) { - CHECK_ERROR(status); - - if (isInteger) { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc, true)); - } else { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - } - - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } -} - -StandardFunctions::Plural::~Plural() {} - -StandardFunctions::PluralFactory::~PluralFactory() {} - -// --------- DateTimeFactory +// --------- DateTime +/* // Date/time options only static UnicodeString defaultForOption(const UnicodeString& optionName) { if (optionName == UnicodeString("dateStyle") @@ -776,58 +658,39 @@ static UnicodeString defaultForOption(const UnicodeString& optionName) { } return {}; // Empty string is default } +*/ -// TODO -// Only DateTime currently uses the function options stored in the placeholder. -// It also doesn't use them very consistently (it looks at the previous set of options, -// and others aren't preserved). 
This needs to be generalized, -// but that depends on https://github.com/unicode-org/message-format-wg/issues/515 -// Finally, the option value is assumed to be a string, -// which works for datetime options but not necessarily in general. -UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName) const { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.getOptions(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Finally, use default - return defaultForOption(optionName); +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::date(const Locale& loc, UErrorCode& success) { + return DateTime::create(loc, DateTimeType::kDate, success); } -// Used for options that don't have defaults -UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) const { - if (U_SUCCESS(errorCode)) { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.getOptions(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; +/* static */ StandardFunctions::DateTime* 
+StandardFunctions::DateTime::time(const Locale& loc, UErrorCode& success) { + return DateTime::create(loc, DateTimeType::kTime, success); +} + +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::dateTime(const Locale& loc, UErrorCode& success) { + return DateTime::create(loc, DateTimeType::kDateTime, success); +} + +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::create(const Locale& loc, DateTimeType type, UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new DateTime(loc, type)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } - return {}; + return result.orphan(); +} + +FunctionValue* +StandardFunctions::DateTime::call(FunctionValue* val, FunctionOptions&& opts, UErrorCode& errorCode) { + auto result = new DateTimeValue(locale, type, val, std::move(opts), errorCode); + return result; } static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) { @@ -853,58 +716,30 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC return DateFormat::EStyle::kNone; } -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::dateTime(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& status) const { + (void) status; - DateTimeFactory* result = new StandardFunctions::DateTimeFactory(DateTimeType::DateTime); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; + return formattedDate; } -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::date(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Date); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ StandardFunctions::DateTimeFactory* 
StandardFunctions::DateTimeFactory::time(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +StandardFunctions::DateTimeValue::DateTimeValue(const Locale& loc, + DateTime::DateTimeType type, + FunctionValue* val, + FunctionOptions&& options, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Time); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + // Must have an argument + if (val == nullptr) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - return result; -} - -Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - Formatter* result = new StandardFunctions::DateTime(locale, type); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} + locale = loc; + operand = val->getOperand(); + opts = options.mergeOptions(val->getResolvedOptions(), errorCode); -FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } - const Formattable* source = toFormat.getSource(errorCode); - // Function requires an operand - if (U_FAILURE(errorCode)) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; - } + const Formattable* source = &operand; LocalPointer df; Formattable opt; @@ -922,26 +757,26 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& bool hasTimeStyleOption = dateStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; - bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime + bool useStyle = (type == DateTime::DateTimeType::kDateTime && (hasDateStyleOption || hasTimeStyleOption || noOptions)) - || (type != DateTimeFactory::DateTimeType::DateTime); + || (type != DateTime::DateTimeType::kDateTime); - bool useDate = type == DateTimeFactory::DateTimeType::Date - || 
(type == DateTimeFactory::DateTimeType::DateTime + bool useDate = type == DateTime::DateTimeType::kDate + || (type == DateTime::DateTimeType::kDateTime && hasDateStyleOption); - bool useTime = type == DateTimeFactory::DateTimeType::Time - || (type == DateTimeFactory::DateTimeType::DateTime + bool useTime = type == DateTime::DateTimeType::kTime + || (type == DateTime::DateTimeType::kDateTime && hasTimeStyleOption); if (useStyle) { // Extract style options - if (type == DateTimeFactory::DateTimeType::DateTime) { + if (type == DateTime::DateTimeType::kDateTime) { // Note that the options-getting has to be repeated across the three cases, // since `:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time` // use "style" - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, dateStyleName), errorCode); - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, timeStyleName), errorCode); + dateStyle = stringToStyle(opts.getStringFunctionOption(dateStyleName), errorCode); + timeStyle = stringToStyle(opts.getStringFunctionOption(timeStyleName), errorCode); if (useDate && !useTime) { df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); @@ -950,12 +785,12 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } else { df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale)); } - } else if (type == DateTimeFactory::DateTimeType::Date) { - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + } else if (type == DateTime::DateTimeType::kDate) { + dateStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); } else { // :time - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + timeStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale)); } } else { @@ -966,7 
+801,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& #define ADD_PATTERN(s) skeleton += UnicodeString(s) if (U_SUCCESS(errorCode)) { // Year - UnicodeString year = getFunctionOption(toFormat, opts, UnicodeString("year"), errorCode); + UnicodeString year = opts.getStringFunctionOption(UnicodeString("year"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -978,7 +813,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Month - UnicodeString month = getFunctionOption(toFormat, opts, UnicodeString("month"), errorCode); + UnicodeString month = opts.getStringFunctionOption(UnicodeString("month"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -997,7 +832,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Weekday - UnicodeString weekday = getFunctionOption(toFormat, opts, UnicodeString("weekday"), errorCode); + UnicodeString weekday = opts.getStringFunctionOption(UnicodeString("weekday"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1011,7 +846,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Day - UnicodeString day = getFunctionOption(toFormat, opts, UnicodeString("day"), errorCode); + UnicodeString day = opts.getStringFunctionOption(UnicodeString("day"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1023,7 +858,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Hour - UnicodeString hour = getFunctionOption(toFormat, opts, UnicodeString("hour"), errorCode); + UnicodeString hour = opts.getStringFunctionOption(UnicodeString("hour"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1035,7 +870,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Minute - UnicodeString minute = 
getFunctionOption(toFormat, opts, UnicodeString("minute"), errorCode); + UnicodeString minute = opts.getStringFunctionOption(UnicodeString("minute"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1047,7 +882,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Second - UnicodeString second = getFunctionOption(toFormat, opts, UnicodeString("second"), errorCode); + UnicodeString second = opts.getStringFunctionOption(UnicodeString("second"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1070,11 +905,11 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } if (U_FAILURE(errorCode)) { - return {}; + return; } if (!df.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } UnicodeString result; @@ -1107,8 +942,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& // Use the parsed date as the source value // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted - toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback()); + operand = message2::Formattable::forDate(d); df->format(d, result, 0, errorCode); } break; @@ -1129,49 +963,73 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } if (U_FAILURE(errorCode)) { - return {}; + return; + } + // Ignore U_USING_DEFAULT_WARNING + if (errorCode == U_USING_DEFAULT_WARNING) { + errorCode = U_ZERO_ERROR; } - return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); + formattedDate = result; } -StandardFunctions::DateTimeFactory::~DateTimeFactory() {} StandardFunctions::DateTime::~DateTime() {} +StandardFunctions::DateTimeValue::~DateTimeValue() {} -// --------- TextFactory +// --------- String -Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { - 
Selector* result = new TextSelector(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; +/* static */ StandardFunctions::String* +StandardFunctions::String::string(const Locale& loc, UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new String(loc)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; return nullptr; } - return result; + return result.orphan(); } -void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, +extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); + +FunctionValue* +StandardFunctions::String::call(FunctionValue* val, FunctionOptions&& opts, UErrorCode& errorCode) { + return new StringValue(locale, val, std::move(opts), errorCode); +} + +UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; +} + +StandardFunctions::StringValue::StringValue(const Locale& locale, + FunctionValue* val, + FunctionOptions&& options, + UErrorCode& status) { + CHECK_ERROR(status); + operand = val->getOperand(); + opts = std::move(options); // No options + // Convert to string + formattedString = formattableToString(locale, operand, status); +} + +void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, int32_t keysLen, UnicodeString* prefs, int32_t& prefsLen, - UErrorCode& errorCode) const { - // No options - (void) opts; - + UErrorCode& errorCode) { CHECK_ERROR(errorCode); // Just compares the key and value as strings prefsLen = 0; - // Convert to string - const UnicodeString& formattedValue = toFormat.formatToString(locale, errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t i = 0; i < keysLen; i++) { - if (keys[i] == formattedValue) { + if (keys[i] == formattedString) { prefs[0] = keys[i]; prefsLen = 1; break; @@ -1179,8 +1037,8 @@ void 
StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, } } -StandardFunctions::TextFactory::~TextFactory() {} -StandardFunctions::TextSelector::~TextSelector() {} +StandardFunctions::String::~String() {} +StandardFunctions::StringValue::~StringValue() {} } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 8c3d29a1f06e..fec7c7ca3dce 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -31,95 +31,58 @@ namespace message2 { class StandardFunctions { friend class MessageFormatter; - static UnicodeString getStringOption(const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode); - class DateTime; + class DateTimeValue; - class DateTimeFactory : public FormatterFactory { + class DateTime : public Function { public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - static DateTimeFactory* date(UErrorCode&); - static DateTimeFactory* time(UErrorCode&); - static DateTimeFactory* dateTime(UErrorCode&); - DateTimeFactory() = delete; - virtual ~DateTimeFactory(); + FunctionValue* call(FunctionValue* operand, + FunctionOptions&& options, + UErrorCode& errorCode) override; + static DateTime* date(const Locale&, UErrorCode&); + static DateTime* time(const Locale&, UErrorCode&); + static DateTime* dateTime(const Locale&, UErrorCode&); + virtual ~DateTime(); private: - friend class DateTime; + friend class DateTimeValue; typedef enum DateTimeType { - Date, - Time, - DateTime + kDate, + kTime, + kDateTime } DateTimeType; - - DateTimeType type; - DateTimeFactory(DateTimeType t) : type(t) {} - }; - - class DateTime : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; - virtual 
~DateTime(); - - private: const Locale& locale; - const DateTimeFactory::DateTimeType type; - friend class DateTimeFactory; - DateTime(const Locale& l, DateTimeFactory::DateTimeType t) : locale(l), type(t) {} + const DateTimeType type; + static DateTime* create(const Locale&, DateTimeType, UErrorCode&); + DateTime(const Locale& l, DateTimeType t) : locale(l), type(t) {} const LocalPointer icuFormatter; - - /* - Looks up an option by name, first checking `opts`, then the cached options - in `toFormat` if applicable, and finally using a default - - Ignores any options with non-string values - */ - UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName) const; - // Version for options that don't have defaults; sets the error - // code instead of returning a default value - UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) const; - }; - // Note: IntegerFactory doesn't implement SelectorFactory; - // instead, an instance of PluralFactory is registered to the integer - // selector - // TODO - class IntegerFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~IntegerFactory(); - }; + class NumberValue; - class NumberFactory : public FormatterFactory { + class Number : public Function { public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~NumberFactory(); - private: - friend class IntegerFactory; - static NumberFactory integer(const Locale& locale, UErrorCode& status); - }; + static Number* integer(const Locale& loc, UErrorCode& success); + static Number* number(const Locale& loc, UErrorCode& success); - class Number : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) 
const override; + FunctionValue* call(FunctionValue* operand, + FunctionOptions&& options, + UErrorCode& errorCode) override; virtual ~Number(); private: - friend class NumberFactory; + friend class NumberValue; friend class StandardFunctions; - Number(const Locale& loc) : locale(loc), icuFormatter(number::NumberFormatter::withLocale(loc)) {} + typedef enum PluralType { + PLURAL_ORDINAL, + PLURAL_CARDINAL, + PLURAL_EXACT + } PluralType; + + static Number* create(const Locale&, bool, UErrorCode&); Number(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt), icuFormatter(number::NumberFormatter::withLocale(loc)) {} - static Number integer(const Locale& loc); // These options have their own accessor methods, since they have different default values. int32_t digitSizeOption(const FunctionOptions&, const UnicodeString&) const; @@ -133,83 +96,82 @@ namespace message2 { const Locale& locale; const bool isInteger = false; const number::LocalizedNumberFormatter icuFormatter; + + static PluralType pluralType(const FunctionOptions& opts); }; static number::LocalizedNumberFormatter formatterForOptions(const Number& number, const FunctionOptions& opts, UErrorCode& status); - class PluralFactory : public SelectorFactory { + + class NumberValue : public FunctionValue { public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~PluralFactory(); + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& status) override; + UBool isSelectable() const override { return true; } + NumberValue(); + virtual ~NumberValue(); + private: + friend class Number; + + Locale locale; + number::FormattedNumber formattedNumber; + NumberValue(const Number&, const Locale&, FunctionValue*, FunctionOptions&&, UErrorCode&); + }; // class NumberValue + class DateTimeValue : public FunctionValue { + public: + UnicodeString 
formatToString(UErrorCode&) const; + DateTimeValue(); + virtual ~DateTimeValue(); private: - friend class IntegerFactory; - friend class MessageFormatter; + friend class DateTime; - PluralFactory() {} - PluralFactory(bool isInt) : isInteger(isInt) {} - static PluralFactory integer() { return PluralFactory(true);} - const bool isInteger = false; - }; + Locale locale; + UnicodeString formattedDate; + DateTimeValue(const Locale&, DateTime::DateTimeType type, + FunctionValue*, FunctionOptions&&, UErrorCode&); + }; // class DateTimeValue - class Plural : public Selector { + class String : public Function { public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~Plural(); + FunctionValue* call(FunctionValue* val, + FunctionOptions&& opts, + UErrorCode& errorCode) override; + static String* string(const Locale& locale, UErrorCode& status); + virtual ~String(); private: - friend class IntegerFactory; - friend class PluralFactory; + friend class StringFactory; - // Can't use UPluralType for this since we want to include - // exact matching as an option - typedef enum PluralType { - PLURAL_ORDINAL, - PLURAL_CARDINAL, - PLURAL_EXACT - } PluralType; - Plural(const Locale& loc, UErrorCode& errorCode); - Plural(const Locale& loc, bool isInt, UErrorCode& errorCode); - static Plural integer(const Locale& loc, UErrorCode& errorCode) { return Plural(loc, true, errorCode); } - PluralType pluralType(const FunctionOptions& opts) const; + // Formatting `value` to a string might require the locale const Locale& locale; - const bool isInteger = false; - LocalPointer numberFormatter; - }; - class TextFactory : public SelectorFactory { - public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~TextFactory(); + String(const Locale& l) : locale(l) {} }; - class TextSelector : 
public Selector { + class StringValue : public FunctionValue { public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~TextSelector(); - + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& status) override; + UBool isSelectable() const override { return true; } + virtual ~StringValue(); private: - friend class TextFactory; + friend class String; - // Formatting `value` to a string might require the locale - const Locale& locale; + UnicodeString formattedString; + StringValue(const Locale&, FunctionValue*, FunctionOptions&&, UErrorCode&); + }; // class StringValue - TextSelector(const Locale& l) : locale(l) {} - }; }; extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode); diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index a8c28f5deb1a..d4819d3ad867 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -32,6 +32,7 @@ namespace message2 { class MessageContext; class StaticErrors; class InternalValue; + class BaseValue; /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. @@ -336,7 +337,6 @@ namespace message2 { // Selection methods - bool isSelectable(const InternalValue&) const; // Takes a vector of FormattedPlaceholders void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) @@ -350,7 +350,7 @@ namespace message2 { void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods - [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; + [[nodiscard]] InternalValue formatLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; [[nodiscard]] InternalValue eval(MessageContext&, InternalValue, UErrorCode&) const; // Dispatches on argument type @@ -366,7 +366,7 @@ namespace message2 { [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; // Function registry methods @@ -379,20 +379,11 @@ namespace message2 { // (a FormatterFactory can have mutable state) const MFFunctionRegistry& getCustomMFFunctionRegistry() const; - bool isCustomFormatter(const FunctionName&) const; - 
FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const; - bool isBuiltInSelector(const FunctionName&) const; - bool isBuiltInFormatter(const FunctionName&) const; - bool isCustomSelector(const FunctionName&) const; - const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; - bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } - bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } + bool isCustomFunction(const FunctionName&) const; + bool isBuiltInFunction(const FunctionName&) const; + bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; - const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; - - Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; - Formatter* getFormatter(const FunctionName&, UErrorCode&) const; - bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; + Function* lookupFunction(const FunctionName&, UErrorCode&) const; // Checking for resolution errors void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 87dd00a2caf5..17354e493ebd 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -28,10 +28,6 @@ class UVector; namespace message2 { - class Formatter; - class MessageContext; - class Selector; - // Formattable // ---------- @@ -449,25 +445,26 @@ namespace message2 { * @deprecated This API is for technology preview only. 
*/ #ifndef U_IN_DOXYGEN -class FormattedPlaceholder; +class FunctionValue; class U_I18N_API ResolvedFunctionOption : public UObject { private: /* const */ UnicodeString name; - // This is a pointer because FormattedPlaceholder and ResolvedFunctionOption - // are mutually recursive - /* const */ LocalPointer value; + // This is a pointer because FunctionValue is an abstract class, + // and is a raw pointer because FunctionValue is forward-declared + /* const */ FunctionValue* value; public: const UnicodeString& getName() const { return name; } - const FormattedPlaceholder* getValue() const { return value.getAlias(); } - FormattedPlaceholder* takeValue() { return value.orphan(); } - ResolvedFunctionOption(const UnicodeString& n, FormattedPlaceholder&& f, UErrorCode& status); + FunctionValue* getValue() const { return value; } + // Adopts `f` + ResolvedFunctionOption(const UnicodeString& n, FunctionValue* f); ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { name = std::move(other.name); value = std::move(other.value); + other.value = nullptr; return *this; } ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = delete; @@ -587,11 +584,13 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ -using FunctionOptionsMap = std::map; +using FunctionOptionsMap = std::map; /** * Structure encapsulating named options passed to a custom selector or formatter. * + * This class is immutable and movable but not copyable. + * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ @@ -602,8 +601,6 @@ class U_I18N_API FunctionOptions : public UObject { * The syntactic order of options is not guaranteed to * be preserved. * - * This class is immutable and movable but not copyable. 
- * * @return A map from strings to FormattedPlaceholder objects representing * the results of resolving each option value. * @@ -613,7 +610,7 @@ class U_I18N_API FunctionOptions : public UObject { FunctionOptionsMap getOptions() const { FunctionOptionsMap result; for (int32_t i = 0; i < functionOptionsLen; i++) { - const ResolvedFunctionOption& opt = options[i]; + ResolvedFunctionOption& opt = options[i]; result[opt.getName()] = opt.getValue(); } return result; @@ -656,6 +653,8 @@ class U_I18N_API FunctionOptions : public UObject { * @deprecated This API is for technology preview only. */ FunctionOptions& operator=(const FunctionOptions&) = delete; + // TODO + FunctionOptions mergeOptions(FunctionOptions&&, UErrorCode&); private: friend class MessageFormatter; friend class StandardFunctions; @@ -663,9 +662,11 @@ class U_I18N_API FunctionOptions : public UObject { explicit FunctionOptions(UVector&&, UErrorCode&); const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; - const FormattedPlaceholder* getFunctionOption(const UnicodeString&, UErrorCode&) const; + const FunctionValue* getFunctionOption(const UnicodeString&, UErrorCode&) const; // Returns empty string if option doesn't exist UnicodeString getStringFunctionOption(const UnicodeString&) const; + // Sets error code if option doesn't exist + UnicodeString getStringFunctionOption(const UnicodeString&, UErrorCode&) const; int32_t optionsCount() const { return functionOptionsLen; } // Named options passed to functions diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index b8429e3b83aa..664f9b3c0c98 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -28,81 +28,7 @@ namespace message2 { using namespace data_model; - /** - * Interface that factory classes for creating formatters must implement. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormatterFactory : public UObject { - // TODO: the coding guidelines say that interface classes - // shouldn't inherit from UObject, but if I change it so these - // classes don't, and the individual formatter factory classes - // inherit from public FormatterFactory, public UObject, then - // memory leaks ensue - public: - /** - * Constructs a new formatter object. This method is not const; - * formatter factories with local state may be defined. - * - * @param locale Locale to be used by the formatter. - * @param status Input/output error code. - * @return The new Formatter, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Formatter* createFormatter(const Locale& locale, UErrorCode& status) = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormatterFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormatterFactory& operator=(const FormatterFactory&) = delete; - }; // class FormatterFactory - - /** - * Interface that factory classes for creating selectors must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API SelectorFactory : public UObject { - public: - /** - * Constructs a new selector object. - * - * @param locale Locale to be used by the selector. - * @param status Input/output error code. - * @return The new selector, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - virtual Selector* createSelector(const Locale& locale, UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~SelectorFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - SelectorFactory& operator=(const SelectorFactory&) = delete; - }; // class SelectorFactory + class Function; /** * Defines mappings from names of formatters and selectors to functions implementing them. @@ -117,50 +43,23 @@ namespace message2 { class U_I18N_API MFFunctionRegistry : public UObject { private: - using FormatterMap = Hashtable; // Map from stringified function names to FormatterFactory* - using SelectorMap = Hashtable; // Map from stringified function names to SelectorFactory* + using FunctionMap = Hashtable; // Map from function names to FunctionFactory* public: /** - * Looks up a formatter factory by the name of the formatter. The result is non-const, - * since formatter factories may have local state. Returns the result by pointer + * Looks up a function by the name of the function. The result is non-const, + * since functions may have local state. Returns the result by pointer * rather than by reference since it can fail. * - * @param formatterName Name of the desired formatter. - * @return A pointer to the `FormatterFactory` registered under `formatterName`, or null - * if no formatter was registered under that name. The pointer is not owned + * @param functionName Name of the desired function. + * @return A pointer to the Function registered under `functionName`, or null + * if no function was registered under that name. The pointer is not owned * by the caller. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. 
*/ - FormatterFactory* getFormatter(const FunctionName& formatterName) const; - /** - * Looks up a selector factory by the name of the selector. (This returns the result by pointer - * rather than by reference since `FormatterFactory` is an abstract class.) - * - * @param selectorName Name of the desired selector. - * @return A pointer to the `SelectorFactory` registered under `selectorName`, or null - * if no formatter was registered under that name. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const SelectorFactory* getSelector(const FunctionName& selectorName) const; - /** - * Looks up a formatter factory by a type tag. This method gets the name of the default formatter registered - * for that type. If no formatter was explicitly registered for this type, it returns false. - * - * @param formatterType Type tag for the desired `FormattableObject` type to be formatted. - * @param name Output parameter; initialized to the name of the default formatter for `formatterType` - * if one has been registered. Its value is undefined otherwise. - * @return True if and only if the function registry contains a default formatter for `formatterType`. - * If the return value is false, then the value of `name` is undefined. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - UBool getDefaultFormatterNameByType(const UnicodeString& formatterType, FunctionName& name) const; + Function* getFunction(const FunctionName& functionName) const; /** * The mutable Builder class allows each formatter and selector factory * to be initialized separately; calling its `build()` method yields an @@ -174,9 +73,7 @@ namespace message2 { class U_I18N_API Builder : public UObject { private: // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters; - SelectorMap* selectors; - Hashtable* formattersByType; + FunctionMap* functions; // Do not define copy constructor/assignment operator Builder& operator=(const Builder&) = delete; @@ -200,46 +97,20 @@ namespace message2 { be re-thought. */ /** - * Registers a formatter factory to a given formatter name. - * - * @param formatterName Name of the formatter being registered. - * @param formatterFactory A pointer to a FormatterFactory object to use - * for creating `formatterName` formatters. This argument is adopted. - * @param errorCode Input/output error code - * @return A reference to the builder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Builder& adoptFormatter(const data_model::FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode); - /** - * Registers a formatter factory to a given type tag. - * (See `FormattableObject` for details on type tags.) - * - * @param type Tag for objects to be formatted with this formatter. - * @param functionName A reference to the name of the function to use for - * creating formatters for `formatterType` objects. - * @param errorCode Input/output error code - * @return A reference to the builder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - Builder& setDefaultFormatterNameByType(const UnicodeString& type, const data_model::FunctionName& functionName, UErrorCode& errorCode); - - /** - * Registers a selector factory to a given selector name. Adopts `selectorFactory`. + * Registers a function to a given name. * - * @param selectorName Name of the selector being registered. - * @param selectorFactory A SelectorFactory object to use for creating `selectorName` - * selectors. + * @param functionName Name of the formatter being registered. + * @param function A pointer to a Function object. + * This argument is adopted. * @param errorCode Input/output error code * @return A reference to the builder. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - Builder& adoptSelector(const data_model::FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode); + Builder& adoptFunction(const data_model::FunctionName& functionName, + Function* function, + UErrorCode& errorCode); /** * Creates an immutable `MFFunctionRegistry` object with the selectors and formatters * that were previously registered. The builder cannot be used after this call. 
@@ -305,112 +176,72 @@ namespace message2 { MFFunctionRegistry& operator=(const MFFunctionRegistry&) = delete; MFFunctionRegistry(const MFFunctionRegistry&) = delete; - MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType); + MFFunctionRegistry(FunctionMap* f); MFFunctionRegistry() {} // Debugging; should only be called on a function registry with // all the standard functions registered - void checkFormatter(const char*) const; - void checkSelector(const char*) const; + void checkFunction(const char*) const; void checkStandard() const; - bool hasFormatter(const data_model::FunctionName& f) const; - bool hasSelector(const data_model::FunctionName& s) const; + bool hasFunction(const data_model::FunctionName& f) const; void cleanup() noexcept; // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters = nullptr; - SelectorMap* selectors = nullptr; - // Mapping from strings (type tags) to FunctionNames - Hashtable* formattersByType = nullptr; + FunctionMap* functions = nullptr; }; // class MFFunctionRegistry - /** - * Interface that formatter classes must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API Formatter : public UObject { - public: - /** - * Formats the input passed in `context` by setting an output using one of the - * `FormattingContext` methods or indicating an error. - * - * @param toFormat Placeholder, including a source formattable value and possibly - * the output of a previous formatter applied to it; see - * `message2::FormattedPlaceholder` for details. Passed by move. - * @param options The named function options. Passed by move - * @param status Input/output error code. Should not be set directly by the - * custom formatter, which should use `FormattingContext::setFormattingWarning()` - * to signal errors. 
The custom formatter may pass `status` to other ICU functions - * that can signal errors using this mechanism. - * - * @return The formatted value. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual FormattedPlaceholder format(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Formatter(); - }; // class Formatter + class FunctionValue; /** - * Interface that selector classes must implement. + * Interface that function handler classes must implement. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - class U_I18N_API Selector : public UObject { + class U_I18N_API Function : public UObject { public: - /** - * Compares the input to an array of keys, and returns an array of matching - * keys sorted by preference. - * - * @param toFormat The unnamed function argument; passed by move. - * @param options A reference to the named function options. - * @param keys An array of strings that are compared to the input - * (`context.getFormattableInput()`) in an implementation-specific way. - * @param keysLen The length of `keys`. - * @param prefs An array of strings with length `keysLen`. The contents of - * the array is undefined. `selectKey()` should set the contents - * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. - * @param prefsLen A reference that `selectKey()` should set to the length of `prefs`, - * which must be less than or equal to `keysLen`. - * @param status Input/output error code. Should not be set directly by the - * custom selector, which should use `FormattingContext::setSelectorError()` - * to signal errors. The custom selector may pass `status` to other ICU functions - * that can signal errors using this mechanism. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual void selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const = 0; - // Note: This takes array arguments because the internal MessageFormat code has to - // call this method, and can't include any code that constructs std::vectors. - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Selector(); - }; // class Selector + // Adopts its argument + virtual FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) = 0; + virtual ~Function(); + }; // class Function + + class U_I18N_API FunctionValue : public UObject { + public: + virtual UnicodeString formatToString(UErrorCode& status) const { + if (U_SUCCESS(status)) { + status = U_MF_FORMATTING_ERROR; + } + return {}; + } + virtual const Formattable& getOperand() const { return operand; } + // `this` can't be used after calling this method + virtual FunctionOptions getResolvedOptions() { return std::move(opts); } + // const method is for reading the options attached to another option + // (i.e. options don't escape) -- + // non-const method is for calling mergeOptions() -- i.e. 
options escape + virtual const FunctionOptions& getResolvedOptions() const { return opts; } + virtual UBool isSelectable() const { return false; } + virtual void selectKeys(const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& status) { + (void) keys; + (void) keysLen; + (void) prefs; + (void) prefsLen; + if (U_SUCCESS(status)) { + status = U_MF_SELECTOR_ERROR; + } + } + virtual ~FunctionValue(); + protected: + Formattable operand; + FunctionOptions opts; + }; // class FunctionValue + } // namespace message2 diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 353082ef5c91..8ed6b27d88ae 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -278,7 +278,7 @@ void TestMessageFormat2::testAPICustomFunctions() { // Set up custom function registry MFFunctionRegistry::Builder builder(errorCode); MFFunctionRegistry functionRegistry = - builder.adoptFormatter(data_model::FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + builder.adoptFunction(data_model::FunctionName("person"), new PersonNameFunction(), errorCode) .build(); Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); @@ -317,6 +317,8 @@ void TestMessageFormat2::testAPICustomFunctions() { assertEquals("testAPICustomFunctions", "Hello Mr. 
John Doe", result); // By type +// TODO +/* MFFunctionRegistry::Builder builderByType(errorCode); FunctionName personFormatterName("person"); MFFunctionRegistry functionRegistryByType = @@ -336,9 +338,13 @@ void TestMessageFormat2::testAPICustomFunctions() { // Expect "Hello John" because in the custom function we registered, // "informal" is the default formality and "length" is the default length assertEquals("testAPICustomFunctions", "Hello John", result); +*/ + delete person; } +PersonNameFunction::~PersonNameFunction() {} + // ICU-22890 lone surrogate cause infinity loop void TestMessageFormat2::testHighLoneSurrogate() { IcuTestErrorCode errorCode(*this, "testHighLoneSurrogate"); diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 0cdc25633248..938afd3f30b1 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -99,11 +99,6 @@ U_NAMESPACE_BEGIN namespace message2 { // Custom function classes -class PersonNameFormatterFactory : public FormatterFactory { - - public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; class Person : public FormattableObject { public: @@ -117,11 +112,24 @@ class Person : public FormattableObject { const UnicodeString tagName; }; -class PersonNameFormatter : public Formatter { +class PersonNameFunction : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + virtual ~PersonNameFunction(); }; +class PersonNameValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + PersonNameValue(); + virtual ~PersonNameValue(); + private: + friend class PersonNameFunction; + + UnicodeString formattedString; + PersonNameValue(FunctionValue*, FunctionOptions&&, UErrorCode&); +}; // class 
PersonNameValue + class FormattableProperties : public FormattableObject { public: const UnicodeString& tag() const override { return tagName; } @@ -134,33 +142,76 @@ class FormattableProperties : public FormattableObject { const UnicodeString tagName; }; -class GrammarCasesFormatterFactory : public FormatterFactory { +class GrammarCasesFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); }; -class GrammarCasesFormatter : public Formatter { +class GrammarCasesValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + GrammarCasesValue(); + virtual ~GrammarCasesValue(); private: + friend class GrammarCasesFunction; + + UnicodeString formattedString; + GrammarCasesValue(FunctionValue*, FunctionOptions&&, UErrorCode&); void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; -}; +}; // class GrammarCasesValue -class ListFormatterFactory : public FormatterFactory { +class ListFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); + ListFunction(const Locale& loc) : locale(loc) {} + virtual ~ListFunction(); + private: + Locale locale; }; -class ListFormatter : public Formatter { +class ListValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + virtual 
~ListValue(); private: - friend class ListFormatterFactory; - const Locale& locale; - ListFormatter(const Locale& loc) : locale(loc) {} -}; + friend class ListFunction; + + UnicodeString formattedString; + ListValue(const Locale&, + FunctionValue*, + FunctionOptions&&, + UErrorCode&); +}; // class ListValue + +class NounValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + NounValue(); + virtual ~NounValue(); + private: + friend class NounFunction; + + UnicodeString formattedString; + NounValue(FunctionValue*, + FunctionOptions&&, + UErrorCode&); +}; // class NounValue + +class AdjectiveValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + AdjectiveValue(); + virtual ~AdjectiveValue(); + private: + friend class AdjectiveFunction; + + UnicodeString formattedString; + AdjectiveValue(FunctionValue*, + FunctionOptions&&, + UErrorCode&); +}; // class AdjectiveValue /* class ResourceManagerFactory : public FormatterFactory { @@ -183,32 +234,18 @@ class ResourceManager : public Formatter { }; */ -class NounFormatterFactory : public FormatterFactory { - - public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; - -class AdjectiveFormatterFactory : public FormatterFactory { - +class NounFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + NounFunction() { } + virtual ~NounFunction(); }; -class NounFormatter : public Formatter { +class AdjectiveFunction : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - private: - friend class NounFormatterFactory; - NounFormatter() { } -}; - -class AdjectiveFormatter : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) 
const override; - private: - friend class AdjectiveFormatterFactory; - AdjectiveFormatter() { } + FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + AdjectiveFunction() { } + virtual ~AdjectiveFunction(); }; } // namespace message2 diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index a4f353b04884..a86c2278f968 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -32,7 +32,7 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), new PersonNameFunction(), errorCode) .build()); UnicodeString name = "name"; LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); @@ -98,7 +98,7 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), new PersonNameFunction(), errorCode) .build()); UnicodeString host = "host"; UnicodeString hostGender = "hostGender"; @@ -188,8 +188,8 @@ void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("noun"), new NounFormatterFactory(), errorCode) - .adoptFormatter(FunctionName("adjective"), new AdjectiveFormatterFactory(), errorCode) + .adoptFunction(FunctionName("noun"), new NounFunction(), errorCode) + .adoptFunction(FunctionName("adjective"), new AdjectiveFunction(), 
errorCode) .build()); UnicodeString name = "name"; TestCase::Builder testBuilder; @@ -239,34 +239,23 @@ void TestMessageFormat2::testCustomFunctions() { // -------------- Custom function implementations -Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - // Locale not used - (void) locale; - - Formatter* result = new PersonNameFormatter(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - static UnicodeString getStringOption(const FunctionOptionsMap& opt, const UnicodeString& k) { if (opt.count(k) == 0) { return {}; } UErrorCode localErrorCode = U_ZERO_ERROR; - const message2::Formattable* optVal = opt.at(k)->getSource(localErrorCode); - if (U_FAILURE(localErrorCode)) { + const message2::FunctionValue* optVal = opt.at(k); + if (optVal == nullptr) { return {}; } - const UnicodeString& val = optVal->getString(localErrorCode); + const UnicodeString& formatted = optVal->formatToString(localErrorCode); + if (U_SUCCESS(localErrorCode)) { + return formatted; + } + const UnicodeString& original = optVal->getOperand().getString(localErrorCode); if (U_SUCCESS(localErrorCode)) { - return val; + return original; } return {}; } @@ -276,18 +265,39 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } -message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +FunctionValue* PersonNameFunction::call(FunctionValue* arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + PersonNameValue* v = new PersonNameValue(arg, std::move(opts), errorCode); + if (U_SUCCESS(errorCode) && v == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return v; +} + +UnicodeString PersonNameValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} 
+ +PersonNameValue::PersonNameValue(FunctionValue* arg, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } + operand = arg->getOperand(); + opts = std::move(options); // Tests don't cover composition, so no need to merge options - const Formattable* toFormat = arg.getSource(errorCode); + const Formattable* toFormat = &operand; if (U_FAILURE(errorCode)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; + return; } - FunctionOptionsMap opt = options.getOptions(); + FunctionOptionsMap opt = opts.getOptions(); bool useFormal = hasStringOption(opt, "formality", "formal"); UnicodeString length = getStringOption(opt, "length"); @@ -298,12 +308,12 @@ message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder& const FormattableObject* fp = toFormat->getObject(errorCode); if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_FORMATTING_ERROR; - return {}; + return; } if (fp == nullptr || fp->tag() != u"person") { errorCode = U_MF_FORMATTING_ERROR; - return {}; + return; } const Person* p = static_cast(fp); @@ -311,59 +321,41 @@ message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder& UnicodeString firstName = p->firstName; UnicodeString lastName = p->lastName; - UnicodeString result; if (length == "long") { - result += title; - result += " "; - result += firstName; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else if (length == "medium") { if (useFormal) { - result += firstName; - result += " "; - result += lastName; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else { - result += title; - result += " "; - result += firstName; + formattedString += title; + formattedString += " "; + formattedString += firstName; } } else if (useFormal) { // Default to "short" length - result += 
title; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += lastName; } else { - result += firstName; + formattedString += firstName; } - - FormattedPlaceholder res = arg.withResult(FormattedValue(std::move(result))); - return res; } FormattableProperties::~FormattableProperties() {} Person::~Person() {} +PersonNameValue::~PersonNameValue() {} /* See ICU4J: CustomFormatterGrammarCaseTest.java */ -Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - // Locale not used - (void) locale; - - Formatter* result = new GrammarCasesFormatter(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ void GrammarCasesFormatter::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { +/* static */ void GrammarCasesValue::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { UnicodeString postfix; if (value.endsWith("ana")) { value.extract(0, value.length() - 3, postfix); @@ -387,32 +379,48 @@ Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, U result += postfix; } -message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; +FunctionValue* GrammarCasesFunction::call(FunctionValue* arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + GrammarCasesValue* v = new GrammarCasesValue(arg, std::move(opts), errorCode); + if (U_SUCCESS(errorCode) && v == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } + return v; +} - const Formattable* toFormat = arg.getSource(errorCode); - // Check for null operand +UnicodeString GrammarCasesValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} + 
+GrammarCasesValue::GrammarCasesValue(FunctionValue* val, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - errorCode = U_MF_FORMATTING_ERROR; - return {}; + return; } + operand = val->getOperand(); + opts = std::move(options); // Tests don't cover composition, so no need to merge options + const Formattable* toFormat = &operand; + UnicodeString result; - const FunctionOptionsMap opt = options.getOptions(); + const FunctionOptionsMap opt = opts.getOptions(); switch (toFormat->getType()) { case UFMT_STRING: { const UnicodeString& in = toFormat->getString(errorCode); bool hasCase = opt.count("case") > 0; - const Formattable* caseAsFormattable = opt.at("case")->getSource(errorCode); + const Formattable& caseAsFormattable = opt.at("case")->getOperand(); if (U_FAILURE(errorCode)) { errorCode = U_MF_FORMATTING_ERROR; - return {}; + return; } - bool caseIsString = caseAsFormattable->getType() == UFMT_STRING; + bool caseIsString = caseAsFormattable.getType() == UFMT_STRING; if (hasCase && caseIsString) { - const UnicodeString& caseOpt = caseAsFormattable->getString(errorCode); + const UnicodeString& caseOpt = caseAsFormattable.getString(errorCode); if (caseOpt == "dative" || caseOpt == "genitive") { getDativeAndGenitive(in, result); } @@ -429,14 +437,14 @@ message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholde } } - return arg.withResult(FormattedValue(std::move(result))); + formattedString = result; } void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("grammarBB"), new GrammarCasesFormatterFactory(), errorCode) + .adoptFunction(FunctionName("grammarBB"), new GrammarCasesFunction(), errorCode) .build(); TestCase::Builder testBuilder; @@ -488,31 +496,46 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) 
TestUtils::runTestCase(*this, test, errorCode); } +GrammarCasesValue::~GrammarCasesValue() {} + /* See ICU4J: CustomFormatterListTest.java */ -Formatter* ListFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - Formatter* result = new ListFormatter(locale); - if (result == nullptr) { +FunctionValue* ListFunction::call(FunctionValue* arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + ListValue* v = new ListValue(locale, arg, std::move(opts), errorCode); + if (U_SUCCESS(errorCode) && v == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; +} + +UnicodeString ListValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; } -message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +message2::ListValue::ListValue(const Locale& locale, + FunctionValue* val, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - const Formattable* toFormat = arg.getSource(errorCode); + operand = val->getOperand(); + opts = std::move(options); // Tests don't cover composition, so no need to merge options + + const Formattable* toFormat = &operand; if (U_FAILURE(errorCode)) { // Must have an argument errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; + return; } FunctionOptionsMap opt = options.getOptions(); @@ -530,42 +553,41 @@ message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlacehol } LocalPointer lf(icu::ListFormatter::createInstance(locale, type, width, errorCode)); if (U_FAILURE(errorCode)) { - return {}; + return; } - UnicodeString result; - switch (toFormat->getType()) { case UFMT_ARRAY: { int32_t n_items; const Formattable* objs = toFormat->getArray(n_items, errorCode); if (U_FAILURE(errorCode)) { errorCode = 
U_MF_FORMATTING_ERROR; - return {}; + return; } UnicodeString* parts = new UnicodeString[n_items]; if (parts == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } for (int32_t i = 0; i < n_items; i++) { parts[i] = objs[i].getString(errorCode); } U_ASSERT(U_SUCCESS(errorCode)); - lf->format(parts, n_items, result, errorCode); + lf->format(parts, n_items, formattedString, errorCode); delete[] parts; break; } default: { - result += toFormat->getString(errorCode); + formattedString += toFormat->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); break; } } - - return arg.withResult(FormattedValue(std::move(result))); } +ListValue::~ListValue() {} +ListFunction::~ListFunction() {} + void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; @@ -579,10 +601,11 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { TestCase::Builder testBuilder; MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("listformat"), new ListFormatterFactory(), errorCode) + .adoptFunction(FunctionName("listformat"), new ListFunction(Locale("en")), errorCode) .build(); CHECK_ERROR(errorCode); + testBuilder.setLocale(Locale("en")); testBuilder.setFunctionRegistry(®); testBuilder.setArgument("languages", progLanguages, 3); @@ -803,129 +826,130 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { } #endif -Formatter* NounFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - // Locale not used - (void) locale; +FunctionValue* NounFunction::call(FunctionValue* arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); - Formatter* result = new NounFormatter(); - if (result == nullptr) { + NounValue* v = new NounValue(arg, std::move(opts), errorCode); + if (U_SUCCESS(errorCode) && v == nullptr) { errorCode = 
U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; } -Formatter* AdjectiveFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } +UnicodeString NounValue::formatToString(UErrorCode& status) const { + (void) status; - // Locale not used - (void) locale; - - Formatter* result = new AdjectiveFormatter(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; + return formattedString; } -message2::FormattedPlaceholder NounFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +NounValue::NounValue(FunctionValue* arg, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - const Formattable* toFormat = arg.getSource(errorCode); - // Must have an argument - if (U_FAILURE(errorCode)) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; - } - FunctionOptionsMap opt = options.getOptions(); + operand = arg->getOperand(); + opts = std::move(options); + + const Formattable* toFormat = &operand; + FunctionOptionsMap opt = opts.getOptions(); // very simplified example bool useAccusative = hasStringOption(opt, "case", "accusative"); bool useSingular = hasStringOption(opt, "count", "1"); const UnicodeString& noun = toFormat->getString(errorCode); if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { - return {}; + errorCode = U_MF_FORMATTING_ERROR; + return; } - UnicodeString result; if (useAccusative) { if (useSingular) { - result = noun + " accusative, singular noun"; + formattedString = noun + " accusative, singular noun"; } else { - result = noun + " accusative, plural noun"; + formattedString = noun + " accusative, plural noun"; } } else { if (useSingular) { - result = noun + " dative, singular noun"; + formattedString = noun + " dative, singular noun"; } else { - result = noun + " dative, plural noun"; + formattedString = noun + " dative, plural noun"; } } - - 
return arg.withResultAndOptions(FormattedValue(result), std::move(options), errorCode); } -message2::FormattedPlaceholder AdjectiveFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; +FunctionValue* AdjectiveFunction::call(FunctionValue* arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + AdjectiveValue* v = new AdjectiveValue(arg, std::move(opts), errorCode); + if (U_SUCCESS(errorCode) && v == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } + return v; +} - const Formattable* toFormat = arg.getSource(errorCode); - // Must have an argument +UnicodeString AdjectiveValue::formatToString(UErrorCode& status) const { + (void) status; + + return formattedString; +} + +AdjectiveValue::AdjectiveValue(FunctionValue* arg, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return {}; + return; } - const FunctionOptionsMap opt = options.getOptions(); + operand = arg->getOperand(); + opts = std::move(options); + + const Formattable* toFormat = &operand; + + const FunctionOptionsMap opt = opts.getOptions(); // Return empty string if no accord is provided if (opt.count("accord") <= 0) { - return {}; + return; } - const FormattedPlaceholder& accordOpt = *opt.at("accord"); - // Fail if no accord is provided, as this is a simplified example - const Formattable* accordSrc = accordOpt.getSource(errorCode); - if (U_FAILURE(errorCode)) { - return {}; - } - UnicodeString accord = accordSrc->getString(errorCode); + const FunctionValue& accordOpt = *opt.at("accord"); + const Formattable& accordSrc = accordOpt.getOperand(); + UnicodeString accord = accordSrc.getString(errorCode); const UnicodeString& adjective = toFormat->getString(errorCode); if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { - return {}; + errorCode = U_MF_FORMATTING_ERROR; + return; } - UnicodeString result = 
adjective + " " + accord; + formattedString = adjective + " " + accord; // very simplified example - const FunctionOptionsMap accordOptionsMap = accordOpt.getOptions().getOptions(); + FunctionOptionsMap accordOptionsMap = accordOpt.getResolvedOptions().getOptions(); bool accordIsAccusative = hasStringOption(accordOptionsMap, "case", "accusative"); bool accordIsSingular = hasStringOption(accordOptionsMap, "count", "1"); if (accordIsAccusative) { if (accordIsSingular) { - result += " (accusative, singular adjective)"; + formattedString += " (accusative, singular adjective)"; } else { - result += " (accusative, plural adjective)"; + formattedString += " (accusative, plural adjective)"; } } else { if (accordIsSingular) { - result += " (dative, singular adjective)"; + formattedString += " (dative, singular adjective)"; } else { - result += " (dative, plural adjective)"; + formattedString += " (dative, plural adjective)"; } } - - return arg.withResultAndOptions(FormattedValue(std::move(result)), - std::move(options), - errorCode); } +NounFunction::~NounFunction() {} +AdjectiveFunction::~AdjectiveFunction() {} +NounValue::~NounValue() {} +AdjectiveValue::~AdjectiveValue() {} #endif /* #if !UCONFIG_NO_MF2 */ diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index ddf93da632ce..4c3aacb42500 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -314,7 +314,6 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "resolution-errors.json", errorCode); runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode); runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode); - runTestsFromJsonFile(*this, "runtime-errors.json", errorCode); // Re: the expected output for the first test in this file: // Note: the more "correct" fallback output 
seems like it should be "1.000 3" (ignoring the diff --git a/testdata/message2/runtime-errors.json b/testdata/message2/runtime-errors.json deleted file mode 100644 index b1bb0cd491a0..000000000000 --- a/testdata/message2/runtime-errors.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "scenario": "Runtime errors", - "description": "Tests for bad-selector and bad-operand errors", - "defaultTestProperties": { - "locale": "en-US" - }, - "tests": [ - { - "src": ".match {|horse| :date}\n 1 {{The value is one.}}\n * {{Formatter used as selector.}}", - "exp": "Formatter used as selector.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - }, - { - "src": ".match {|horse| :number}\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - }, - { - "src": ".local $sel = {|horse| :number}\n .match {$sel}\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" 
- } - ] -} From 8d875ed965efffb504cf771de7c8bce3a05f75b5 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 15:28:41 -0700 Subject: [PATCH 12/37] Fixed leaks --- icu4c/source/i18n/messageformat2.cpp | 26 +++++++------ .../source/i18n/messageformat2_evaluation.cpp | 14 ++++++- icu4c/source/i18n/messageformat2_evaluation.h | 8 ++-- .../i18n/messageformat2_function_registry.cpp | 39 +++++++++---------- ...essageformat2_function_registry_internal.h | 22 +++++------ .../messageformat2_function_registry.h | 8 +++- .../source/test/intltest/messageformat2test.h | 20 +++++----- .../intltest/messageformat2test_custom.cpp | 30 +++++++------- 8 files changed, 93 insertions(+), 74 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 8ed9685c4dbf..cc24b29d1eb0 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -62,15 +62,15 @@ static Formattable evalLiteral(const Literal& lit) { } [[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } if (rand.isNull()) { - return InternalValue(); + return InternalValue(status); } if (rand.isVariable()) { // Check if it's local or global @@ -190,10 +190,10 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, } // Calling takeValue() won't error out because we already checked the fallback case // Nullptr represents an absent argument - FunctionValue* functionArg = randVal.isNullOperand() ? 
nullptr : randVal.takeValue(status); + LocalPointer functionArg(randVal.takeValue(status)); U_ASSERT(U_SUCCESS(status)); - auto result = function->call(functionArg, - std::move(resolvedOptions), status); + LocalPointer functionResult( + function->call(*functionArg, std::move(resolvedOptions), status)); if (status == U_MF_OPERAND_MISMATCH_ERROR) { status = U_ZERO_ERROR; context.getErrors().setOperandMismatchError(functionName, status); @@ -204,7 +204,10 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, context.getErrors().setFormattingError(functionName, status); return InternalValue(fallbackStr); } - return InternalValue(result, fallbackStr); + if (U_FAILURE(status)) { + return {}; + } + return InternalValue(functionResult.orphan(), fallbackStr); } } @@ -227,7 +230,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& result += RIGHT_CURLY_BRACE; } else { // Do final formatting (e.g. formatting numbers as strings) - const FunctionValue* val = partVal.takeValue(status); + LocalPointer val(partVal.takeValue(status)); // Shouldn't be null or a fallback U_ASSERT(U_SUCCESS(status)); result += val->formatToString(status); @@ -320,8 +323,9 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Call the selector // Already checked for fallback, so it's safe to call takeValue() - rv.takeValue(status)->selectKeys(adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, - status); + LocalPointer rvVal(rv.takeValue(status)); + rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, + status); // Update errors if (savedStatus != status) { diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index de0b515df0ab..98a7843f09af 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -192,6 +192,7 @@ FunctionOptions 
FunctionOptions::mergeOptions(FunctionOptions&& other, InternalValue::~InternalValue() {} + InternalValue& InternalValue::operator=(InternalValue&& other) { isFallbackValue = other.isFallbackValue; fallbackString = other.fallbackString; @@ -206,6 +207,17 @@ InternalValue::InternalValue(InternalValue&& other) { *this = std::move(other); } +InternalValue::InternalValue(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + NullValue* nv = new NullValue(); + if (nv == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + val.adoptInstead(nv); +} + InternalValue::InternalValue(FunctionValue* v, const UnicodeString& fb) : fallbackString(fb), val(v) { U_ASSERT(v != nullptr); @@ -215,7 +227,7 @@ FunctionValue* InternalValue::takeValue(UErrorCode& status) { if (U_FAILURE(status)) { return {}; } - if (isFallback() || isNullOperand()) { + if (isFallback()) { status = U_ILLEGAL_ARGUMENT_ERROR; return {}; } diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 6ad0b264fe8d..7b2502bb5cd0 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -53,16 +53,17 @@ namespace message2 { class InternalValue : public UObject { public: bool isFallback() const { return isFallbackValue; } - bool isNullOperand() const { return isNull; } + bool isNullOperand() const { return isFallback() ? 
false : val->isNullOperand(); } bool isSelectable() const; + InternalValue() : isFallbackValue(true), fallbackString("") {} // Null operand constructor - InternalValue() : isFallbackValue(false), isNull(true), fallbackString("") {} + explicit InternalValue(UErrorCode& status); // Fallback constructor explicit InternalValue(const UnicodeString& fb) : isFallbackValue(true), fallbackString(fb) {} // Fully-evaluated value constructor explicit InternalValue(FunctionValue* v, const UnicodeString& fb); - // Error code is set if this is a fallback or null + // Error code is set if this is a fallback FunctionValue* takeValue(UErrorCode& status); UnicodeString asFallback() const { return fallbackString; } virtual ~InternalValue(); @@ -70,7 +71,6 @@ namespace message2 { InternalValue(InternalValue&&); private: bool isFallbackValue = false; - bool isNull = false; UnicodeString fallbackString; LocalPointer val; }; // class InternalValue diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index a8c4fd8064d8..a48ab31948e9 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -208,11 +208,11 @@ StandardFunctions::Number::create(const Locale& loc, bool isInteger, UErrorCode& return result.orphan(); } -FunctionValue* StandardFunctions::Number::call(FunctionValue* operand, +FunctionValue* StandardFunctions::Number::call(FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) { LocalPointer - val(new NumberValue(*this, locale, operand, std::move(options), errorCode)); + val(new NumberValue(*this, operand, std::move(options), errorCode)); if (val.isValid()) { return val.orphan(); } @@ -482,19 +482,19 @@ bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { } StandardFunctions::NumberValue::NumberValue(const Number& parent, - const Locale& loc, - FunctionValue* arg, + FunctionValue& arg, 
FunctionOptions&& options, - UErrorCode& errorCode) : locale(loc) { + UErrorCode& errorCode) { CHECK_ERROR(errorCode); // Must have an argument - if (arg == nullptr) { + if (arg.isNullOperand()) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; return; } - opts = options.mergeOptions(arg->getResolvedOptions(), errorCode); - operand = arg->getOperand(); + locale = parent.locale; + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + operand = arg.getOperand(); number::LocalizedNumberFormatter realFormatter; realFormatter = formatterForOptions(parent, opts, errorCode); @@ -688,8 +688,8 @@ StandardFunctions::DateTime::create(const Locale& loc, DateTimeType type, UError } FunctionValue* -StandardFunctions::DateTime::call(FunctionValue* val, FunctionOptions&& opts, UErrorCode& errorCode) { - auto result = new DateTimeValue(locale, type, val, std::move(opts), errorCode); +StandardFunctions::DateTime::call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { + auto result = new DateTimeValue(type, locale, val, std::move(opts), errorCode); return result; } @@ -722,22 +722,21 @@ UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& statu return formattedDate; } -StandardFunctions::DateTimeValue::DateTimeValue(const Locale& loc, - DateTime::DateTimeType type, - FunctionValue* val, +StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, + const Locale& locale, + FunctionValue& val, FunctionOptions&& options, UErrorCode& errorCode) { CHECK_ERROR(errorCode); // Must have an argument - if (val == nullptr) { + if (val.isNullOperand()) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; return; } - locale = loc; - operand = val->getOperand(); - opts = options.mergeOptions(val->getResolvedOptions(), errorCode); + operand = val.getOperand(); + opts = options.mergeOptions(val.getResolvedOptions(), errorCode); const Formattable* source = &operand; @@ -992,7 +991,7 @@ StandardFunctions::String::string(const Locale& loc, 
UErrorCode& success) { extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); FunctionValue* -StandardFunctions::String::call(FunctionValue* val, FunctionOptions&& opts, UErrorCode& errorCode) { +StandardFunctions::String::call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { return new StringValue(locale, val, std::move(opts), errorCode); } @@ -1003,11 +1002,11 @@ UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCo } StandardFunctions::StringValue::StringValue(const Locale& locale, - FunctionValue* val, + FunctionValue& val, FunctionOptions&& options, UErrorCode& status) { CHECK_ERROR(status); - operand = val->getOperand(); + operand = val.getOperand(); opts = std::move(options); // No options // Convert to string formattedString = formattableToString(locale, operand, status); diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index fec7c7ca3dce..32b0f2f55a42 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -36,7 +36,7 @@ namespace message2 { class DateTime : public Function { public: - FunctionValue* call(FunctionValue* operand, + FunctionValue* call(FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) override; static DateTime* date(const Locale&, UErrorCode&); @@ -52,7 +52,8 @@ namespace message2 { kTime, kDateTime } DateTimeType; - const Locale& locale; + + Locale locale; const DateTimeType type; static DateTime* create(const Locale&, DateTimeType, UErrorCode&); DateTime(const Locale& l, DateTimeType t) : locale(l), type(t) {} @@ -66,7 +67,7 @@ namespace message2 { static Number* integer(const Locale& loc, UErrorCode& success); static Number* number(const Locale& loc, UErrorCode& success); - FunctionValue* call(FunctionValue* operand, + FunctionValue* call(FunctionValue& 
operand, FunctionOptions&& options, UErrorCode& errorCode) override; virtual ~Number(); @@ -93,7 +94,7 @@ namespace message2 { int32_t minimumIntegerDigits(const FunctionOptions& options) const; bool usePercent(const FunctionOptions& options) const; - const Locale& locale; + Locale locale; const bool isInteger = false; const number::LocalizedNumberFormatter icuFormatter; @@ -121,7 +122,7 @@ namespace message2 { Locale locale; number::FormattedNumber formattedNumber; - NumberValue(const Number&, const Locale&, FunctionValue*, FunctionOptions&&, UErrorCode&); + NumberValue(const Number&, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class NumberValue class DateTimeValue : public FunctionValue { @@ -132,15 +133,14 @@ namespace message2 { private: friend class DateTime; - Locale locale; UnicodeString formattedDate; - DateTimeValue(const Locale&, DateTime::DateTimeType type, - FunctionValue*, FunctionOptions&&, UErrorCode&); + DateTimeValue(DateTime::DateTimeType type, const Locale&, + FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class DateTimeValue class String : public Function { public: - FunctionValue* call(FunctionValue* val, + FunctionValue* call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) override; static String* string(const Locale& locale, UErrorCode& status); @@ -150,7 +150,7 @@ namespace message2 { friend class StringFactory; // Formatting `value` to a string might require the locale - const Locale& locale; + Locale locale; String(const Locale& l) : locale(l) {} }; @@ -169,7 +169,7 @@ namespace message2 { friend class String; UnicodeString formattedString; - StringValue(const Locale&, FunctionValue*, FunctionOptions&&, UErrorCode&); + StringValue(const Locale&, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class StringValue }; diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 664f9b3c0c98..8670594b40b7 100644 --- 
a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -202,8 +202,7 @@ namespace message2 { */ class U_I18N_API Function : public UObject { public: - // Adopts its argument - virtual FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) = 0; + virtual FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) = 0; virtual ~Function(); }; // class Function @@ -223,6 +222,7 @@ namespace message2 { // non-const method is for calling mergeOptions() -- i.e. options escape virtual const FunctionOptions& getResolvedOptions() const { return opts; } virtual UBool isSelectable() const { return false; } + virtual UBool isNullOperand() const { return false; } virtual void selectKeys(const UnicodeString* keys, int32_t keysLen, UnicodeString* prefs, @@ -242,6 +242,10 @@ namespace message2 { FunctionOptions opts; }; // class FunctionValue + class NullValue : public FunctionValue { + public: + virtual UBool isNullOperand() const { return true; } + }; // class NullValue } // namespace message2 diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 938afd3f30b1..5bee2dc0317e 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -114,7 +114,7 @@ class Person : public FormattableObject { class PersonNameFunction : public Function { public: - FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; virtual ~PersonNameFunction(); }; @@ -127,7 +127,7 @@ class PersonNameValue : public FunctionValue { friend class PersonNameFunction; UnicodeString formattedString; - PersonNameValue(FunctionValue*, FunctionOptions&&, UErrorCode&); + PersonNameValue(FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class PersonNameValue class FormattableProperties : public 
FormattableObject { @@ -144,7 +144,7 @@ class FormattableProperties : public FormattableObject { class GrammarCasesFunction : public Function { public: - FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); }; @@ -157,13 +157,13 @@ class GrammarCasesValue : public FunctionValue { friend class GrammarCasesFunction; UnicodeString formattedString; - GrammarCasesValue(FunctionValue*, FunctionOptions&&, UErrorCode&); + GrammarCasesValue(FunctionValue&, FunctionOptions&&, UErrorCode&); void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; }; // class GrammarCasesValue class ListFunction : public Function { public: - FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); ListFunction(const Locale& loc) : locale(loc) {} virtual ~ListFunction(); @@ -180,7 +180,7 @@ class ListValue : public FunctionValue { UnicodeString formattedString; ListValue(const Locale&, - FunctionValue*, + FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class ListValue @@ -194,7 +194,7 @@ class NounValue : public FunctionValue { friend class NounFunction; UnicodeString formattedString; - NounValue(FunctionValue*, + NounValue(FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class NounValue @@ -208,7 +208,7 @@ class AdjectiveValue : public FunctionValue { friend class AdjectiveFunction; UnicodeString formattedString; - AdjectiveValue(FunctionValue*, + AdjectiveValue(FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class AdjectiveValue @@ -236,14 +236,14 @@ class ResourceManager : public Formatter { class NounFunction : public Function { public: - FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + FunctionValue* 
call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; NounFunction() { } virtual ~NounFunction(); }; class AdjectiveFunction : public Function { public: - FunctionValue* call(FunctionValue*, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; AdjectiveFunction() { } virtual ~AdjectiveFunction(); }; diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index a86c2278f968..db7adb5852b3 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -265,7 +265,7 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } -FunctionValue* PersonNameFunction::call(FunctionValue* arg, +FunctionValue* PersonNameFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -282,13 +282,13 @@ UnicodeString PersonNameValue::formatToString(UErrorCode& status) const { return formattedString; } -PersonNameValue::PersonNameValue(FunctionValue* arg, +PersonNameValue::PersonNameValue(FunctionValue& arg, FunctionOptions&& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - operand = arg->getOperand(); + operand = arg.getOperand(); opts = std::move(options); // Tests don't cover composition, so no need to merge options const Formattable* toFormat = &operand; @@ -379,7 +379,7 @@ PersonNameValue::~PersonNameValue() {} result += postfix; } -FunctionValue* GrammarCasesFunction::call(FunctionValue* arg, +FunctionValue* GrammarCasesFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -396,14 +396,14 @@ UnicodeString GrammarCasesValue::formatToString(UErrorCode& status) const { return formattedString; } -GrammarCasesValue::GrammarCasesValue(FunctionValue* val, 
+GrammarCasesValue::GrammarCasesValue(FunctionValue& val, FunctionOptions&& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - operand = val->getOperand(); + operand = val.getOperand(); opts = std::move(options); // Tests don't cover composition, so no need to merge options const Formattable* toFormat = &operand; @@ -502,7 +502,7 @@ GrammarCasesValue::~GrammarCasesValue() {} See ICU4J: CustomFormatterListTest.java */ -FunctionValue* ListFunction::call(FunctionValue* arg, +FunctionValue* ListFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -521,14 +521,14 @@ UnicodeString ListValue::formatToString(UErrorCode& errorCode) const { } message2::ListValue::ListValue(const Locale& locale, - FunctionValue* val, + FunctionValue& val, FunctionOptions&& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - operand = val->getOperand(); + operand = val.getOperand(); opts = std::move(options); // Tests don't cover composition, so no need to merge options const Formattable* toFormat = &operand; @@ -826,7 +826,7 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { } #endif -FunctionValue* NounFunction::call(FunctionValue* arg, +FunctionValue* NounFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -844,14 +844,14 @@ UnicodeString NounValue::formatToString(UErrorCode& status) const { return formattedString; } -NounValue::NounValue(FunctionValue* arg, +NounValue::NounValue(FunctionValue& arg, FunctionOptions&& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - operand = arg->getOperand(); + operand = arg.getOperand(); opts = std::move(options); const Formattable* toFormat = &operand; @@ -881,7 +881,7 @@ NounValue::NounValue(FunctionValue* arg, } } -FunctionValue* AdjectiveFunction::call(FunctionValue* arg, +FunctionValue* AdjectiveFunction::call(FunctionValue& arg, 
FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -899,14 +899,14 @@ UnicodeString AdjectiveValue::formatToString(UErrorCode& status) const { return formattedString; } -AdjectiveValue::AdjectiveValue(FunctionValue* arg, +AdjectiveValue::AdjectiveValue(FunctionValue& arg, FunctionOptions&& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - operand = arg->getOperand(); + operand = arg.getOperand(); opts = std::move(options); const Formattable* toFormat = &operand; From 0510d0a4460af38dc3097c9352a372a16ddbb3db Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 15:35:01 -0700 Subject: [PATCH 13/37] Refactor --- icu4c/source/i18n/messageformat2.cpp | 10 +++++----- icu4c/source/i18n/messageformat2_evaluation.h | 13 +++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index cc24b29d1eb0..0d13b12ada1c 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -70,7 +70,7 @@ static Formattable evalLiteral(const Literal& lit) { } if (rand.isNull()) { - return InternalValue(status); + return InternalValue::null(status); } if (rand.isVariable()) { // Check if it's local or global @@ -98,7 +98,7 @@ static Formattable evalLiteral(const Literal& lit) { // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution UnicodeString str(DOLLAR); str += var; - return InternalValue(str); + return InternalValue::fallback(str); } return result; } else { @@ -186,7 +186,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, if (U_FAILURE(status)) { status = U_ZERO_ERROR; context.getErrors().setUnknownFunction(functionName, status); - return InternalValue(fallbackStr); + return InternalValue::fallback(fallbackStr); } // Calling takeValue() won't error out because we already checked the fallback case // Nullptr represents an absent 
argument @@ -197,12 +197,12 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, if (status == U_MF_OPERAND_MISMATCH_ERROR) { status = U_ZERO_ERROR; context.getErrors().setOperandMismatchError(functionName, status); - return InternalValue(fallbackStr); + return InternalValue::fallback(fallbackStr); } if (status == U_MF_FORMATTING_ERROR) { status = U_ZERO_ERROR; context.getErrors().setFormattingError(functionName, status); - return InternalValue(fallbackStr); + return InternalValue::fallback(fallbackStr); } if (U_FAILURE(status)) { return {}; diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 7b2502bb5cd0..17ac053ce3c5 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -56,12 +56,8 @@ namespace message2 { bool isNullOperand() const { return isFallback() ? false : val->isNullOperand(); } bool isSelectable() const; InternalValue() : isFallbackValue(true), fallbackString("") {} - // Null operand constructor - explicit InternalValue(UErrorCode& status); - // Fallback constructor - explicit InternalValue(const UnicodeString& fb) - : isFallbackValue(true), fallbackString(fb) {} - // Fully-evaluated value constructor + static InternalValue null(UErrorCode& status) { return InternalValue(status); } + static InternalValue fallback(const UnicodeString& s) { return InternalValue(s); } explicit InternalValue(FunctionValue* v, const UnicodeString& fb); // Error code is set if this is a fallback FunctionValue* takeValue(UErrorCode& status); @@ -73,6 +69,11 @@ namespace message2 { bool isFallbackValue = false; UnicodeString fallbackString; LocalPointer val; + // Null operand constructor + explicit InternalValue(UErrorCode& status); + // Fallback constructor + explicit InternalValue(const UnicodeString& fb) + : isFallbackValue(true), fallbackString(fb) {} }; // class InternalValue // Used for arguments and literals From 
25e563fe943f911a4dfdd68e84d518450cda2eaf Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 15:38:06 -0700 Subject: [PATCH 14/37] Remove FormattedPlaceholder and FormattedValue (no longer used) --- .../i18n/messageformat2_formattable.cpp | 92 ------ icu4c/source/i18n/unicode/messageformat2.h | 10 +- .../i18n/unicode/messageformat2_formattable.h | 301 ------------------ 3 files changed, 3 insertions(+), 400 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index ee889d41a3cb..e042f6efb846 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -147,98 +147,6 @@ namespace message2 { FormattedMessage::~FormattedMessage() {} - FormattedValue::FormattedValue(const UnicodeString& s) { - type = kString; - stringOutput = std::move(s); - } - - FormattedValue::FormattedValue(number::FormattedNumber&& n) { - type = kNumber; - numberOutput = std::move(n); - } - - FormattedValue& FormattedValue::operator=(FormattedValue&& other) noexcept { - type = other.type; - if (type == kString) { - stringOutput = std::move(other.stringOutput); - } else { - numberOutput = std::move(other.numberOutput); - } - return *this; - } - - FormattedValue::~FormattedValue() {} - - // FormattedPlaceholder - // ------------------- - - - FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept { - origin = other.origin; - source = other.source; - formatted = std::move(other.formatted); - previousOptions = std::move(other.previousOptions); - fallback = other.fallback; - return *this; - } - - const Formattable* FormattedPlaceholder::getSource(UErrorCode& errorCode) const { - if (U_SUCCESS(errorCode)) { - if (origin != kNull) { - return &source; - } else { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - } - return nullptr; - } - - FormattedPlaceholder FormattedPlaceholder::withResult(FormattedValue&& result) { - 
formatted = std::move(result); - origin = kFunctionResult; - return std::move(*this); - } - - FormattedPlaceholder FormattedPlaceholder::withResultAndOptions(FormattedValue&& result, - FunctionOptions&& opts, - UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - formatted = std::move(result); - origin = kFunctionResult; - previousOptions = std::move(opts); - return std::move(*this); - } - - FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, - FunctionOptions&& opts, - FormattedValue&& output) - : fallback(input.fallback), - source(input.source), - formatted(std::move(output)), - previousOptions(std::move(opts)), - origin(kFunctionResult) {} - - FormattedPlaceholder::FormattedPlaceholder(const FormattedPlaceholder& input, - FormattedValue&& output) - : fallback(input.fallback), - source(input.source), - formatted(std::move(output)), - origin(kFunctionResult) {} - - FormattedPlaceholder::FormattedPlaceholder(const Formattable& input, - const UnicodeString& fb) - : fallback(fb), source(input), origin(kArgumentOrLiteral) {} - - FormattedPlaceholder::FormattedPlaceholder() : origin(kNull) {} - - const message2::FunctionOptions& FormattedPlaceholder::getOptions() const { - return previousOptions; - } - - FormattedPlaceholder::~FormattedPlaceholder() {} - // Default formatters // ------------------ diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index d4819d3ad867..c58b0d7082e3 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -354,15 +354,11 @@ namespace message2 { void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; [[nodiscard]] InternalValue eval(MessageContext&, InternalValue, UErrorCode&) const; // Dispatches on argument type +#if false +// TODO [[nodiscard]] FunctionName getFormatterNameByType(const FormattedPlaceholder& argument, UErrorCode& 
status) const; - // Formats a call to a formatting function - // Dispatches on function name - [[nodiscard]] InternalValue apply(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const; +#endif [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 17354e493ebd..7342622d37c3 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -473,110 +473,6 @@ class U_I18N_API ResolvedFunctionOption : public UObject { }; // class ResolvedFunctionOption #endif - - - // TODO doc comments - // Encapsulates either a formatted string or formatted number; - // more output types could be added in the future. - - /** - * A `FormattedValue` represents the result of formatting a `message2::Formattable`. - * It contains either a string or a formatted number. (More types could be added - * in the future.) - * - * `FormattedValue` is immutable and movable. It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormattedValue : public UObject { - public: - /** - * Formatted string constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedValue(const UnicodeString&); - /** - * Formatted number constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - explicit FormattedValue(number::FormattedNumber&&); - /** - * Default constructor. Leaves the FormattedValue in - * a valid but undefined state. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue() : type(kString) {} - /** - * Returns true iff this is a formatted string. - * - * @return True if and only if this value is a formatted string. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isString() const { return type == kString; } - /** - * Returns true iff this is a formatted number. - * - * @return True if and only if this value is a formatted number. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isNumber() const { return type == kNumber; } - /** - * Gets the string contents of this value. If !isString(), then - * the result is undefined. - * @return A reference to a formatted string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getString() const { return stringOutput; } - /** - * Gets the number contents of this value. If !isNumber(), then - * the result is undefined. - * @return A reference to a formatted number. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const number::FormattedNumber& getNumber() const { return numberOutput; } - /** - * Move assignment operator: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue& operator=(FormattedValue&&) noexcept; - /** - * Move constructor: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FormattedValue(FormattedValue&& other) { *this = std::move(other); } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormattedValue(); - private: - enum Type { - kString, - kNumber - }; - Type type; - UnicodeString stringOutput; - number::FormattedNumber numberOutput; - }; // class FormattedValue - /** * Mapping from option names to `message2::Formattable` objects, obtained * by calling `getOptions()` on a `FunctionOptions` object. @@ -677,203 +573,6 @@ class U_I18N_API FunctionOptions : public UObject { int32_t functionOptionsLen = 0; }; // class FunctionOptions - /** - * A `FormattablePlaceholder` encapsulates an input value (a `message2::Formattable`) - * together with an optional output value (a `message2::FormattedValue`). - * More information, such as source line/column numbers, could be added to the class - * in the future. - * - * `FormattablePlaceholder` is immutable (not deeply immutable) and movable. - * It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormattedPlaceholder : public UObject { - public: - /** - * Constructor for fully formatted placeholders. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output); - /** - * Constructor for fully formatted placeholders with options. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. 
- * @param opts Function options that were used to construct `output`. May be the empty map. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, - FunctionOptions&& opts, - FormattedValue&& output); - /** - * Constructor for unformatted placeholders. - * - * @param input A `Formattable` object. - * @param fb Fallback string to use if an error occurs while formatting the input. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb); - /** - * Default constructor. Leaves the FormattedPlaceholder in a - * valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(); - /** - * Returns true iff this FormattedPlaceholder represents a null operand - * (the absence of an operand). - * - * @return A boolean indicating whether this is a null operand. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - UBool isNullOperand() const { return origin == kNull; } - /** - * Returns a pointer to - * the source Formattable value for this placeholder. - * Sets the error code to failure for a null or fallback placeholder. - * - * @return A message2::Formattable value. - * - * @internal ICU 77 technology preview - * @deprecated This API is for technology preview only. - */ - const message2::Formattable* getSource(UErrorCode&) const; - /** - * Returns a reference to the option map for this placeholder. - * - * @return The options map for this placeholder. - * - * @internal ICU 77 technology preview - * @deprecated This API is for technology preview only. 
- */ - const message2::FunctionOptions& getOptions() const; - /** - * Returns a FormattedPlaceholder with `result` as the result value - * and everything else kept the same. - * `this` cannot be used after calling this method. - * - * @param result FormattedValue to use as the result - * @return A FormattedPlaceholder - * - */ - FormattedPlaceholder withResult(FormattedValue&& result); - /** - * Returns a FormattedPlaceholder with `result` as the result value - * and `options` as the option map - * and everything else kept the same. - * `this` cannot be used after calling this method. - * - * @param result FormattedValue to use as the result - * @param options FunctionOptionsMap to use as the options - * @param errorCode Input/output error code - * @return A FormattedPlaceholder - * - */ - FormattedPlaceholder withResultAndOptions(FormattedValue&& result, - FunctionOptions&& options, - UErrorCode& errorCode); - /** - * Returns true iff this has formatting output. - * - * @return True if and only if this was constructed from both an input `Formattable` and - * output `FormattedValue`. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isFunctionResult() const { return origin == kFunctionResult; } - /** - * Gets the fallback value of this placeholder, to be used in its place if an error occurs while - * formatting it. - * @return A reference to this placeholder's fallback string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getFallback() const { return fallback; } - /** - * Returns the formatted output of this placeholder. The result is undefined if !isEvaluated(). - * @return A fully formatted `FormattedPlaceholder`. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - const FormattedValue& output() const { return formatted; } - /** - * Move assignment operator: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder& operator=(FormattedPlaceholder&&) noexcept; - /** - * Move constructor: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(FormattedPlaceholder&& other) { *this = std::move(other); } - /** - * Formats this as a string, using defaults. If this is - * either the null operand or is a fallback value, the return value is the result of formatting the - * fallback value (which is the default fallback string if this is the null operand). - * If there is no formatted output and the input is object- or array-typed, - * then the argument is treated as a fallback value, since there is no default formatter - * for objects or arrays. - * - * @param locale The locale to use for formatting numbers or dates - * @param status Input/output error code - * @return The result of formatting this placeholder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - UnicodeString formatToString(const Locale& locale, - UErrorCode& status) const; - /** - * Destructor. - * - * @internal ICU 77 technology preview - * @deprecated This API is for ICU internal use only. 
- */ - virtual ~FormattedPlaceholder(); - private: - friend class MessageFormatter; - - enum Origin { - kNull, // Represents the operand of an expression with no syntactic operand - // (Functions can be nullary in MF2 but the C++ representations must - // take an argument, hence we need a representation for "no argument") - kArgumentOrLiteral, // Represents a `Formattable` originating from an argument or literal - kFunctionResult, // Represents the result of applying a function to another - // FormattedPlaceholder - }; - UnicodeString fallback; - Formattable source; - FormattedValue formatted; - FunctionOptions previousOptions; // Ignored unless type is kEvaluated - Origin origin; - }; // class FormattedPlaceholder - /** * Not yet implemented: The result of a message formatting operation. Based on * ICU4J's FormattedMessage.java. From 243bafed4bad3aa31e65d2b815841b04785c4f0c Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 16:17:51 -0700 Subject: [PATCH 15/37] Re-add default formatters by type --- icu4c/source/i18n/messageformat2.cpp | 110 ++++++++++++------ .../source/i18n/messageformat2_evaluation.cpp | 12 ++ icu4c/source/i18n/messageformat2_evaluation.h | 20 +--- .../source/i18n/messageformat2_formatter.cpp | 9 ++ .../i18n/messageformat2_function_registry.cpp | 45 ++++++- icu4c/source/i18n/unicode/messageformat2.h | 10 +- .../messageformat2_function_registry.h | 36 +++++- .../test/intltest/messageformat2test.cpp | 13 +-- 8 files changed, 183 insertions(+), 72 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 0d13b12ada1c..f26a1481f3f6 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -146,6 +146,50 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, return FunctionOptions(std::move(*optionsVector), status); } +[[nodiscard]] InternalValue MessageFormatter::apply(const FunctionName& functionName, + InternalValue&& rand, 
+ FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { + EMPTY_ON_ERROR(status); + + UnicodeString fallbackStr; + + if (rand.isNullOperand()) { + fallbackStr = UnicodeString(COLON); + fallbackStr += functionName; + } else { + fallbackStr = rand.asFallback(); + } + + // Call the function + Function* function = lookupFunction(functionName, status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + context.getErrors().setUnknownFunction(functionName, status); + return InternalValue::fallback(fallbackStr); + } + // Calling takeValue() won't error out because we already checked the fallback case + LocalPointer functionArg(rand.takeValue(status)); + U_ASSERT(U_SUCCESS(status)); + LocalPointer + functionResult(function->call(*functionArg, std::move(options), status)); + if (status == U_MF_OPERAND_MISMATCH_ERROR) { + status = U_ZERO_ERROR; + context.getErrors().setOperandMismatchError(functionName, status); + return InternalValue::fallback(fallbackStr); + } + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + context.getErrors().setFormattingError(functionName, status); + return InternalValue::fallback(fallbackStr); + } + if (U_FAILURE(status)) { + return {}; + } + return InternalValue(functionResult.orphan(), fallbackStr); +} + // Formats an expression using `globalEnv` for the values of variables [[nodiscard]] InternalValue MessageFormatter::formatExpression(const Environment& globalEnv, const Expression& expr, @@ -160,7 +204,34 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, InternalValue randVal = formatOperand(globalEnv, rand, context, status); if (!expr.isFunctionCall()) { - return randVal; + const FunctionValue* contained = randVal.getValue(status); + if (U_FAILURE(status)) { + // Fallback or null -- no implicit formatter + status = U_ZERO_ERROR; + return randVal; + } + const Formattable& toFormat = contained->getOperand(); + switch (toFormat.getType()) { + case UFMT_OBJECT: { + const 
FormattableObject* obj = toFormat.getObject(status); + U_ASSERT(U_SUCCESS(status)); + U_ASSERT(obj != nullptr); + const UnicodeString& type = obj->tag(); + FunctionName functionName; + if (!getDefaultFormatterNameByType(type, functionName)) { + // No formatter for this type -- follow default behavior + return randVal; + } + return apply(functionName, + std::move(randVal), + FunctionOptions(), + context, + status); + } + default: + // Other types not handled + return randVal; + } } else { // Don't call the function on error values if (randVal.isFallback()) { @@ -173,41 +244,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, // Resolve the options FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); - UnicodeString fallbackStr; - if (rand.isNull()) { - fallbackStr = UnicodeString(COLON); - fallbackStr += functionName; - } else { - fallbackStr = randVal.asFallback(); - } - - // Call the function - Function* function = lookupFunction(functionName, status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - context.getErrors().setUnknownFunction(functionName, status); - return InternalValue::fallback(fallbackStr); - } - // Calling takeValue() won't error out because we already checked the fallback case - // Nullptr represents an absent argument - LocalPointer functionArg(randVal.takeValue(status)); - U_ASSERT(U_SUCCESS(status)); - LocalPointer functionResult( - function->call(*functionArg, std::move(resolvedOptions), status)); - if (status == U_MF_OPERAND_MISMATCH_ERROR) { - status = U_ZERO_ERROR; - context.getErrors().setOperandMismatchError(functionName, status); - return InternalValue::fallback(fallbackStr); - } - if (status == U_MF_FORMATTING_ERROR) { - status = U_ZERO_ERROR; - context.getErrors().setFormattingError(functionName, status); - return InternalValue::fallback(fallbackStr); - } - if (U_FAILURE(status)) { - return {}; - } - return InternalValue(functionResult.orphan(), fallbackStr); + return 
apply(functionName, + std::move(randVal), std::move(resolvedOptions), context, status); } } diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 98a7843f09af..b09ec3fa634d 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -235,6 +235,18 @@ FunctionValue* InternalValue::takeValue(UErrorCode& status) { return val.orphan(); } +const FunctionValue* InternalValue::getValue(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; + } + if (isFallback()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + U_ASSERT(val.isValid()); + return val.getAlias(); +} + bool InternalValue::isSelectable() const { if (isFallbackValue) { return false; diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 17ac053ce3c5..08170b1a80d1 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,24 +32,9 @@ namespace message2 { using namespace data_model; - // InternalValue tracks a value along with, possibly, a function that needs - // to be applied to it in the future (once the value is required - // (by a .match or pattern, or another function)); - // while FormattedPlaceholder tracks a value and how it was constructed in the - // past (by a function, or from a literal or argument). - // InternalValue represents an intermediate value in the message - // formatter. An InternalValue can either be a fallback value (representing - // an error that occurred during formatting); a "suspension", meaning a function - // call that has yet to be fully resolved; or a fully-resolved FormattedPlaceholder. - // The "suspension" state is used in implementing selection; in a message like: - // .local $x = {1 :number} - // .match $x - // [...] 
- // $x can't be bound to a fully formatted value; the annotation needs to be - // preserved until the .match is evaluated. Moreover, any given function could - // be both a formatter and a selector, and it's ambiguous which one it's intended - // to be until the body of the message is processed. + // formatter. +// TODO class InternalValue : public UObject { public: bool isFallback() const { return isFallbackValue; } @@ -61,6 +46,7 @@ namespace message2 { explicit InternalValue(FunctionValue* v, const UnicodeString& fb); // Error code is set if this is a fallback FunctionValue* takeValue(UErrorCode& status); + const FunctionValue* getValue(UErrorCode& status) const; UnicodeString asFallback() const { return fallbackString; } virtual ~InternalValue(); InternalValue& operator=(InternalValue&&); diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 819ff4a6a353..cf808fa26654 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -242,6 +242,15 @@ namespace message2 { return nullptr; } + bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& tag, + FunctionName& result) const { + if (hasCustomMFFunctionRegistry()) { + const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); + return customMFFunctionRegistry.getDefaultFormatterNameByType(tag, result); + } + return false; + } + bool MessageFormatter::isCustomFunction(const FunctionName& fn) const { return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFunction(fn) != nullptr; } diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index a48ab31948e9..31defd097782 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -43,8 +43,10 @@ FunctionValue::~FunctionValue() {} MFFunctionRegistry 
MFFunctionRegistry::Builder::build() { U_ASSERT(functions != nullptr); - MFFunctionRegistry result = MFFunctionRegistry(functions); + U_ASSERT(formattersByType != nullptr); + MFFunctionRegistry result = MFFunctionRegistry(functions, formattersByType); functions = nullptr; + formattersByType = nullptr; return result; } @@ -58,14 +60,29 @@ MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFunction(const Fu return *this; } +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, + const FunctionName& functionName, + UErrorCode& errorCode) { + if (U_SUCCESS(errorCode)) { + U_ASSERT(formattersByType != nullptr); + FunctionName* f = create(FunctionName(functionName), errorCode); + formattersByType->put(type, f, errorCode); + } + return *this; + } + MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) { CHECK_ERROR(errorCode); functions = new Hashtable(); - if (functions == nullptr) { + formattersByType = new Hashtable(); + if (functions == nullptr || formattersByType == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } + functions->setValueDeleter(uprv_deleteUObject); + formattersByType->setValueDeleter(uprv_deleteUObject); } MFFunctionRegistry::Builder::~Builder() { @@ -73,6 +90,10 @@ MFFunctionRegistry::Builder::~Builder() { delete functions; functions = nullptr; } + if (formattersByType != nullptr) { + delete formattersByType; + formattersByType = nullptr; + } } // Returns non-owned pointer. Returns pointer rather than reference because it can fail. 
@@ -82,6 +103,16 @@ Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) cons return static_cast(functions->get(functionName)); } +UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { + U_ASSERT(formattersByType != nullptr); + const FunctionName* f = static_cast(formattersByType->get(type)); + if (f != nullptr) { + name = *f; + return true; + } + return false; +} + bool MFFunctionRegistry::hasFunction(const FunctionName& f) const { return getFunction(f) != nullptr; } @@ -157,8 +188,10 @@ static int64_t getInt64Value(const Locale& locale, const Formattable& value, UEr } // Adopts its argument -MFFunctionRegistry::MFFunctionRegistry(FunctionMap* f) : functions(f) { +MFFunctionRegistry::MFFunctionRegistry(FunctionMap* f, Hashtable* byType) + : functions(f), formattersByType(byType) { U_ASSERT(f != nullptr); + U_ASSERT(byType != nullptr); } MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept { @@ -166,6 +199,8 @@ MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) no functions = other.functions; other.functions = nullptr; + formattersByType = other.formattersByType; + other.formattersByType = nullptr; return *this; } @@ -175,6 +210,10 @@ void MFFunctionRegistry::cleanup() noexcept { delete functions; functions = nullptr; } + if (formattersByType != nullptr) { + delete formattersByType; + formattersByType = nullptr; + } } diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index c58b0d7082e3..09ad2173daec 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -352,13 +352,8 @@ namespace message2 { // Formatting methods [[nodiscard]] InternalValue formatLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; - 
[[nodiscard]] InternalValue eval(MessageContext&, InternalValue, UErrorCode&) const; - // Dispatches on argument type -#if false -// TODO - [[nodiscard]] FunctionName getFormatterNameByType(const FormattedPlaceholder& argument, - UErrorCode& status) const; -#endif + [[nodiscard]] InternalValue apply(const FunctionName&, InternalValue&&, FunctionOptions&&, + MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; @@ -380,6 +375,7 @@ namespace message2 { bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; Function* lookupFunction(const FunctionName&, UErrorCode&) const; + bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; // Checking for resolution errors void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 8670594b40b7..81aa1f452ffa 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -60,6 +60,20 @@ namespace message2 { * @deprecated This API is for technology preview only. */ Function* getFunction(const FunctionName& functionName) const; + /** + * Looks up a function by a type tag. This method gets the name of the default formatter registered + * for that type. If no formatter was explicitly registered for this type, it returns false. 
+ * + * @param formatterType Type tag for the desired `FormattableObject` type to be formatted. + * @param name Output parameter; initialized to the name of the default formatter for `formatterType` + * if one has been registered. Its value is undefined otherwise. + * @return True if and only if the function registry contains a default formatter for `formatterType`. + * If the return value is false, then the value of `name` is undefined. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + UBool getDefaultFormatterNameByType(const UnicodeString& formatterType, FunctionName& name) const; /** * The mutable Builder class allows each formatter and selector factory * to be initialized separately; calling its `build()` method yields an @@ -74,6 +88,8 @@ namespace message2 { private: // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type FunctionMap* functions; + // Mapping from strings (type tags) to FunctionNames + Hashtable* formattersByType = nullptr; // Do not define copy constructor/assignment operator Builder& operator=(const Builder&) = delete; @@ -111,6 +127,22 @@ namespace message2 { Builder& adoptFunction(const data_model::FunctionName& functionName, Function* function, UErrorCode& errorCode); + /** + * Registers a formatter factory to a given type tag. + * (See `FormattableObject` for details on type tags.) + * + * @param type Tag for objects to be formatted with this formatter. + * @param functionName A reference to the name of the function to use for + * creating formatters for `formatterType` objects. + * @param errorCode Input/output error code + * @return A reference to the builder. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ + Builder& setDefaultFormatterNameByType(const UnicodeString& type, + const data_model::FunctionName& functionName, + UErrorCode& errorCode); /** * Creates an immutable `MFFunctionRegistry` object with the selectors and formatters * that were previously registered. The builder cannot be used after this call. @@ -176,7 +208,7 @@ namespace message2 { MFFunctionRegistry& operator=(const MFFunctionRegistry&) = delete; MFFunctionRegistry(const MFFunctionRegistry&) = delete; - MFFunctionRegistry(FunctionMap* f); + MFFunctionRegistry(FunctionMap*, Hashtable*); MFFunctionRegistry() {} @@ -190,6 +222,8 @@ namespace message2 { // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type FunctionMap* functions = nullptr; + // Mapping from strings (type tags) to FunctionNames + Hashtable* formattersByType = nullptr; }; // class MFFunctionRegistry class FunctionValue; diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 8ed6b27d88ae..0c42c83f0ecd 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -317,16 +317,14 @@ void TestMessageFormat2::testAPICustomFunctions() { assertEquals("testAPICustomFunctions", "Hello Mr. 
John Doe", result); // By type -// TODO -/* MFFunctionRegistry::Builder builderByType(errorCode); - FunctionName personFormatterName("person"); + FunctionName personFunctionName("person"); MFFunctionRegistry functionRegistryByType = - builderByType.adoptFormatter(personFormatterName, - new PersonNameFormatterFactory(), - errorCode) + builderByType.adoptFunction(personFunctionName, + new PersonNameFunction(), + errorCode) .setDefaultFormatterNameByType("person", - personFormatterName, + personFunctionName, errorCode) .build(); mfBuilder.setFunctionRegistry(functionRegistryByType); @@ -338,7 +336,6 @@ void TestMessageFormat2::testAPICustomFunctions() { // Expect "Hello John" because in the custom function we registered, // "informal" is the default formality and "length" is the default length assertEquals("testAPICustomFunctions", "Hello John", result); -*/ delete person; } From acafbc6cea610025044c1b656eadda82a8573205 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 16:38:05 -0700 Subject: [PATCH 16/37] Rename functions --- icu4c/source/i18n/messageformat2.cpp | 60 +++++++++++----------- icu4c/source/i18n/unicode/messageformat2.h | 6 +-- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index f26a1481f3f6..0620097fa147 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -26,11 +26,6 @@ using namespace data_model; // ------------------------------------------------------ // Formatting -// The result of formatting a literal is just itself. -static Formattable evalLiteral(const Literal& lit) { - return Formattable(lit.unquoted()); -} - // Assumes that `var` is a message argument; returns the argument's value. 
[[nodiscard]] InternalValue MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, @@ -39,9 +34,12 @@ static Formattable evalLiteral(const Literal& lit) { // The fallback for a variable name is itself. UnicodeString str(DOLLAR); str += var; + // Look up the variable in the global environment const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { + // If it exists, create a BaseValue (FunctionValue) for it LocalPointer result(BaseValue::create(locale, *val, errorCode)); + // Add fallback and return an InternalValue if (U_SUCCESS(errorCode)) { return InternalValue(result.orphan(), str); } @@ -51,20 +49,23 @@ static Formattable evalLiteral(const Literal& lit) { } // Returns the contents of the literal -[[nodiscard]] InternalValue MessageFormatter::formatLiteral(const Literal& lit, - UErrorCode& errorCode) const { - // The fallback for a literal is itself. - LocalPointer val(BaseValue::create(locale, evalLiteral(lit), errorCode)); +[[nodiscard]] InternalValue MessageFormatter::evalLiteral(const Literal& lit, + UErrorCode& errorCode) const { + // Create a BaseValue (FunctionValue) that wraps the literal + LocalPointer val(BaseValue::create(locale, + Formattable(lit.unquoted()), + errorCode)); if (U_SUCCESS(errorCode)) { + // The fallback for a literal is itself. 
return InternalValue(val.orphan(), lit.quoted()); } return {}; } -[[nodiscard]] InternalValue MessageFormatter::formatOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { +[[nodiscard]] InternalValue MessageFormatter::evalOperand(const Environment& env, + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } @@ -76,17 +77,16 @@ static Formattable evalLiteral(const Literal& lit) { // Check if it's local or global // Note: there is no name shadowing; this is enforced by the parser const VariableName& var = rand.asVariable(); - // TODO: Currently, this code implements lazy evaluation of locals. + // Currently, this code implements lazy evaluation of locals. // That is, the environment binds names to a closure, not a resolved value. - // Eager vs. lazy evaluation is an open issue: - // see https://github.com/unicode-org/message-format-wg/issues/299 + // The spec does not require either eager or lazy evaluation. 
// Look up the variable in the environment if (env.has(var)) { // `var` is a local -- look it up const Closure& rhs = env.lookup(var); - // Format the expression using the environment from the closure - return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status); + // Evaluate the expression using the environment from the closure + return evalExpression(rhs.getEnv(), rhs.getExpr(), context, status); } // Variable wasn't found in locals -- check if it's global InternalValue result = evalArgument(var, context, status); @@ -103,7 +103,7 @@ static Formattable evalLiteral(const Literal& lit) { return result; } else { U_ASSERT(rand.isLiteral()); - return formatLiteral(rand.asLiteral(), status); + return evalLiteral(rand.asLiteral(), status); } } @@ -125,14 +125,11 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const UnicodeString& k = opt.getName(); const Operand& v = opt.getValue(); - // Options are fully evaluated before calling the function // Format the operand - InternalValue rhsVal = formatOperand(env, v, context, status); + InternalValue rhsVal = evalOperand(env, v, context, status); if (U_FAILURE(status)) { return {}; } - // Force evaluation in order to extract a FormattedPlaceholder - // from `rhsVal` (which might be a suspension) FunctionValue* optVal = rhsVal.takeValue(status); // The option is resolved; add it to the vector @@ -191,7 +188,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, } // Formats an expression using `globalEnv` for the values of variables -[[nodiscard]] InternalValue MessageFormatter::formatExpression(const Environment& globalEnv, +[[nodiscard]] InternalValue MessageFormatter::evalExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const { @@ -200,8 +197,8 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, } const Operand& rand = expr.getOperand(); - // Format the operand (formatOperand 
handles the case of a null operand) - InternalValue randVal = formatOperand(globalEnv, rand, context, status); + // Evaluate the operand (evalOperand handles the case of a null operand) + InternalValue randVal = evalOperand(globalEnv, rand, context, status); if (!expr.isFunctionCall()) { const FunctionValue* contained = randVal.getValue(status); @@ -250,7 +247,10 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, } // Formats each text and expression part of a pattern, appending the results to `result` -void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatPattern(MessageContext& context, + const Environment& globalEnv, + const Pattern& pat, + UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); for (int32_t i = 0; i < pat.numParts(); i++) { @@ -261,7 +261,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // Markup is ignored } else { // Format the expression - InternalValue partVal = formatExpression(globalEnv, part.contents(), context, status); + InternalValue partVal = evalExpression(globalEnv, part.contents(), context, status); if (partVal.isFallback()) { result += LEFT_CURLY_BRACE; result += partVal.asFallback(); @@ -301,7 +301,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - InternalValue rv = formatExpression(env, selectors[i], context, status); + InternalValue rv = evalExpression(env, selectors[i], context, status); if (rv.isSelectable()) { // 2ii. 
If selection is supported for rv: // (True if this code has been reached) @@ -361,7 +361,7 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Call the selector // Already checked for fallback, so it's safe to call takeValue() - LocalPointer rvVal(rv.takeValue(status)); + LocalPointer rvVal(rv.takeValue(status)); rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, status); diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index 09ad2173daec..e3497c23bc52 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -350,13 +350,13 @@ namespace message2 { void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods - [[nodiscard]] InternalValue formatLiteral(const data_model::Literal&, UErrorCode&) const; + [[nodiscard]] InternalValue evalLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; [[nodiscard]] InternalValue apply(const FunctionName&, InternalValue&&, FunctionOptions&&, MessageContext&, UErrorCode&) const; - [[nodiscard]] InternalValue formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue evalExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; - [[nodiscard]] InternalValue formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue evalOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; 
void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; From 037b027bdc269bd7420f25592213509d1537a00e Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 4 Oct 2024 16:44:41 -0700 Subject: [PATCH 17/37] Comments --- icu4c/source/i18n/messageformat2.cpp | 43 +++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 0620097fa147..9843c9920e7d 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -70,9 +70,13 @@ using namespace data_model; return {}; } + // Three cases: absent operand; variable; or literal + + // Absent (null) operand if (rand.isNull()) { return InternalValue::null(status); } + // Variable reference if (rand.isVariable()) { // Check if it's local or global // Note: there is no name shadowing; this is enforced by the parser @@ -100,8 +104,11 @@ using namespace data_model; str += var; return InternalValue::fallback(str); } + // Looking up the global variable succeeded; return it return result; - } else { + } + // Literal + else { U_ASSERT(rand.isLiteral()); return evalLiteral(rand.asLiteral(), status); } @@ -112,11 +119,13 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { + // Create a vector of options LocalPointer optionsVector(createUVector(status)); if (U_FAILURE(status)) { return {}; } LocalPointer resolvedOpt; + // For each option... for (int i = 0; i < options.size(); i++) { const Option& opt = options.getOption(i, status); if (U_FAILURE(status)) { @@ -125,11 +134,12 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const UnicodeString& k = opt.getName(); const Operand& v = opt.getValue(); - // Format the operand + // ...evaluate its right-hand side... 
InternalValue rhsVal = evalOperand(env, v, context, status); if (U_FAILURE(status)) { return {}; } + // ...giving a FunctionValue. FunctionValue* optVal = rhsVal.takeValue(status); // The option is resolved; add it to the vector @@ -139,10 +149,12 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, EMPTY_ON_ERROR(status); optionsVector->adoptElement(p.orphan(), status); } - + // Return a new FunctionOptions constructed from the vector of options return FunctionOptions(std::move(*optionsVector), status); } +// Looks up `functionName` and applies it to an operand and options, +// handling errors if the function is unbound [[nodiscard]] InternalValue MessageFormatter::apply(const FunctionName& functionName, InternalValue&& rand, FunctionOptions&& options, @@ -152,6 +164,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, UnicodeString fallbackStr; + // Create the fallback string for this function call if (rand.isNullOperand()) { fallbackStr = UnicodeString(COLON); fallbackStr += functionName; @@ -159,18 +172,22 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, fallbackStr = rand.asFallback(); } - // Call the function + // Look up the function name Function* function = lookupFunction(functionName, status); if (U_FAILURE(status)) { + // Function is unknown -- set error and use the fallback value status = U_ZERO_ERROR; context.getErrors().setUnknownFunction(functionName, status); return InternalValue::fallback(fallbackStr); } - // Calling takeValue() won't error out because we already checked the fallback case + // Value is not a fallback, so we can safely call takeValue() LocalPointer functionArg(rand.takeValue(status)); U_ASSERT(U_SUCCESS(status)); + // Call the function LocalPointer functionResult(function->call(*functionArg, std::move(options), status)); + // Handle any errors signaled by the function + // (and use the fallback value) if (status == U_MF_OPERAND_MISMATCH_ERROR) { status = 
U_ZERO_ERROR; context.getErrors().setOperandMismatchError(functionName, status); @@ -184,14 +201,15 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, if (U_FAILURE(status)) { return {}; } + // Success; return the result return InternalValue(functionResult.orphan(), fallbackStr); } -// Formats an expression using `globalEnv` for the values of variables +// Evaluates an expression using `globalEnv` for the values of variables [[nodiscard]] InternalValue MessageFormatter::evalExpression(const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } @@ -200,6 +218,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, // Evaluate the operand (evalOperand handles the case of a null operand) InternalValue randVal = evalOperand(globalEnv, rand, context, status); + // If there's no function, we check for an implicit formatter if (!expr.isFunctionCall()) { const FunctionValue* contained = randVal.getValue(status); if (U_FAILURE(status)) { @@ -208,6 +227,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, return randVal; } const Formattable& toFormat = contained->getOperand(); + // If it has an object type, there might be an implicit formatter for it... switch (toFormat.getType()) { case UFMT_OBJECT: { const FormattableObject* obj = toFormat.getObject(status); @@ -219,6 +239,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, // No formatter for this type -- follow default behavior return randVal; } + // ... 
apply the implicit formatter return apply(functionName, std::move(randVal), FunctionOptions(), @@ -226,7 +247,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, status); } default: - // Other types not handled + // No formatters for other types, so just return the evaluated operand return randVal; } } else { @@ -234,6 +255,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, if (randVal.isFallback()) { return randVal; } + // Get the function name and options from the operator const Operator* rator = expr.getOperator(status); U_ASSERT(U_SUCCESS(status)); const FunctionName& functionName = rator->getFunctionName(); @@ -241,6 +263,7 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, // Resolve the options FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); + // Call the function with the operand and arguments return apply(functionName, std::move(randVal), std::move(resolvedOptions), context, status); } From f82c921ab8d1278f299f9275574ee2102571b8cc Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 7 Oct 2024 12:52:15 -0700 Subject: [PATCH 18/37] Bring back FunctionFactory --- icu4c/source/i18n/messageformat2.cpp | 3 +- .../source/i18n/messageformat2_formatter.cpp | 36 +++-- .../i18n/messageformat2_function_registry.cpp | 147 ++++++++++++++---- ...essageformat2_function_registry_internal.h | 66 ++++++-- icu4c/source/i18n/unicode/messageformat2.h | 3 +- .../messageformat2_function_registry.h | 52 ++++++- .../test/intltest/messageformat2test.cpp | 6 +- .../source/test/intltest/messageformat2test.h | 30 ++++ .../intltest/messageformat2test_custom.cpp | 88 ++++++++++- 9 files changed, 357 insertions(+), 74 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 9843c9920e7d..067291a80c36 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -173,13 +173,14 @@ 
FunctionOptions MessageFormatter::resolveOptions(const Environment& env, } // Look up the function name - Function* function = lookupFunction(functionName, status); + FunctionFactory* functionFactory = lookupFunctionFactory(functionName, status); if (U_FAILURE(status)) { // Function is unknown -- set error and use the fallback value status = U_ZERO_ERROR; context.getErrors().setUnknownFunction(functionName, status); return InternalValue::fallback(fallbackStr); } + LocalPointer function(functionFactory->createFunction(locale, status)); // Value is not a fallback, so we can safely call takeValue() LocalPointer functionArg(rand.takeValue(status)); U_ASSERT(U_SUCCESS(status)); diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index cf808fa26654..e73a8ec827f0 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -122,18 +122,23 @@ namespace message2 { // Set up the standard function registry MFFunctionRegistry::Builder standardFunctionsBuilder(success); - Function* dateTime = StandardFunctions::DateTime::dateTime(locale, success); - Function* date = StandardFunctions::DateTime::date(locale, success); - Function* time = StandardFunctions::DateTime::time(locale, success); - standardFunctionsBuilder.adoptFunction(FunctionName(UnicodeString("datetime")), dateTime, success) - .adoptFunction(FunctionName(UnicodeString("date")), date, success) - .adoptFunction(FunctionName(UnicodeString("time")), time, success) - .adoptFunction(FunctionName(UnicodeString("number")), - StandardFunctions::Number::number(locale, success), success) - .adoptFunction(FunctionName(UnicodeString("integer")), - StandardFunctions::Number::integer(locale, success), success) - .adoptFunction(FunctionName(UnicodeString("string")), - StandardFunctions::String::string(locale, success), success); + LocalPointer dateTime(StandardFunctions::DateTimeFactory::dateTime(success)); + LocalPointer 
date(StandardFunctions::DateTimeFactory::date(success)); + LocalPointer time(StandardFunctions::DateTimeFactory::time(success)); + LocalPointer number(StandardFunctions::NumberFactory::number(success)); + LocalPointer integer(StandardFunctions::NumberFactory::integer(success)); + LocalPointer string(StandardFunctions::StringFactory::string(success)); + CHECK_ERROR(success); + standardFunctionsBuilder.adoptFunctionFactory(FunctionName(UnicodeString("datetime")), + dateTime.orphan(), success) + .adoptFunctionFactory(FunctionName(UnicodeString("date")), date.orphan(), success) + .adoptFunctionFactory(FunctionName(UnicodeString("time")), time.orphan(), success) + .adoptFunctionFactory(FunctionName(UnicodeString("number")), + number.orphan(), success) + .adoptFunctionFactory(FunctionName(UnicodeString("integer")), + integer.orphan(), success) + .adoptFunctionFactory(FunctionName(UnicodeString("string")), + string.orphan(), success); CHECK_ERROR(success); standardMFFunctionRegistry = standardFunctionsBuilder.build(); CHECK_ERROR(success); @@ -220,8 +225,9 @@ namespace message2 { return standardMFFunctionRegistry.hasFunction(functionName); } - Function* MessageFormatter::lookupFunction(const FunctionName& functionName, - UErrorCode& status) const { + FunctionFactory* + MessageFormatter::lookupFunctionFactory(const FunctionName& functionName, + UErrorCode& status) const { NULL_ON_ERROR(status); if (isBuiltInFunction(functionName)) { @@ -229,7 +235,7 @@ namespace message2 { } if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - Function* function = customMFFunctionRegistry.getFunction(functionName); + FunctionFactory* function = customMFFunctionRegistry.getFunction(functionName); if (function != nullptr) { return function; } diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 31defd097782..da6622b2d073 100644 --- 
a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -38,6 +38,7 @@ namespace message2 { // Function registry implementation +FunctionFactory::~FunctionFactory() {} Function::~Function() {} FunctionValue::~FunctionValue() {} @@ -50,9 +51,10 @@ MFFunctionRegistry MFFunctionRegistry::Builder::build() { return result; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFunction(const FunctionName& functionName, - Function* function, - UErrorCode& errorCode) { +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::adoptFunctionFactory(const FunctionName& functionName, + FunctionFactory* function, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { U_ASSERT(functions != nullptr); functions->put(functionName, function, errorCode); @@ -98,9 +100,9 @@ MFFunctionRegistry::Builder::~Builder() { // Returns non-owned pointer. Returns pointer rather than reference because it can fail. // Returns non-const because Function is mutable. 
-Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { +FunctionFactory* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { U_ASSERT(functions != nullptr); - return static_cast(functions->get(functionName)); + return static_cast(functions->get(functionName)); } UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { @@ -225,6 +227,40 @@ MFFunctionRegistry::~MFFunctionRegistry() { // --------- Number +/* static */ StandardFunctions::NumberFactory* +StandardFunctions::NumberFactory::integer(UErrorCode& success) { + return NumberFactory::create(true, success); +} + +/* static */ StandardFunctions::NumberFactory* +StandardFunctions::NumberFactory::number(UErrorCode& success) { + return NumberFactory::create(false, success); +} + +/* static */ StandardFunctions::NumberFactory* +StandardFunctions::NumberFactory::create(bool isInteger, + UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new NumberFactory(isInteger)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +Function* +StandardFunctions::NumberFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Number* result = new Number(locale, isInteger); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + /* static */ StandardFunctions::Number* StandardFunctions::Number::integer(const Locale& loc, UErrorCode& success) { return create(loc, true, success); @@ -582,6 +618,7 @@ UnicodeString StandardFunctions::NumberValue::formatToString(UErrorCode& errorCo return formattedNumber.toString(errorCode); } +StandardFunctions::NumberFactory::~NumberFactory() {} StandardFunctions::Number::~Number() {} StandardFunctions::NumberValue::~NumberValue() {} @@ -699,23 +736,49 @@ static UnicodeString defaultForOption(const UnicodeString& optionName) { } */ 
-/* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::date(const Locale& loc, UErrorCode& success) { - return DateTime::create(loc, DateTimeType::kDate, success); +/* static */ StandardFunctions::DateTimeFactory* +StandardFunctions::DateTimeFactory::date(UErrorCode& success) { + return DateTimeFactory::create(DateTimeType::kDate, success); } -/* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::time(const Locale& loc, UErrorCode& success) { - return DateTime::create(loc, DateTimeType::kTime, success); +/* static */ StandardFunctions::DateTimeFactory* +StandardFunctions::DateTimeFactory::time(UErrorCode& success) { + return DateTimeFactory::create(DateTimeType::kTime, success); } -/* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::dateTime(const Locale& loc, UErrorCode& success) { - return DateTime::create(loc, DateTimeType::kDateTime, success); +/* static */ StandardFunctions::DateTimeFactory* +StandardFunctions::DateTimeFactory::dateTime(UErrorCode& success) { + return DateTimeFactory::create(DateTimeType::kDateTime, success); +} + +/* static */ StandardFunctions::DateTimeFactory* +StandardFunctions::DateTimeFactory::create(DateTimeFactory::DateTimeType type, + UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new DateTimeFactory(type)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +Function* +StandardFunctions::DateTimeFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + DateTime* result = new DateTime(locale, type); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; } /* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::create(const Locale& loc, DateTimeType type, UErrorCode& success) { +StandardFunctions::DateTime::create(const Locale& loc, + DateTimeFactory::DateTimeType type, + UErrorCode& success) { 
NULL_ON_ERROR(success); LocalPointer result(new DateTime(loc, type)); @@ -728,7 +791,12 @@ StandardFunctions::DateTime::create(const Locale& loc, DateTimeType type, UError FunctionValue* StandardFunctions::DateTime::call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { - auto result = new DateTimeValue(type, locale, val, std::move(opts), errorCode); + NULL_ON_ERROR(errorCode); + + auto result = new DateTimeValue(locale, type, val, std::move(opts), errorCode); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } return result; } @@ -761,8 +829,8 @@ UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& statu return formattedDate; } -StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, - const Locale& locale, +StandardFunctions::DateTimeValue::DateTimeValue(const Locale& locale, + DateTimeFactory::DateTimeType type, FunctionValue& val, FunctionOptions&& options, UErrorCode& errorCode) { @@ -795,21 +863,23 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, bool hasTimeStyleOption = dateStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; - bool useStyle = (type == DateTime::DateTimeType::kDateTime + using DateTimeType = DateTimeFactory::DateTimeType; + + bool useStyle = (type == DateTimeType::kDateTime && (hasDateStyleOption || hasTimeStyleOption || noOptions)) - || (type != DateTime::DateTimeType::kDateTime); + || (type != DateTimeType::kDateTime); - bool useDate = type == DateTime::DateTimeType::kDate - || (type == DateTime::DateTimeType::kDateTime + bool useDate = type == DateTimeType::kDate + || (type == DateTimeType::kDateTime && hasDateStyleOption); - bool useTime = type == DateTime::DateTimeType::kTime - || (type == DateTime::DateTimeType::kDateTime + bool useTime = type == DateTimeType::kTime + || (type == DateTimeType::kDateTime && hasTimeStyleOption); if (useStyle) { // Extract style options - if (type == 
DateTime::DateTimeType::kDateTime) { + if (type == DateTimeType::kDateTime) { // Note that the options-getting has to be repeated across the three cases, // since `:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time` // use "style" @@ -823,7 +893,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, } else { df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale)); } - } else if (type == DateTime::DateTimeType::kDate) { + } else if (type == DateTimeType::kDate) { dateStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); } else { @@ -1010,11 +1080,24 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, formattedDate = result; } +StandardFunctions::DateTimeFactory::~DateTimeFactory() {} StandardFunctions::DateTime::~DateTime() {} StandardFunctions::DateTimeValue::~DateTimeValue() {} // --------- String +/* static */ StandardFunctions::StringFactory* +StandardFunctions::StringFactory::string(UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new StringFactory()); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + /* static */ StandardFunctions::String* StandardFunctions::String::string(const Locale& loc, UErrorCode& success) { NULL_ON_ERROR(success); @@ -1027,6 +1110,17 @@ StandardFunctions::String::string(const Locale& loc, UErrorCode& success) { return result.orphan(); } +Function* +StandardFunctions::StringFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + String* result = new String(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); FunctionValue* @@ -1075,6 +1169,7 @@ void 
StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, } } +StandardFunctions::StringFactory::~StringFactory() {} StandardFunctions::String::~String() {} StandardFunctions::StringValue::~StringValue() {} diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 32b0f2f55a42..bd46103cc85d 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -34,17 +34,15 @@ namespace message2 { class DateTime; class DateTimeValue; - class DateTime : public Function { + class DateTimeFactory : public FunctionFactory { public: - FunctionValue* call(FunctionValue& operand, - FunctionOptions&& options, - UErrorCode& errorCode) override; - static DateTime* date(const Locale&, UErrorCode&); - static DateTime* time(const Locale&, UErrorCode&); - static DateTime* dateTime(const Locale&, UErrorCode&); - virtual ~DateTime(); - + Function* createFunction(const Locale& locale, UErrorCode& status) override; + static DateTimeFactory* date(UErrorCode&); + static DateTimeFactory* time(UErrorCode&); + static DateTimeFactory* dateTime(UErrorCode&); + virtual ~DateTimeFactory(); private: + friend class DateTime; friend class DateTimeValue; typedef enum DateTimeType { @@ -53,15 +51,49 @@ namespace message2 { kDateTime } DateTimeType; + DateTimeType type; + + static DateTimeFactory* create(const DateTimeType, + UErrorCode&); + + DateTimeFactory(const DateTimeType t) : type(t) {} + }; // class DateTimeFactory + + class DateTime : public Function { + public: + FunctionValue* call(FunctionValue& operand, + FunctionOptions&& options, + UErrorCode& errorCode) override; + virtual ~DateTime(); + + private: + friend class DateTimeFactory; + friend class DateTimeValue; + Locale locale; - const DateTimeType type; - static DateTime* create(const Locale&, DateTimeType, UErrorCode&); - DateTime(const Locale& l, DateTimeType 
t) : locale(l), type(t) {} + const DateTimeFactory::DateTimeType type; + static DateTime* create(const Locale&, + DateTimeFactory::DateTimeType, + UErrorCode&); + DateTime(const Locale& l, DateTimeFactory::DateTimeType t) + : locale(l), type(t) {} const LocalPointer icuFormatter; }; class NumberValue; + class NumberFactory : public FunctionFactory { + public: + Function* createFunction(const Locale& locale, UErrorCode& status) override; + static NumberFactory* integer(UErrorCode& success); + static NumberFactory* number(UErrorCode& success); + virtual ~NumberFactory(); + private: + static NumberFactory* create(bool, UErrorCode&); + NumberFactory(bool isInt) : isInteger(isInt) {} + bool isInteger; + }; // class NumberFactory + class Number : public Function { public: static Number* integer(const Locale& loc, UErrorCode& success); @@ -134,10 +166,18 @@ namespace message2 { friend class DateTime; UnicodeString formattedDate; - DateTimeValue(DateTime::DateTimeType type, const Locale&, + DateTimeValue(const Locale& locale, DateTimeFactory::DateTimeType type, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class DateTimeValue + class StringFactory : public FunctionFactory { + public: + Function* createFunction(const Locale& locale, UErrorCode& status) override; + static StringFactory* string(UErrorCode& status); + virtual ~StringFactory(); + private: + }; // class StringFactory + class String : public Function { public: FunctionValue* call(FunctionValue& val, diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index e3497c23bc52..f1ecfa91fb21 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -374,7 +374,8 @@ namespace message2 { bool isBuiltInFunction(const FunctionName&) const; bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) 
const; - Function* lookupFunction(const FunctionName&, UErrorCode&) const; + // Result is not adopted + FunctionFactory* lookupFunctionFactory(const FunctionName&, UErrorCode&) const; bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; // Checking for resolution errors diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 81aa1f452ffa..f76fca6d580b 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -28,7 +28,7 @@ namespace message2 { using namespace data_model; - class Function; + class FunctionFactory; /** * Defines mappings from names of formatters and selectors to functions implementing them. @@ -47,19 +47,19 @@ namespace message2 { public: /** - * Looks up a function by the name of the function. The result is non-const, + * Looks up a function factory by the name of the function. The result is non-const, * since functions may have local state. Returns the result by pointer * rather than by reference since it can fail. * * @param functionName Name of the desired function. - * @return A pointer to the Function registered under `functionName`, or null + * @return A pointer to the function factory registered under `functionName`, or null * if no function was registered under that name. The pointer is not owned * by the caller. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - Function* getFunction(const FunctionName& functionName) const; + FunctionFactory* getFunction(const FunctionName& functionName) const; /** * Looks up a function by a type tag. This method gets the name of the default formatter registered * for that type. If no formatter was explicitly registered for this type, it returns false. 
@@ -124,9 +124,9 @@ namespace message2 { * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - Builder& adoptFunction(const data_model::FunctionName& functionName, - Function* function, - UErrorCode& errorCode); + Builder& adoptFunctionFactory(const data_model::FunctionName& functionName, + FunctionFactory* function, + UErrorCode& errorCode); /** * Registers a formatter factory to a given type tag. * (See `FormattableObject` for details on type tags.) @@ -226,6 +226,44 @@ namespace message2 { Hashtable* formattersByType = nullptr; }; // class MFFunctionRegistry + class Function; + + /** + * Interface that function factory classes must implement. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API FunctionFactory : public UObject { + public: + /** + * Constructs a new function object. This method is not const; + * function factories with local state may be defined. + * + * @param locale Locale to be used by the function. + * @param status Input/output error code. + * @return The new Formatter, which is non-null if U_SUCCESS(status). + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + virtual Function* createFunction(const Locale& locale, UErrorCode& status) = 0; + /** + * Destructor. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~FunctionFactory(); + /** + * Copy constructor. + * + * @internal ICU 75 technology preview + * @deprecated This API is for technology preview only. 
+ */ + FunctionFactory& operator=(const FunctionFactory&) = delete; + }; // class FunctionFactory + class FunctionValue; /** diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 0c42c83f0ecd..a7f6d727addb 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -278,7 +278,7 @@ void TestMessageFormat2::testAPICustomFunctions() { // Set up custom function registry MFFunctionRegistry::Builder builder(errorCode); MFFunctionRegistry functionRegistry = - builder.adoptFunction(data_model::FunctionName("person"), new PersonNameFunction(), errorCode) + builder.adoptFunctionFactory(data_model::FunctionName("person"), new PersonNameFactory(), errorCode) .build(); Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); @@ -320,8 +320,8 @@ void TestMessageFormat2::testAPICustomFunctions() { MFFunctionRegistry::Builder builderByType(errorCode); FunctionName personFunctionName("person"); MFFunctionRegistry functionRegistryByType = - builderByType.adoptFunction(personFunctionName, - new PersonNameFunction(), + builderByType.adoptFunctionFactory(personFunctionName, + new PersonNameFactory(), errorCode) .setDefaultFormatterNameByType("person", personFunctionName, diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 5bee2dc0317e..f675e4a22431 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -112,10 +112,20 @@ class Person : public FormattableObject { const UnicodeString tagName; }; +class PersonNameFactory : public FunctionFactory { + Function* createFunction(const Locale& locale, UErrorCode& status) override; + virtual ~PersonNameFactory(); +}; + class PersonNameFunction : public Function { public: FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; 
virtual ~PersonNameFunction(); + private: + friend class PersonNameFactory; + + const Locale locale; + PersonNameFunction(const Locale& loc) : locale(loc) {} }; class PersonNameValue : public FunctionValue { @@ -142,6 +152,11 @@ class FormattableProperties : public FormattableObject { const UnicodeString tagName; }; +class GrammarCasesFactory : public FunctionFactory { + Function* createFunction(const Locale& locale, UErrorCode& status) override; + virtual ~GrammarCasesFactory(); +}; + class GrammarCasesFunction : public Function { public: FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; @@ -161,6 +176,11 @@ class GrammarCasesValue : public FunctionValue { void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; }; // class GrammarCasesValue +class ListFactory : public FunctionFactory { + Function* createFunction(const Locale& locale, UErrorCode& status) override; + virtual ~ListFactory(); +}; + class ListFunction : public Function { public: FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; @@ -185,6 +205,11 @@ class ListValue : public FunctionValue { UErrorCode&); }; // class ListValue +class NounFunctionFactory : public FunctionFactory { + Function* createFunction(const Locale& locale, UErrorCode& status) override; + virtual ~NounFunctionFactory(); +}; + class NounValue : public FunctionValue { public: UnicodeString formatToString(UErrorCode&) const override; @@ -199,6 +224,11 @@ class NounValue : public FunctionValue { UErrorCode&); }; // class NounValue +class AdjectiveFunctionFactory : public FunctionFactory { + Function* createFunction(const Locale& locale, UErrorCode& status) override; + virtual ~AdjectiveFunctionFactory(); +}; + class AdjectiveValue : public FunctionValue { public: UnicodeString formatToString(UErrorCode&) const override; diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index 
db7adb5852b3..8a076f295dcf 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -32,7 +32,9 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunction(FunctionName("person"), new PersonNameFunction(), errorCode) + .adoptFunctionFactory(FunctionName("person"), + new PersonNameFactory(), + errorCode) .build()); UnicodeString name = "name"; LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); @@ -98,7 +100,9 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunction(FunctionName("person"), new PersonNameFunction(), errorCode) + .adoptFunctionFactory(FunctionName("person"), + new PersonNameFactory(), + errorCode) .build()); UnicodeString host = "host"; UnicodeString hostGender = "hostGender"; @@ -188,8 +192,12 @@ void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunction(FunctionName("noun"), new NounFunction(), errorCode) - .adoptFunction(FunctionName("adjective"), new AdjectiveFunction(), errorCode) + .adoptFunctionFactory(FunctionName("noun"), + new NounFunctionFactory(), + errorCode) + .adoptFunctionFactory(FunctionName("adjective"), + new AdjectiveFunctionFactory(), + errorCode) .build()); UnicodeString name = "name"; TestCase::Builder testBuilder; @@ -265,6 +273,16 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } +Function* PersonNameFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + PersonNameFunction* result = new 
PersonNameFunction(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + FunctionValue* PersonNameFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { @@ -349,6 +367,7 @@ PersonNameValue::PersonNameValue(FunctionValue& arg, FormattableProperties::~FormattableProperties() {} Person::~Person() {} +PersonNameFactory::~PersonNameFactory() {} PersonNameValue::~PersonNameValue() {} /* @@ -379,9 +398,21 @@ PersonNameValue::~PersonNameValue() {} result += postfix; } +Function* GrammarCasesFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + (void) locale; + + GrammarCasesFunction* result = new GrammarCasesFunction(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + FunctionValue* GrammarCasesFunction::call(FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { + FunctionOptions&& opts, + UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); GrammarCasesValue* v = new GrammarCasesValue(arg, std::move(opts), errorCode); @@ -444,7 +475,7 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode) - .adoptFunction(FunctionName("grammarBB"), new GrammarCasesFunction(), errorCode) + .adoptFunctionFactory(FunctionName("grammarBB"), new GrammarCasesFactory(), errorCode) .build(); TestCase::Builder testBuilder; @@ -496,12 +527,26 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) TestUtils::runTestCase(*this, test, errorCode); } +GrammarCasesFactory::~GrammarCasesFactory() {} GrammarCasesValue::~GrammarCasesValue() {} /* See ICU4J: CustomFormatterListTest.java */ +Function* ListFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + (void) locale; + + ListFunction* result = new 
ListFunction(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + + FunctionValue* ListFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { @@ -585,6 +630,7 @@ message2::ListValue::ListValue(const Locale& locale, } } +ListFactory::~ListFactory() {} ListValue::~ListValue() {} ListFunction::~ListFunction() {} @@ -601,7 +647,7 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { TestCase::Builder testBuilder; MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFunction(FunctionName("listformat"), new ListFunction(Locale("en")), errorCode) + .adoptFunctionFactory(FunctionName("listformat"), new ListFactory(), errorCode) .build(); CHECK_ERROR(errorCode); @@ -826,6 +872,18 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { } #endif +Function* NounFunctionFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + (void) locale; + + NounFunction* result = new NounFunction(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + FunctionValue* NounFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { @@ -881,6 +939,18 @@ NounValue::NounValue(FunctionValue& arg, } } +Function* AdjectiveFunctionFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + (void) locale; + + AdjectiveFunction* result = new AdjectiveFunction(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + FunctionValue* AdjectiveFunction::call(FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { @@ -946,6 +1016,8 @@ AdjectiveValue::AdjectiveValue(FunctionValue& arg, } } +NounFunctionFactory::~NounFunctionFactory() {} +AdjectiveFunctionFactory::~AdjectiveFunctionFactory() {} NounFunction::~NounFunction() {} 
AdjectiveFunction::~AdjectiveFunction() {} NounValue::~NounValue() {} From 10dfe45c30dd44f7deee0d16223a854fb5567220 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 7 Oct 2024 13:07:45 -0700 Subject: [PATCH 19/37] Un-ignore a test; add comments --- icu4c/source/i18n/messageformat2_evaluation.h | 8 +++- .../i18n/messageformat2_formattable.cpp | 48 ------------------- testdata/message2/icu-test-functions.json | 3 +- 3 files changed, 7 insertions(+), 52 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 08170b1a80d1..9cc9302d4e30 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -34,7 +34,9 @@ namespace message2 { // InternalValue represents an intermediate value in the message // formatter. -// TODO + // It can be either a FunctionValue or a "fallback value". A fallback value + // is a string that serves as a replacement for expressions whose evaluation + // caused an error. Fallback values are not passed to functions. class InternalValue : public UObject { public: bool isFallback() const { return isFallbackValue; } @@ -62,7 +64,9 @@ namespace message2 { : isFallbackValue(true), fallbackString(fb) {} }; // class InternalValue -// Used for arguments and literals + + // A BaseValue wraps a literal value or argument value so it can be used + // in a context that expects a FunctionValue. 
class BaseValue : public FunctionValue { public: static BaseValue* create(const Locale&, const Formattable&, UErrorCode&); diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index e042f6efb846..23219268de37 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -184,22 +184,6 @@ namespace message2 { df->format(date, result, 0, errorCode); } -#if false - // Called when output is required and the contents are an unevaluated `Formattable`; - // formats the source `Formattable` to a string with defaults, if it can be - // formatted with a default formatter - static FormattedPlaceholder formatWithDefaults(const Locale& locale, const FormattedPlaceholder& input, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - - const Formattable* toFormat = input.getSource(status); - U_ASSERT(U_SUCCESS(status)); // Shouldn't get called on a null argument - - return formattableToString(locale, *toFormat, status); - } -#endif - UnicodeString formattableToString(const Locale& locale, const Formattable& toFormat, UErrorCode& status) { @@ -260,38 +244,6 @@ namespace message2 { } } -#if false - // Called when string output is required; forces output to be produced - // if none is present (including formatting number output as a string) - UnicodeString FormattedPlaceholder::formatToString(const Locale& locale, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Function result: either just return the string, or format the number - // as a string and return it - if (isFunctionResult()) { - if (formatted.isString()) { - return formatted.getString(); - } else { - return formatted.getNumber().toString(status); - } - } - // Unannotated value: apply default formatters - UErrorCode savedStatus = status; - FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status); - if (status == U_MF_FORMATTING_ERROR) { - return 
{}; - } - // Ignore U_USING_DEFAULT_WARNING - if (status == U_USING_DEFAULT_WARNING) { - status = savedStatus; - } - return evaluated.formatToString(locale, status); - } -#endif - } // namespace message2 U_NAMESPACE_END diff --git a/testdata/message2/icu-test-functions.json b/testdata/message2/icu-test-functions.json index a97446addf0e..2d2185c71b05 100644 --- a/testdata/message2/icu-test-functions.json +++ b/testdata/message2/icu-test-functions.json @@ -131,8 +131,7 @@ "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", "params": [{"name": "exp", "value": { "date": 1722746637000 }}, {"name": "user", "value": "John"}, - {"name": "tsOver", "value": "full" }], - "ignoreCpp": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + {"name": "tsOver", "value": "full" }] }, { "src": [ From a2f4a598659d6eb23855a42e84eb4ff8a804f0cd Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 7 Oct 2024 13:29:00 -0700 Subject: [PATCH 20/37] Add doc comments --- icu4c/source/i18n/messageformat2_evaluation.h | 6 + .../i18n/unicode/messageformat2_formattable.h | 15 +- .../messageformat2_function_registry.h | 143 ++++++++++++++++-- 3 files changed, 149 insertions(+), 15 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 9cc9302d4e30..6760b31c5d39 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -82,6 +82,12 @@ namespace message2 { BaseValue(const Locale&, const Formattable&); }; // class BaseValue + // A NullValue represents the absence of an argument. 
+ class NullValue : public FunctionValue { + public: + virtual UBool isNullOperand() const { return true; } + }; // class NullValue + // PrioritizedVariant // For how this class is used, see the references to (integer, variant) tuples diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 7342622d37c3..dcd9f5535f51 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -511,6 +511,18 @@ class U_I18N_API FunctionOptions : public UObject { } return result; } + /** + * Returns a new FunctionOptions object containing all the key-value + * pairs from `this` and `other`. When `this` and `other` define options with + * the same name, `this` takes preference. + * `this` cannot be used after calling this method. + * + * @return The result of merging `this` and `other`. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions mergeOptions(FunctionOptions&& other, UErrorCode&); /** * Default constructor. * Returns an empty mapping. @@ -549,8 +561,7 @@ class U_I18N_API FunctionOptions : public UObject { * @deprecated This API is for technology preview only. */ FunctionOptions& operator=(const FunctionOptions&) = delete; - // TODO - FunctionOptions mergeOptions(FunctionOptions&&, UErrorCode&); + private: friend class MessageFormatter; friend class StandardFunctions; diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index f76fca6d580b..bf07d22096e9 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -269,32 +269,136 @@ namespace message2 { /** * Interface that function handler classes must implement. 
* - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ class U_I18N_API Function : public UObject { - public: - virtual FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) = 0; - virtual ~Function(); + public: + /** + * Calls this Function on a FunctionValue operand and its FunctionOptions options, + * returning a new pointer to a FunctionValue (which is adopted by the caller). + * + * @param operand The unnamed argument to the function. + * @param options Resolved options for this function. + * @param status Input/output error code + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual FunctionValue* call(FunctionValue& operand, + FunctionOptions&& options, + UErrorCode& status) = 0; + /** + * Destructor. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Function(); }; // class Function + /** + * Type representing argument and return values for custom functions. + * It encapsulates an operand and resolved options, and can be extended with + * additional state. + * Adding a new custom function requires adding a new class that + * implements this interface. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ class U_I18N_API FunctionValue : public UObject { public: + /** + * Returns the string representation of this value. The default + * method signals an error. Must be overridden by classes + * implementing values that support formatting. + * + * @param status Input/output error code + * @return A string. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. 
+ */ virtual UnicodeString formatToString(UErrorCode& status) const { if (U_SUCCESS(status)) { status = U_MF_FORMATTING_ERROR; } return {}; } + /** + * Returns the Formattable operand that was used to construct + * this value. The operand may be obtained from calling getOperand() + * on the input FunctionValue, or it may be constructed separately. + * + * @return A reference to a message2::Formattable object. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual const Formattable& getOperand() const { return operand; } - // `this` can't be used after calling this method + /** + * Returns the resolved options that were used to construct this value. + * `this` may not be used after calling this method. This overload + * is provided so that mergeOptions(), which passes its `this` argument + * by move, can be called. + * + * @return The resolved options for this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual FunctionOptions getResolvedOptions() { return std::move(opts); } - // const method is for reading the options attached to another option - // (i.e. options don't escape) -- - // non-const method is for calling mergeOptions() -- i.e. options escape + /** + * Returns a reference to the resolved options for this value. + * + * @return A reference to the resolved options for this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual const FunctionOptions& getResolvedOptions() const { return opts; } + /** + * Returns true if this value supports selection. The default method + * returns false. The method must be overridden for values that support + * selection. + * + * @return True iff this value supports selection. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. 
+ */ virtual UBool isSelectable() const { return false; } + /** + * Returns true if this value represents a null operand, that is, + * the absence of an argument. This method should not be overridden. + * It can be called in order to check whether the argument is present. + * Some functions may be nullary (they may work with no arguments). + * + * @return True iff this value represents an absent operand. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual UBool isNullOperand() const { return false; } + /** + * Compares this value to an array of keys, and returns an array of matching + * keys sorted by preference. The default implementation of this method + * signals an error. It should be overridden for value classes that support + * selection. + * + * @param keys An array of strings to compare to the input. + * @param keysLen The length of `keys`. + * @param prefs An array of strings with length `keysLen`. The contents of + * the array is undefined. `selectKey()` should set the contents + * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. + * @param prefsLen A reference that `selectKey()` should set to the length of `prefs`, + * which must be less than or equal to `keysLen`. + * @param status Input/output error code. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual void selectKeys(const UnicodeString* keys, int32_t keysLen, UnicodeString* prefs, @@ -308,17 +412,30 @@ namespace message2 { status = U_MF_SELECTOR_ERROR; } } + /** + * Destructor. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ virtual ~FunctionValue(); protected: + /** + * Operand used to construct this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ Formattable operand; + /** + * Resolved options attached to this value. 
+ * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ FunctionOptions opts; }; // class FunctionValue - class NullValue : public FunctionValue { - public: - virtual UBool isNullOperand() const { return true; } - }; // class NullValue - } // namespace message2 U_NAMESPACE_END From df5e6540cee1cad17e3a8a37ac5a95d1416bd214 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 14 Oct 2024 11:50:29 -0700 Subject: [PATCH 21/37] Add test --- testdata/message2/spec/functions/integer.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testdata/message2/spec/functions/integer.json b/testdata/message2/spec/functions/integer.json index c8e75077a221..8f386338f3ec 100644 --- a/testdata/message2/spec/functions/integer.json +++ b/testdata/message2/spec/functions/integer.json @@ -27,6 +27,10 @@ } ], "exp": "one" + }, + { + "src": ".local $x = {1.25 :integer} .local $y = {$x :number} {{{$x}}}", + "exp": "1" } ] } From abf6c80e0b2479b47d47900e2e6d73450dead3e3 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 14:30:51 -0700 Subject: [PATCH 22/37] Fix ListValue --- icu4c/source/test/intltest/messageformat2test_custom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index 8a076f295dcf..34a453a37393 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -583,7 +583,7 @@ message2::ListValue::ListValue(const Locale& locale, return; } - FunctionOptionsMap opt = options.getOptions(); + FunctionOptionsMap opt = opts.getOptions(); UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND; if (hasStringOption(opt, "type", "OR")) { type = UListFormatterType::ULISTFMT_TYPE_OR; From 81d9acf45ededa619521b9d186dded9fa92acc4a Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 14:30:59 
-0700 Subject: [PATCH 23/37] Add u-options tests --- icu4c/source/test/intltest/messageformat2test_read_json.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index 4c3aacb42500..5ef9be49c710 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -309,6 +309,9 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode); // Other tests (non-spec) + // TODO: move this into the spec tests when + // https://github.com/unicode-org/message-format-wg/pull/846 lands + runTestsFromJsonFile(*this, "u-options.json", errorCode); runTestsFromJsonFile(*this, "more-functions.json", errorCode); runTestsFromJsonFile(*this, "valid-tests.json", errorCode); runTestsFromJsonFile(*this, "resolution-errors.json", errorCode); From a51dc3e29c47e27ea781e47642e3f14c09319e90 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 14:31:08 -0700 Subject: [PATCH 24/37] Fix output checking --- icu4c/source/test/intltest/messageformat2test_utils.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index c4ad251c7f48..c2f40767b3c5 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -274,6 +274,9 @@ class TestUtils { if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) { failWrongOffset(tmsg, testCase, parseError.line, parseError.offset); } + if (testCase.expectSuccess() && !testCase.outputMatches(result)) { + failWrongOutput(tmsg, testCase, result); + } if (U_FAILURE(errorCode) && !testCase.expectSuccess() && testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR) { // Re-run the 
formatter if there was an error, From 1abcef05e3101292997ca0d6a46224e9df916033 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 16:45:44 -0700 Subject: [PATCH 25/37] Function context and u:options --- icu4c/source/i18n/messageformat2.cpp | 50 ++++++- .../source/i18n/messageformat2_evaluation.cpp | 8 +- .../i18n/messageformat2_formattable.cpp | 54 ++++++-- .../i18n/messageformat2_function_registry.cpp | 69 ++++++---- ...essageformat2_function_registry_internal.h | 43 +++--- icu4c/source/i18n/messageformat2_macros.h | 3 +- icu4c/source/i18n/unicode/messageformat2.h | 1 + .../messageformat2_function_registry.h | 56 +++++++- .../source/test/intltest/messageformat2test.h | 27 ++-- .../intltest/messageformat2test_custom.cpp | 47 +++---- testdata/message2/u-options.json | 126 ++++++++++++++++++ 11 files changed, 379 insertions(+), 105 deletions(-) create mode 100644 testdata/message2/u-options.json diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 067291a80c36..c6fc76bc7a07 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -11,6 +11,7 @@ #include "unicode/messageformat2_data_model.h" #include "unicode/messageformat2_formattable.h" #include "unicode/messageformat2.h" +#include "unicode/ubidi.h" #include "unicode/unistr.h" #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" @@ -153,6 +154,48 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, return FunctionOptions(std::move(*optionsVector), status); } +static UBiDiDirection getBiDiDirection(const Locale& locale, + const UnicodeString& s) { + if (s.isEmpty()) { + return locale.isRightToLeft() ? 
UBIDI_RTL : UBIDI_LTR; + } + if (s == u"ltr") { + return UBIDI_LTR; + } + if (s == u"rtl") { + return UBIDI_RTL; + } + if (s == u"auto") { + return UBIDI_MIXED; + } + return UBIDI_NEUTRAL; +} + +FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const { + // Look up "u:locale", "u:dir", and "u:id" in the options + UnicodeString localeStr = options.getStringFunctionOption(UnicodeString("u:locale")); + + // Use default locale from context, unless "u:locale" is provided + Locale localeToUse; + if (localeStr.isEmpty()) { + localeToUse = locale; + } else { + UErrorCode localStatus = U_ZERO_ERROR; + std::string u8; + Locale l = Locale::forLanguageTag(localeStr.toUTF8String(u8), localStatus); + if (U_SUCCESS(localStatus)) { + localeToUse = l; + } else { + localeToUse = locale; + } + } + UBiDiDirection dir = getBiDiDirection(localeToUse, + options.getStringFunctionOption(UnicodeString("u:dir"))); + UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id")); + + return FunctionContext(localeToUse, dir, id); +} + // Looks up `functionName` and applies it to an operand and options, // handling errors if the function is unbound [[nodiscard]] InternalValue MessageFormatter::apply(const FunctionName& functionName, @@ -180,13 +223,16 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, context.getErrors().setUnknownFunction(functionName, status); return InternalValue::fallback(fallbackStr); } - LocalPointer function(functionFactory->createFunction(locale, status)); + LocalPointer function(functionFactory->createFunction(status)); // Value is not a fallback, so we can safely call takeValue() LocalPointer functionArg(rand.takeValue(status)); U_ASSERT(U_SUCCESS(status)); // Call the function LocalPointer - functionResult(function->call(*functionArg, std::move(options), status)); + functionResult(function->call(makeFunctionContext(options), + *functionArg, + std::move(options), + status)); // Handle any errors 
signaled by the function // (and use the fallback value) if (status == U_MF_OPERAND_MISMATCH_ERROR) { diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index b09ec3fa634d..745f56bd2907 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -7,6 +7,7 @@ #if !UCONFIG_NO_MF2 +#include "unicode/ubidi.h" #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" #include "messageformat2_macros.h" @@ -34,10 +35,13 @@ BaseValue::BaseValue(const Locale& loc, const Formattable& source) return message2::create(BaseValue(locale, source), errorCode); } -extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); +extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&); UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const { - return formattableToString(locale, operand, errorCode); + return formattableToString(locale, + UBIDI_NEUTRAL, + operand, + errorCode); } BaseValue& BaseValue::operator=(BaseValue&& other) noexcept { diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 23219268de37..9a2daaec8881 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -9,8 +9,10 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/smpdtfmt.h" +#include "unicode/ubidi.h" #include "messageformat2_allocation.h" #include "messageformat2_macros.h" +#include "ubidiimp.h" #include "limits.h" @@ -184,9 +186,37 @@ namespace message2 { df->format(date, result, 0, errorCode); } + static UnicodeString& handleBiDi(const Locale& locale, + UBiDiDirection dir, + UnicodeString& result) { + switch (dir) { + case UBIDI_LTR: + if (locale.isRightToLeft()) { + result.insert(0, LRI_CHAR); + result.insert(result.length(), PDI_CHAR); 
+ } + break; + case UBIDI_RTL: + result.insert(0, RLI_CHAR); + result.insert(result.length(), PDI_CHAR); + break; + case UBIDI_NEUTRAL: + // Do nothing + break; + case UBIDI_MIXED: + // mixed = auto + result.insert(0, FSI_CHAR); + result.insert(result.length(), PDI_CHAR); + break; + } + + return result; + } + UnicodeString formattableToString(const Locale& locale, - const Formattable& toFormat, - UErrorCode& status) { + UBiDiDirection dir, + const Formattable& toFormat, + UErrorCode& status) { EMPTY_ON_ERROR(status); // Try as decimal number first @@ -205,33 +235,37 @@ namespace message2 { } UFormattableType type = toFormat.getType(); + UnicodeString result; + switch (type) { case UFMT_DATE: { - UnicodeString result; UDate d = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, d, result, status); - return result; + break; } case UFMT_DOUBLE: { double d = toFormat.getDouble(status); U_ASSERT(U_SUCCESS(status)); - return formatNumberWithDefaults(locale, d, status).toString(status); + result = formatNumberWithDefaults(locale, d, status).toString(status); + break; } case UFMT_LONG: { int32_t l = toFormat.getLong(status); U_ASSERT(U_SUCCESS(status)); - return formatNumberWithDefaults(locale, l, status).toString(status); + result = formatNumberWithDefaults(locale, l, status).toString(status); + break; } case UFMT_INT64: { int64_t i = toFormat.getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return formatNumberWithDefaults(locale, i, status).toString(status); + result = formatNumberWithDefaults(locale, i, status).toString(status); + break; } case UFMT_STRING: { - const UnicodeString& s = toFormat.getString(status); + result = toFormat.getString(status); U_ASSERT(U_SUCCESS(status)); - return s; + break; } default: { // No default formatters for other types; use fallback @@ -242,6 +276,8 @@ namespace message2 { return {}; } } + + return handleBiDi(locale, dir, result); } } // namespace message2 diff --git 
a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index da6622b2d073..db5a38a70a42 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -251,10 +251,10 @@ StandardFunctions::NumberFactory::create(bool isInteger, } Function* -StandardFunctions::NumberFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +StandardFunctions::NumberFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Number* result = new Number(locale, isInteger); + Number* result = new Number(isInteger); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -274,8 +274,9 @@ StandardFunctions::Number::number(const Locale& loc, UErrorCode& success) { /* static */ StandardFunctions::Number* StandardFunctions::Number::create(const Locale& loc, bool isInteger, UErrorCode& success) { NULL_ON_ERROR(success); + (void) loc; - LocalPointer result(new Number(loc, isInteger)); + LocalPointer result(new Number(isInteger)); if (!result.isValid()) { success = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -283,11 +284,12 @@ StandardFunctions::Number::create(const Locale& loc, bool isInteger, UErrorCode& return result.orphan(); } -FunctionValue* StandardFunctions::Number::call(FunctionValue& operand, +FunctionValue* StandardFunctions::Number::call(const FunctionContext& context, + FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) { LocalPointer - val(new NumberValue(*this, operand, std::move(options), errorCode)); + val(new NumberValue(*this, context, operand, std::move(options), errorCode)); if (val.isValid()) { return val.orphan(); } @@ -296,6 +298,7 @@ FunctionValue* StandardFunctions::Number::call(FunctionValue& operand, } /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& 
status) { number::UnlocalizedNumberFormatter nf; @@ -438,7 +441,7 @@ FunctionValue* StandardFunctions::Number::call(FunctionValue& operand, } } } - return nf.locale(number.locale); + return nf.locale(locale); } static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { @@ -499,13 +502,13 @@ int32_t StandardFunctions::Number::digitSizeOption(const FunctionOptions& opts, UnicodeString formatted = opt->formatToString(localStatus); int64_t val = 0; if (U_SUCCESS(localStatus)) { - val = getInt64Value(locale, Formattable(formatted), localStatus); + val = getInt64Value(Locale("en-US"), Formattable(formatted), localStatus); } if (U_FAILURE(localStatus)) { localStatus = U_ZERO_ERROR; } // Next try the operand - val = getInt64Value(locale, opt->getOperand(), localStatus); + val = getInt64Value(Locale("en-US"), opt->getOperand(), localStatus); if (U_SUCCESS(localStatus)) { return static_cast(val); } @@ -557,6 +560,7 @@ bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { } StandardFunctions::NumberValue::NumberValue(const Number& parent, + const FunctionContext& context, FunctionValue& arg, FunctionOptions&& options, UErrorCode& errorCode) { @@ -567,12 +571,12 @@ StandardFunctions::NumberValue::NumberValue(const Number& parent, return; } - locale = parent.locale; + locale = context.getLocale(); opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); operand = arg.getOperand(); number::LocalizedNumberFormatter realFormatter; - realFormatter = formatterForOptions(parent, opts, errorCode); + realFormatter = formatterForOptions(parent, locale, opts, errorCode); if (U_SUCCESS(errorCode)) { switch (operand.getType()) { @@ -765,10 +769,10 @@ StandardFunctions::DateTimeFactory::create(DateTimeFactory::DateTimeType type, } Function* -StandardFunctions::DateTimeFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +StandardFunctions::DateTimeFactory::createFunction(UErrorCode& errorCode) { 
NULL_ON_ERROR(errorCode); - DateTime* result = new DateTime(locale, type); + DateTime* result = new DateTime(type); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -776,12 +780,11 @@ StandardFunctions::DateTimeFactory::createFunction(const Locale& locale, UErrorC } /* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::create(const Locale& loc, - DateTimeFactory::DateTimeType type, +StandardFunctions::DateTime::create(DateTimeFactory::DateTimeType type, UErrorCode& success) { NULL_ON_ERROR(success); - LocalPointer result(new DateTime(loc, type)); + LocalPointer result(new DateTime(type)); if (!result.isValid()) { success = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -790,10 +793,13 @@ StandardFunctions::DateTime::create(const Locale& loc, } FunctionValue* -StandardFunctions::DateTime::call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { +StandardFunctions::DateTime::call(const FunctionContext& context, + FunctionValue& val, + FunctionOptions&& opts, + UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - auto result = new DateTimeValue(locale, type, val, std::move(opts), errorCode); + auto result = new DateTimeValue(type, context, val, std::move(opts), errorCode); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -829,8 +835,8 @@ UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& statu return formattedDate; } -StandardFunctions::DateTimeValue::DateTimeValue(const Locale& locale, - DateTimeFactory::DateTimeType type, +StandardFunctions::DateTimeValue::DateTimeValue(DateTimeFactory::DateTimeType type, + const FunctionContext& context, FunctionValue& val, FunctionOptions&& options, UErrorCode& errorCode) { @@ -842,6 +848,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(const Locale& locale, return; } + const Locale& locale = context.getLocale(); operand = val.getOperand(); opts = options.mergeOptions(val.getResolvedOptions(), errorCode); @@ -1099,10 +1106,10 @@ 
StandardFunctions::StringFactory::string(UErrorCode& success) { } /* static */ StandardFunctions::String* -StandardFunctions::String::string(const Locale& loc, UErrorCode& success) { +StandardFunctions::String::string(UErrorCode& success) { NULL_ON_ERROR(success); - LocalPointer result(new String(loc)); + LocalPointer result(new String()); if (!result.isValid()) { success = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -1111,21 +1118,27 @@ StandardFunctions::String::string(const Locale& loc, UErrorCode& success) { } Function* -StandardFunctions::StringFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +StandardFunctions::StringFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - String* result = new String(locale); + String* result = new String(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); +extern UnicodeString formattableToString(const Locale&, + const UBiDiDirection, + const Formattable&, + UErrorCode&); FunctionValue* -StandardFunctions::String::call(FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { - return new StringValue(locale, val, std::move(opts), errorCode); +StandardFunctions::String::call(const FunctionContext& context, + FunctionValue& val, + FunctionOptions&& opts, + UErrorCode& errorCode) { + return new StringValue(context, val, std::move(opts), errorCode); } UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCode) const { @@ -1134,7 +1147,7 @@ UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCo return formattedString; } -StandardFunctions::StringValue::StringValue(const Locale& locale, +StandardFunctions::StringValue::StringValue(const FunctionContext& context, FunctionValue& val, FunctionOptions&& options, UErrorCode& status) { @@ -1142,7 +1155,7 @@ StandardFunctions::StringValue::StringValue(const 
Locale& locale, operand = val.getOperand(); opts = std::move(options); // No options // Convert to string - formattedString = formattableToString(locale, operand, status); + formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status); } void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index bd46103cc85d..14b5f0ee8bbb 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -36,7 +36,7 @@ namespace message2 { class DateTimeFactory : public FunctionFactory { public: - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; static DateTimeFactory* date(UErrorCode&); static DateTimeFactory* time(UErrorCode&); static DateTimeFactory* dateTime(UErrorCode&); @@ -61,7 +61,8 @@ namespace message2 { class DateTime : public Function { public: - FunctionValue* call(FunctionValue& operand, + FunctionValue* call(const FunctionContext& context, + FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) override; virtual ~DateTime(); @@ -70,13 +71,10 @@ namespace message2 { friend class DateTimeFactory; friend class DateTimeValue; - Locale locale; const DateTimeFactory::DateTimeType type; - static DateTime* create(const Locale&, - DateTimeFactory::DateTimeType, + static DateTime* create(DateTimeFactory::DateTimeType, UErrorCode&); - DateTime(const Locale& l, DateTimeFactory::DateTimeType t) - : locale(l), type(t) {} + DateTime(DateTimeFactory::DateTimeType t) : type(t) {} const LocalPointer icuFormatter; }; @@ -84,7 +82,7 @@ namespace message2 { class NumberFactory : public FunctionFactory { public: - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* 
createFunction(UErrorCode& status) override; static NumberFactory* integer(UErrorCode& success); static NumberFactory* number(UErrorCode& success); virtual ~NumberFactory(); @@ -99,7 +97,8 @@ namespace message2 { static Number* integer(const Locale& loc, UErrorCode& success); static Number* number(const Locale& loc, UErrorCode& success); - FunctionValue* call(FunctionValue& operand, + FunctionValue* call(const FunctionContext& context, + FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) override; virtual ~Number(); @@ -115,7 +114,7 @@ namespace message2 { } PluralType; static Number* create(const Locale&, bool, UErrorCode&); - Number(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt), icuFormatter(number::NumberFormatter::withLocale(loc)) {} + Number(bool isInt) : isInteger(isInt) /*, icuFormatter(number::NumberFormatter::withLocale(loc))*/ {} // These options have their own accessor methods, since they have different default values. int32_t digitSizeOption(const FunctionOptions&, const UnicodeString&) const; @@ -126,7 +125,6 @@ namespace message2 { int32_t minimumIntegerDigits(const FunctionOptions& options) const; bool usePercent(const FunctionOptions& options) const; - Locale locale; const bool isInteger = false; const number::LocalizedNumberFormatter icuFormatter; @@ -134,6 +132,7 @@ namespace message2 { }; static number::LocalizedNumberFormatter formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& status); @@ -154,7 +153,11 @@ namespace message2 { Locale locale; number::FormattedNumber formattedNumber; - NumberValue(const Number&, FunctionValue&, FunctionOptions&&, UErrorCode&); + NumberValue(const Number&, + const FunctionContext&, + FunctionValue&, + FunctionOptions&&, + UErrorCode&); }; // class NumberValue class DateTimeValue : public FunctionValue { @@ -166,13 +169,13 @@ namespace message2 { friend class DateTime; UnicodeString formattedDate; - DateTimeValue(const 
Locale& locale, DateTimeFactory::DateTimeType type, + DateTimeValue(DateTimeFactory::DateTimeType type, const FunctionContext& context, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class DateTimeValue class StringFactory : public FunctionFactory { public: - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; static StringFactory* string(UErrorCode& status); virtual ~StringFactory(); private: @@ -180,19 +183,17 @@ namespace message2 { class String : public Function { public: - FunctionValue* call(FunctionValue& val, + FunctionValue* call(const FunctionContext& context, + FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) override; - static String* string(const Locale& locale, UErrorCode& status); + static String* string(UErrorCode& status); virtual ~String(); private: friend class StringFactory; - // Formatting `value` to a string might require the locale - Locale locale; - - String(const Locale& l) : locale(l) {} + String() {} }; class StringValue : public FunctionValue { @@ -209,7 +210,7 @@ namespace message2 { friend class String; UnicodeString formattedString; - StringValue(const Locale&, FunctionValue&, FunctionOptions&&, UErrorCode&); + StringValue(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class StringValue }; diff --git a/icu4c/source/i18n/messageformat2_macros.h b/icu4c/source/i18n/messageformat2_macros.h index f06ed1a5a977..d4504eec40f9 100644 --- a/icu4c/source/i18n/messageformat2_macros.h +++ b/icu4c/source/i18n/messageformat2_macros.h @@ -17,6 +17,7 @@ #include "unicode/format.h" #include "unicode/unistr.h" #include "plurrule_impl.h" +#include "ubidiimp.h" U_NAMESPACE_BEGIN @@ -30,8 +31,6 @@ using namespace pluralimpl; #define LEFT_CURLY_BRACE ((UChar32)0x007B) #define RIGHT_CURLY_BRACE ((UChar32)0x007D) #define HTAB ((UChar32)0x0009) -#define CR ((UChar32)0x000D) -#define LF ((UChar32)0x000A) #define 
IDEOGRAPHIC_SPACE ((UChar32)0x3000) #define PIPE ((UChar32)0x007C) diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index f1ecfa91fb21..6cf29c191adb 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -352,6 +352,7 @@ namespace message2 { // Formatting methods [[nodiscard]] InternalValue evalLiteral(const data_model::Literal&, UErrorCode&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; + FunctionContext makeFunctionContext(const FunctionOptions&) const; [[nodiscard]] InternalValue apply(const FunctionName&, InternalValue&&, FunctionOptions&&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue evalExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index bf07d22096e9..d8c636144592 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -14,6 +14,7 @@ #include "unicode/messageformat2_data_model_names.h" #include "unicode/messageformat2_formattable.h" +#include "unicode/ubidi.h" #ifndef U_HIDE_DEPRECATED_API @@ -228,6 +229,54 @@ namespace message2 { class Function; + /** + * Class implementing data from contextual options. + * See https://github.com/unicode-org/message-format-wg/pull/846 + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API FunctionContext : public UObject { + public: + /** + * Returns the locale from this context. + * + * @return Locale the context was created with. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. 
+ */ + const Locale& getLocale() const { return locale; } + /** + * Returns the text direction from this context. + * + * @return A UBiDiDirection indicating the text direction. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + UBiDiDirection getDirection() const { return dir; } + /** + * Returns the ID from this context. + * + * @return A string to be used in formatting to parts. + * (Formatting to parts is not yet implemented.) + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + const UnicodeString& getID() const { return id; } + private: + friend class MessageFormatter; + + Locale locale; + UBiDiDirection dir; + UnicodeString id; + + FunctionContext(const Locale& loc, UBiDiDirection d, UnicodeString i) + : locale(loc), dir(d), id(i) {} + }; // class FunctionContext + /** * Interface that function factory classes must implement. * @@ -240,14 +289,13 @@ namespace message2 { * Constructs a new function object. This method is not const; * function factories with local state may be defined. * - * @param locale Locale to be used by the function. * @param status Input/output error code. * @return The new Formatter, which is non-null if U_SUCCESS(status). * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - virtual Function* createFunction(const Locale& locale, UErrorCode& status) = 0; + virtual Function* createFunction(UErrorCode& status) = 0; /** * Destructor. * @@ -278,6 +326,7 @@ namespace message2 { * Calls this Function on a FunctionValue operand and its FunctionOptions options, * returning a new pointer to a FunctionValue (which is adopted by the caller). * + * @param context The context of this function, based on its contextual options * @param operand The unnamed argument to the function. * @param options Resolved options for this function. 
* @param status Input/output error code @@ -285,7 +334,8 @@ namespace message2 { * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - virtual FunctionValue* call(FunctionValue& operand, + virtual FunctionValue* call(const FunctionContext& context, + FunctionValue& operand, FunctionOptions&& options, UErrorCode& status) = 0; /** diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index f675e4a22431..f8b3f2b4f82b 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -113,19 +113,18 @@ class Person : public FormattableObject { }; class PersonNameFactory : public FunctionFactory { - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; virtual ~PersonNameFactory(); }; class PersonNameFunction : public Function { public: - FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; virtual ~PersonNameFunction(); private: friend class PersonNameFactory; - const Locale locale; - PersonNameFunction(const Locale& loc) : locale(loc) {} + PersonNameFunction() {} }; class PersonNameValue : public FunctionValue { @@ -153,13 +152,13 @@ class FormattableProperties : public FormattableObject { }; class GrammarCasesFactory : public FunctionFactory { - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; virtual ~GrammarCasesFactory(); }; class GrammarCasesFunction : public Function { public: - FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); }; @@ -177,18 
+176,16 @@ class GrammarCasesValue : public FunctionValue { }; // class GrammarCasesValue class ListFactory : public FunctionFactory { - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; virtual ~ListFactory(); }; class ListFunction : public Function { public: - FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); - ListFunction(const Locale& loc) : locale(loc) {} + ListFunction() {} virtual ~ListFunction(); - private: - Locale locale; }; class ListValue : public FunctionValue { @@ -206,7 +203,7 @@ class ListValue : public FunctionValue { }; // class ListValue class NounFunctionFactory : public FunctionFactory { - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; virtual ~NounFunctionFactory(); }; @@ -225,7 +222,7 @@ class NounValue : public FunctionValue { }; // class NounValue class AdjectiveFunctionFactory : public FunctionFactory { - Function* createFunction(const Locale& locale, UErrorCode& status) override; + Function* createFunction(UErrorCode& status) override; virtual ~AdjectiveFunctionFactory(); }; @@ -266,14 +263,14 @@ class ResourceManager : public Formatter { class NounFunction : public Function { public: - FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; NounFunction() { } virtual ~NounFunction(); }; class AdjectiveFunction : public Function { public: - FunctionValue* call(FunctionValue&, FunctionOptions&&, UErrorCode&) override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; AdjectiveFunction() { } virtual 
~AdjectiveFunction(); }; diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index 34a453a37393..fa071a83aa1e 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -273,21 +273,24 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } -Function* PersonNameFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +Function* PersonNameFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - PersonNameFunction* result = new PersonNameFunction(locale); + PersonNameFunction* result = new PersonNameFunction(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -FunctionValue* PersonNameFunction::call(FunctionValue& arg, +FunctionValue* PersonNameFunction::call(const FunctionContext& context, + FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); + (void) context; + PersonNameValue* v = new PersonNameValue(arg, std::move(opts), errorCode); if (U_SUCCESS(errorCode) && v == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -398,11 +401,9 @@ PersonNameValue::~PersonNameValue() {} result += postfix; } -Function* GrammarCasesFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +Function* GrammarCasesFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - (void) locale; - GrammarCasesFunction* result = new GrammarCasesFunction(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -410,11 +411,14 @@ Function* GrammarCasesFactory::createFunction(const Locale& locale, UErrorCode& return result; } -FunctionValue* GrammarCasesFunction::call(FunctionValue& arg, +FunctionValue* GrammarCasesFunction::call(const FunctionContext& context, + FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { 
NULL_ON_ERROR(errorCode); + (void) context; + GrammarCasesValue* v = new GrammarCasesValue(arg, std::move(opts), errorCode); if (U_SUCCESS(errorCode) && v == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -534,12 +538,10 @@ GrammarCasesValue::~GrammarCasesValue() {} See ICU4J: CustomFormatterListTest.java */ -Function* ListFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +Function* ListFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - (void) locale; - - ListFunction* result = new ListFunction(locale); + ListFunction* result = new ListFunction(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -547,12 +549,13 @@ Function* ListFactory::createFunction(const Locale& locale, UErrorCode& errorCod } -FunctionValue* ListFunction::call(FunctionValue& arg, +FunctionValue* ListFunction::call(const FunctionContext& context, + FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - ListValue* v = new ListValue(locale, arg, std::move(opts), errorCode); + ListValue* v = new ListValue(context.getLocale(), arg, std::move(opts), errorCode); if (U_SUCCESS(errorCode) && v == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -872,11 +875,9 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { } #endif -Function* NounFunctionFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +Function* NounFunctionFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - (void) locale; - NounFunction* result = new NounFunction(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -884,7 +885,8 @@ Function* NounFunctionFactory::createFunction(const Locale& locale, UErrorCode& return result; } -FunctionValue* NounFunction::call(FunctionValue& arg, +FunctionValue* NounFunction::call(const FunctionContext&, + FunctionValue& arg, FunctionOptions&& opts, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ 
-939,11 +941,9 @@ NounValue::NounValue(FunctionValue& arg, } } -Function* AdjectiveFunctionFactory::createFunction(const Locale& locale, UErrorCode& errorCode) { +Function* AdjectiveFunctionFactory::createFunction(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - (void) locale; - AdjectiveFunction* result = new AdjectiveFunction(); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -951,9 +951,10 @@ Function* AdjectiveFunctionFactory::createFunction(const Locale& locale, UErrorC return result; } -FunctionValue* AdjectiveFunction::call(FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { +FunctionValue* AdjectiveFunction::call(const FunctionContext&, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); AdjectiveValue* v = new AdjectiveValue(arg, std::move(opts), errorCode); diff --git a/testdata/message2/u-options.json b/testdata/message2/u-options.json new file mode 100644 index 000000000000..3e13b30a2479 --- /dev/null +++ b/testdata/message2/u-options.json @@ -0,0 +1,126 @@ +{ + "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "scenario": "u: Options", + "description": "Common options affecting the function context", + "defaultTestProperties": { + "locale": "en-US" + }, + "tests": [ + { + "src": "{#tag u:id=x}content{/ns:tag u:id=x}", + "exp": "content", + "expParts": [ + { + "type": "markup", + "kind": "open", + "id": "x", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "id": "x", + "name": "tag" + } + ] + }, + { + "src": "{#tag u:dir=rtl u:locale=ar}content{/ns:tag}", + "exp": "content", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-option" }], + "expParts": [ + { + "type": "markup", + "kind": "open", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "name": "tag" 
+ } + ] + }, + { + "src": "hello {4.2 :number u:locale=fr}", + "exp": "hello 4,2" + }, + { + "src": "hello {world :string u:dir=ltr u:id=foo}", + "exp": "hello world", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "ltr", + "id": "foo", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=rtl}", + "exp": "hello \u2067world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "rtl", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=auto}", + "exp": "hello \u2068world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "auto", + "value": "world" + } + ] + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=rtl}", + "exp": "أهلاً \u2067بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=auto}", + "exp": "أهلاً \u2068بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {world :string u:dir=ltr}", + "exp": "أهلاً \u2066world\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string}", + "exp": "أهلاً \u2067بالعالم\u2069" + } + ] +} From b12bf3030f018d85253907263aa34206b6241bd4 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 16:56:02 -0700 Subject: [PATCH 26/37] Fix date/time style bug --- icu4c/source/i18n/messageformat2_function_registry.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index db5a38a70a42..9b369e2b94a5 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -809,6 +809,9 @@ StandardFunctions::DateTime::call(const FunctionContext& context, static DateFormat::EStyle stringToStyle(UnicodeString option, 
UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { UnicodeString upper = option.toUpper(); + if (upper.isEmpty()) { + return DateFormat::EStyle::kShort; + } if (upper == UnicodeString("FULL")) { return DateFormat::EStyle::kFull; } @@ -821,7 +824,7 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC if (upper == UnicodeString("SHORT")) { return DateFormat::EStyle::kShort; } - if (upper.isEmpty() || upper == UnicodeString("DEFAULT")) { + if (upper == UnicodeString("DEFAULT")) { return DateFormat::EStyle::kDefault; } errorCode = U_ILLEGAL_ARGUMENT_ERROR; From 0cf0fc866178b038c54abe1c6d61e877758c5eb9 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 17:05:45 -0700 Subject: [PATCH 27/37] Fix test --- testdata/message2/more-functions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testdata/message2/more-functions.json b/testdata/message2/more-functions.json index 093678905b0c..7d21b69e93eb 100644 --- a/testdata/message2/more-functions.json +++ b/testdata/message2/more-functions.json @@ -130,7 +130,7 @@ }, { "comment": "Modified from icu4j test", - "srcs": [ + "src": [ ".input {$exp :datetime timeStyle=short}\n", ".input {$user :string}\n", ".local $longExp = {$exp :datetime dateStyle=long}\n", From 50aea15cbda83a0020c00f3e10367cfa64f17c36 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 15 Oct 2024 17:18:06 -0700 Subject: [PATCH 28/37] Fix datetime options bug --- icu4c/source/i18n/messageformat2_function_registry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 9b369e2b94a5..b21a97c4d25c 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -870,7 +870,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTimeFactory::DateTimeType ty UnicodeString dateStyleOption = 
opts.getStringFunctionOption(dateStyleName); UnicodeString timeStyleOption = opts.getStringFunctionOption(timeStyleName); bool hasDateStyleOption = dateStyleOption.length() > 0; - bool hasTimeStyleOption = dateStyleOption.length() > 0; + bool hasTimeStyleOption = timeStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; using DateTimeType = DateTimeFactory::DateTimeType; From 6f140bb81bde5ba5fc7388177599c16eab7f5302 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 16 Oct 2024 08:20:34 -0700 Subject: [PATCH 29/37] Remove FunctionFactory again --- icu4c/source/i18n/messageformat2.cpp | 3 +- .../source/i18n/messageformat2_formatter.cpp | 32 ++-- .../i18n/messageformat2_function_registry.cpp | 142 +++--------------- ...essageformat2_function_registry_internal.h | 70 +++------ icu4c/source/i18n/unicode/messageformat2.h | 2 +- .../messageformat2_function_registry.h | 51 +------ .../test/intltest/messageformat2test.cpp | 6 +- .../source/test/intltest/messageformat2test.h | 28 ---- .../intltest/messageformat2test_custom.cpp | 84 ++--------- 9 files changed, 82 insertions(+), 336 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index c6fc76bc7a07..ec68fefdad5c 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -216,14 +216,13 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt } // Look up the function name - FunctionFactory* functionFactory = lookupFunctionFactory(functionName, status); + Function* function = lookupFunction(functionName, status); if (U_FAILURE(status)) { // Function is unknown -- set error and use the fallback value status = U_ZERO_ERROR; context.getErrors().setUnknownFunction(functionName, status); return InternalValue::fallback(fallbackStr); } - LocalPointer function(functionFactory->createFunction(status)); // Value is not a fallback, so we can safely call takeValue() LocalPointer 
functionArg(rand.takeValue(status)); U_ASSERT(U_SUCCESS(status)); diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index e73a8ec827f0..b4fe6bd55ae6 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -122,22 +122,22 @@ namespace message2 { // Set up the standard function registry MFFunctionRegistry::Builder standardFunctionsBuilder(success); - LocalPointer dateTime(StandardFunctions::DateTimeFactory::dateTime(success)); - LocalPointer date(StandardFunctions::DateTimeFactory::date(success)); - LocalPointer time(StandardFunctions::DateTimeFactory::time(success)); - LocalPointer number(StandardFunctions::NumberFactory::number(success)); - LocalPointer integer(StandardFunctions::NumberFactory::integer(success)); - LocalPointer string(StandardFunctions::StringFactory::string(success)); + LocalPointer dateTime(StandardFunctions::DateTime::dateTime(success)); + LocalPointer date(StandardFunctions::DateTime::date(success)); + LocalPointer time(StandardFunctions::DateTime::time(success)); + LocalPointer number(StandardFunctions::Number::number(success)); + LocalPointer integer(StandardFunctions::Number::integer(success)); + LocalPointer string(StandardFunctions::String::string(success)); CHECK_ERROR(success); - standardFunctionsBuilder.adoptFunctionFactory(FunctionName(UnicodeString("datetime")), + standardFunctionsBuilder.adoptFunction(FunctionName(UnicodeString("datetime")), dateTime.orphan(), success) - .adoptFunctionFactory(FunctionName(UnicodeString("date")), date.orphan(), success) - .adoptFunctionFactory(FunctionName(UnicodeString("time")), time.orphan(), success) - .adoptFunctionFactory(FunctionName(UnicodeString("number")), + .adoptFunction(FunctionName(UnicodeString("date")), date.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("time")), time.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("number")), 
number.orphan(), success) - .adoptFunctionFactory(FunctionName(UnicodeString("integer")), + .adoptFunction(FunctionName(UnicodeString("integer")), integer.orphan(), success) - .adoptFunctionFactory(FunctionName(UnicodeString("string")), + .adoptFunction(FunctionName(UnicodeString("string")), string.orphan(), success); CHECK_ERROR(success); standardMFFunctionRegistry = standardFunctionsBuilder.build(); @@ -225,9 +225,9 @@ namespace message2 { return standardMFFunctionRegistry.hasFunction(functionName); } - FunctionFactory* - MessageFormatter::lookupFunctionFactory(const FunctionName& functionName, - UErrorCode& status) const { + Function* + MessageFormatter::lookupFunction(const FunctionName& functionName, + UErrorCode& status) const { NULL_ON_ERROR(status); if (isBuiltInFunction(functionName)) { @@ -235,7 +235,7 @@ namespace message2 { } if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - FunctionFactory* function = customMFFunctionRegistry.getFunction(functionName); + Function* function = customMFFunctionRegistry.getFunction(functionName); if (function != nullptr) { return function; } diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index b21a97c4d25c..c1321c03cac0 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -38,7 +38,6 @@ namespace message2 { // Function registry implementation -FunctionFactory::~FunctionFactory() {} Function::~Function() {} FunctionValue::~FunctionValue() {} @@ -52,9 +51,9 @@ MFFunctionRegistry MFFunctionRegistry::Builder::build() { } MFFunctionRegistry::Builder& -MFFunctionRegistry::Builder::adoptFunctionFactory(const FunctionName& functionName, - FunctionFactory* function, - UErrorCode& errorCode) { +MFFunctionRegistry::Builder::adoptFunction(const FunctionName& functionName, + Function* function, + UErrorCode& 
errorCode) { if (U_SUCCESS(errorCode)) { U_ASSERT(functions != nullptr); functions->put(functionName, function, errorCode); @@ -100,9 +99,9 @@ MFFunctionRegistry::Builder::~Builder() { // Returns non-owned pointer. Returns pointer rather than reference because it can fail. // Returns non-const because Function is mutable. -FunctionFactory* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { +Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { U_ASSERT(functions != nullptr); - return static_cast(functions->get(functionName)); + return static_cast(functions->get(functionName)); } UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { @@ -227,54 +226,19 @@ MFFunctionRegistry::~MFFunctionRegistry() { // --------- Number -/* static */ StandardFunctions::NumberFactory* -StandardFunctions::NumberFactory::integer(UErrorCode& success) { - return NumberFactory::create(true, success); -} - -/* static */ StandardFunctions::NumberFactory* -StandardFunctions::NumberFactory::number(UErrorCode& success) { - return NumberFactory::create(false, success); -} - -/* static */ StandardFunctions::NumberFactory* -StandardFunctions::NumberFactory::create(bool isInteger, - UErrorCode& success) { - NULL_ON_ERROR(success); - - LocalPointer result(new NumberFactory(isInteger)); - if (!result.isValid()) { - success = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return result.orphan(); -} - -Function* -StandardFunctions::NumberFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Number* result = new Number(isInteger); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - /* static */ StandardFunctions::Number* -StandardFunctions::Number::integer(const Locale& loc, UErrorCode& success) { - return create(loc, true, success); +StandardFunctions::Number::integer(UErrorCode& success) { + return create(true, success); } 
/* static */ StandardFunctions::Number* -StandardFunctions::Number::number(const Locale& loc, UErrorCode& success) { - return create(loc, false, success); +StandardFunctions::Number::number(UErrorCode& success) { + return create(false, success); } /* static */ StandardFunctions::Number* -StandardFunctions::Number::create(const Locale& loc, bool isInteger, UErrorCode& success) { +StandardFunctions::Number::create(bool isInteger, UErrorCode& success) { NULL_ON_ERROR(success); - (void) loc; LocalPointer result(new Number(isInteger)); if (!result.isValid()) { @@ -622,7 +586,6 @@ UnicodeString StandardFunctions::NumberValue::formatToString(UErrorCode& errorCo return formattedNumber.toString(errorCode); } -StandardFunctions::NumberFactory::~NumberFactory() {} StandardFunctions::Number::~Number() {} StandardFunctions::NumberValue::~NumberValue() {} @@ -728,59 +691,23 @@ void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, // --------- DateTime -/* -// Date/time options only -static UnicodeString defaultForOption(const UnicodeString& optionName) { - if (optionName == UnicodeString("dateStyle") - || optionName == UnicodeString("timeStyle") - || optionName == UnicodeString("style")) { - return UnicodeString("short"); - } - return {}; // Empty string is default -} -*/ - -/* static */ StandardFunctions::DateTimeFactory* -StandardFunctions::DateTimeFactory::date(UErrorCode& success) { - return DateTimeFactory::create(DateTimeType::kDate, success); -} - -/* static */ StandardFunctions::DateTimeFactory* -StandardFunctions::DateTimeFactory::time(UErrorCode& success) { - return DateTimeFactory::create(DateTimeType::kTime, success); -} - -/* static */ StandardFunctions::DateTimeFactory* -StandardFunctions::DateTimeFactory::dateTime(UErrorCode& success) { - return DateTimeFactory::create(DateTimeType::kDateTime, success); +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::date(UErrorCode& success) { + return 
DateTime::create(DateTimeType::kDate, success); } -/* static */ StandardFunctions::DateTimeFactory* -StandardFunctions::DateTimeFactory::create(DateTimeFactory::DateTimeType type, - UErrorCode& success) { - NULL_ON_ERROR(success); - - LocalPointer result(new DateTimeFactory(type)); - if (!result.isValid()) { - success = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return result.orphan(); +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::time(UErrorCode& success) { + return DateTime::create(DateTimeType::kTime, success); } -Function* -StandardFunctions::DateTimeFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTime* result = new DateTime(type); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::dateTime(UErrorCode& success) { + return DateTime::create(DateTimeType::kDateTime, success); } /* static */ StandardFunctions::DateTime* -StandardFunctions::DateTime::create(DateTimeFactory::DateTimeType type, +StandardFunctions::DateTime::create(DateTime::DateTimeType type, UErrorCode& success) { NULL_ON_ERROR(success); @@ -838,7 +765,7 @@ UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& statu return formattedDate; } -StandardFunctions::DateTimeValue::DateTimeValue(DateTimeFactory::DateTimeType type, +StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, FunctionValue& val, FunctionOptions&& options, @@ -873,7 +800,7 @@ StandardFunctions::DateTimeValue::DateTimeValue(DateTimeFactory::DateTimeType ty bool hasTimeStyleOption = timeStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; - using DateTimeType = DateTimeFactory::DateTimeType; + using DateTimeType = DateTime::DateTimeType; bool useStyle = (type == DateTimeType::kDateTime && (hasDateStyleOption || hasTimeStyleOption @@ -1090,24 +1017,11 @@ 
StandardFunctions::DateTimeValue::DateTimeValue(DateTimeFactory::DateTimeType ty formattedDate = result; } -StandardFunctions::DateTimeFactory::~DateTimeFactory() {} StandardFunctions::DateTime::~DateTime() {} StandardFunctions::DateTimeValue::~DateTimeValue() {} // --------- String -/* static */ StandardFunctions::StringFactory* -StandardFunctions::StringFactory::string(UErrorCode& success) { - NULL_ON_ERROR(success); - - LocalPointer result(new StringFactory()); - if (!result.isValid()) { - success = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return result.orphan(); -} - /* static */ StandardFunctions::String* StandardFunctions::String::string(UErrorCode& success) { NULL_ON_ERROR(success); @@ -1120,17 +1034,6 @@ StandardFunctions::String::string(UErrorCode& success) { return result.orphan(); } -Function* -StandardFunctions::StringFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - String* result = new String(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, @@ -1185,7 +1088,6 @@ void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, } } -StandardFunctions::StringFactory::~StringFactory() {} StandardFunctions::String::~String() {} StandardFunctions::StringValue::~StringValue() {} diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 14b5f0ee8bbb..b117510a4588 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -34,33 +34,12 @@ namespace message2 { class DateTime; class DateTimeValue; - class DateTimeFactory : public FunctionFactory { - public: - Function* createFunction(UErrorCode& status) override; - static DateTimeFactory* date(UErrorCode&); - static DateTimeFactory* 
time(UErrorCode&); - static DateTimeFactory* dateTime(UErrorCode&); - virtual ~DateTimeFactory(); - private: - friend class DateTime; - friend class DateTimeValue; - - typedef enum DateTimeType { - kDate, - kTime, - kDateTime - } DateTimeType; - - DateTimeType type; - - static DateTimeFactory* create(const DateTimeType, - UErrorCode&); - - DateTimeFactory(const DateTimeType t) : type(t) {} - }; // class DateTimeFactory - class DateTime : public Function { public: + static DateTime* date(UErrorCode&); + static DateTime* time(UErrorCode&); + static DateTime* dateTime(UErrorCode&); + FunctionValue* call(const FunctionContext& context, FunctionValue& operand, FunctionOptions&& options, @@ -71,31 +50,24 @@ namespace message2 { friend class DateTimeFactory; friend class DateTimeValue; - const DateTimeFactory::DateTimeType type; - static DateTime* create(DateTimeFactory::DateTimeType, - UErrorCode&); - DateTime(DateTimeFactory::DateTimeType t) : type(t) {} + typedef enum DateTimeType { + kDate, + kTime, + kDateTime + } DateTimeType; + + const DateTimeType type; + static DateTime* create(DateTimeType, UErrorCode&); + DateTime(DateTimeType t) : type(t) {} const LocalPointer icuFormatter; }; class NumberValue; - class NumberFactory : public FunctionFactory { - public: - Function* createFunction(UErrorCode& status) override; - static NumberFactory* integer(UErrorCode& success); - static NumberFactory* number(UErrorCode& success); - virtual ~NumberFactory(); - private: - static NumberFactory* create(bool, UErrorCode&); - NumberFactory(bool isInt) : isInteger(isInt) {} - bool isInteger; - }; // class NumberFactory - class Number : public Function { public: - static Number* integer(const Locale& loc, UErrorCode& success); - static Number* number(const Locale& loc, UErrorCode& success); + static Number* integer(UErrorCode& success); + static Number* number( UErrorCode& success); FunctionValue* call(const FunctionContext& context, FunctionValue& operand, @@ -113,7 +85,7 @@ 
namespace message2 { PLURAL_EXACT } PluralType; - static Number* create(const Locale&, bool, UErrorCode&); + static Number* create(bool, UErrorCode&); Number(bool isInt) : isInteger(isInt) /*, icuFormatter(number::NumberFormatter::withLocale(loc))*/ {} // These options have their own accessor methods, since they have different default values. @@ -169,18 +141,10 @@ namespace message2 { friend class DateTime; UnicodeString formattedDate; - DateTimeValue(DateTimeFactory::DateTimeType type, const FunctionContext& context, + DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, FunctionValue&, FunctionOptions&&, UErrorCode&); }; // class DateTimeValue - class StringFactory : public FunctionFactory { - public: - Function* createFunction(UErrorCode& status) override; - static StringFactory* string(UErrorCode& status); - virtual ~StringFactory(); - private: - }; // class StringFactory - class String : public Function { public: FunctionValue* call(const FunctionContext& context, diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index 6cf29c191adb..a725a37898df 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -376,7 +376,7 @@ namespace message2 { bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; // Result is not adopted - FunctionFactory* lookupFunctionFactory(const FunctionName&, UErrorCode&) const; + Function* lookupFunction(const FunctionName&, UErrorCode&) const; bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; // Checking for resolution errors diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index d8c636144592..639958cd3100 100644 --- 
a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -29,7 +29,7 @@ namespace message2 { using namespace data_model; - class FunctionFactory; + class Function; /** * Defines mappings from names of formatters and selectors to functions implementing them. @@ -48,19 +48,19 @@ namespace message2 { public: /** - * Looks up a function factory by the name of the function. The result is non-const, + * Looks up a function by the name of the function. The result is non-const, * since functions may have local state. Returns the result by pointer * rather than by reference since it can fail. * * @param functionName Name of the desired function. - * @return A pointer to the function factory registered under `functionName`, or null + * @return A pointer to the function registered under `functionName`, or null * if no function was registered under that name. The pointer is not owned * by the caller. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - FunctionFactory* getFunction(const FunctionName& functionName) const; + Function* getFunction(const FunctionName& functionName) const; /** * Looks up a function by a type tag. This method gets the name of the default formatter registered * for that type. If no formatter was explicitly registered for this type, it returns false. @@ -125,9 +125,9 @@ namespace message2 { * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - Builder& adoptFunctionFactory(const data_model::FunctionName& functionName, - FunctionFactory* function, - UErrorCode& errorCode); + Builder& adoptFunction(const data_model::FunctionName& functionName, + Function* function, + UErrorCode& errorCode); /** * Registers a formatter factory to a given type tag. * (See `FormattableObject` for details on type tags.) 
@@ -277,41 +277,6 @@ namespace message2 { : locale(loc), dir(d), id(i) {} }; // class FunctionContext - /** - * Interface that function factory classes must implement. - * - * @internal ICU 77 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FunctionFactory : public UObject { - public: - /** - * Constructs a new function object. This method is not const; - * function factories with local state may be defined. - * - * @param status Input/output error code. - * @return The new Formatter, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Function* createFunction(UErrorCode& status) = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FunctionFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FunctionFactory& operator=(const FunctionFactory&) = delete; - }; // class FunctionFactory - class FunctionValue; /** diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index a7f6d727addb..0c42c83f0ecd 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -278,7 +278,7 @@ void TestMessageFormat2::testAPICustomFunctions() { // Set up custom function registry MFFunctionRegistry::Builder builder(errorCode); MFFunctionRegistry functionRegistry = - builder.adoptFunctionFactory(data_model::FunctionName("person"), new PersonNameFactory(), errorCode) + builder.adoptFunction(data_model::FunctionName("person"), new PersonNameFunction(), errorCode) .build(); Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); @@ -320,8 +320,8 @@ void TestMessageFormat2::testAPICustomFunctions() { MFFunctionRegistry::Builder builderByType(errorCode); FunctionName personFunctionName("person"); MFFunctionRegistry functionRegistryByType = - builderByType.adoptFunctionFactory(personFunctionName, - new PersonNameFactory(), + builderByType.adoptFunction(personFunctionName, + new PersonNameFunction(), errorCode) .setDefaultFormatterNameByType("person", personFunctionName, diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index f8b3f2b4f82b..b1a932ddcbd6 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -112,18 +112,10 @@ class Person : public FormattableObject { const UnicodeString tagName; }; -class PersonNameFactory : public FunctionFactory { - Function* createFunction(UErrorCode& status) override; - virtual ~PersonNameFactory(); -}; - class PersonNameFunction : public Function { public: FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; 
virtual ~PersonNameFunction(); - private: - friend class PersonNameFactory; - PersonNameFunction() {} }; @@ -151,11 +143,6 @@ class FormattableProperties : public FormattableObject { const UnicodeString tagName; }; -class GrammarCasesFactory : public FunctionFactory { - Function* createFunction(UErrorCode& status) override; - virtual ~GrammarCasesFactory(); -}; - class GrammarCasesFunction : public Function { public: FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; @@ -175,11 +162,6 @@ class GrammarCasesValue : public FunctionValue { void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; }; // class GrammarCasesValue -class ListFactory : public FunctionFactory { - Function* createFunction(UErrorCode& status) override; - virtual ~ListFactory(); -}; - class ListFunction : public Function { public: FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; @@ -202,11 +184,6 @@ class ListValue : public FunctionValue { UErrorCode&); }; // class ListValue -class NounFunctionFactory : public FunctionFactory { - Function* createFunction(UErrorCode& status) override; - virtual ~NounFunctionFactory(); -}; - class NounValue : public FunctionValue { public: UnicodeString formatToString(UErrorCode&) const override; @@ -221,11 +198,6 @@ class NounValue : public FunctionValue { UErrorCode&); }; // class NounValue -class AdjectiveFunctionFactory : public FunctionFactory { - Function* createFunction(UErrorCode& status) override; - virtual ~AdjectiveFunctionFactory(); -}; - class AdjectiveValue : public FunctionValue { public: UnicodeString formatToString(UErrorCode&) const override; diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index fa071a83aa1e..dfbd0cdb199a 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ 
b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -32,9 +32,9 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunctionFactory(FunctionName("person"), - new PersonNameFactory(), - errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString name = "name"; LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); @@ -100,9 +100,9 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunctionFactory(FunctionName("person"), - new PersonNameFactory(), - errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString host = "host"; UnicodeString hostGender = "hostGender"; @@ -192,12 +192,12 @@ void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFunctionFactory(FunctionName("noun"), - new NounFunctionFactory(), - errorCode) - .adoptFunctionFactory(FunctionName("adjective"), - new AdjectiveFunctionFactory(), - errorCode) + .adoptFunction(FunctionName("noun"), + new NounFunction(), + errorCode) + .adoptFunction(FunctionName("adjective"), + new AdjectiveFunction(), + errorCode) .build()); UnicodeString name = "name"; TestCase::Builder testBuilder; @@ -273,16 +273,6 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } -Function* PersonNameFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - PersonNameFunction* result = new PersonNameFunction(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - 
FunctionValue* PersonNameFunction::call(const FunctionContext& context, FunctionValue& arg, FunctionOptions&& opts, @@ -370,7 +360,6 @@ PersonNameValue::PersonNameValue(FunctionValue& arg, FormattableProperties::~FormattableProperties() {} Person::~Person() {} -PersonNameFactory::~PersonNameFactory() {} PersonNameValue::~PersonNameValue() {} /* @@ -401,16 +390,6 @@ PersonNameValue::~PersonNameValue() {} result += postfix; } -Function* GrammarCasesFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - GrammarCasesFunction* result = new GrammarCasesFunction(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - FunctionValue* GrammarCasesFunction::call(const FunctionContext& context, FunctionValue& arg, FunctionOptions&& opts, @@ -479,7 +458,7 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode) - .adoptFunctionFactory(FunctionName("grammarBB"), new GrammarCasesFactory(), errorCode) + .adoptFunction(FunctionName("grammarBB"), new GrammarCasesFunction(), errorCode) .build(); TestCase::Builder testBuilder; @@ -531,24 +510,12 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) TestUtils::runTestCase(*this, test, errorCode); } -GrammarCasesFactory::~GrammarCasesFactory() {} GrammarCasesValue::~GrammarCasesValue() {} /* See ICU4J: CustomFormatterListTest.java */ -Function* ListFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - ListFunction* result = new ListFunction(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - - FunctionValue* ListFunction::call(const FunctionContext& context, FunctionValue& arg, FunctionOptions&& opts, @@ -633,7 +600,6 @@ message2::ListValue::ListValue(const Locale& locale, } } -ListFactory::~ListFactory() {} ListValue::~ListValue() {} 
ListFunction::~ListFunction() {} @@ -650,7 +616,7 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { TestCase::Builder testBuilder; MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFunctionFactory(FunctionName("listformat"), new ListFactory(), errorCode) + .adoptFunction(FunctionName("listformat"), new ListFunction(), errorCode) .build(); CHECK_ERROR(errorCode); @@ -875,16 +841,6 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { } #endif -Function* NounFunctionFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - NounFunction* result = new NounFunction(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - FunctionValue* NounFunction::call(const FunctionContext&, FunctionValue& arg, FunctionOptions&& opts, @@ -941,16 +897,6 @@ NounValue::NounValue(FunctionValue& arg, } } -Function* AdjectiveFunctionFactory::createFunction(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - AdjectiveFunction* result = new AdjectiveFunction(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - FunctionValue* AdjectiveFunction::call(const FunctionContext&, FunctionValue& arg, FunctionOptions&& opts, @@ -1017,8 +963,6 @@ AdjectiveValue::AdjectiveValue(FunctionValue& arg, } } -NounFunctionFactory::~NounFunctionFactory() {} -AdjectiveFunctionFactory::~AdjectiveFunctionFactory() {} NounFunction::~NounFunction() {} AdjectiveFunction::~AdjectiveFunction() {} NounValue::~NounValue() {} From c100e3558d27cc23f699a1a603a806b3a374e37e Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 16 Oct 2024 08:42:22 -0700 Subject: [PATCH 30/37] Re-add ResourceManager test --- .../source/test/intltest/messageformat2test.h | 31 ++++---- .../intltest/messageformat2test_custom.cpp | 78 +++++++++++-------- 2 files changed, 62 insertions(+), 47 deletions(-) diff --git 
a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index b1a932ddcbd6..87b5b48a3d17 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -61,7 +61,7 @@ class TestMessageFormat2: public IntlTest { void testCustomFunctionsComplexMessage(IcuTestErrorCode&); void testGrammarCasesFormatter(IcuTestErrorCode&); void testListFormatter(IcuTestErrorCode&); - // void testMessageRefFormatter(IcuTestErrorCode&); + void testMessageRefFormatter(IcuTestErrorCode&); void testComplexOptions(IcuTestErrorCode&); // Feature tests @@ -212,26 +212,31 @@ class AdjectiveValue : public FunctionValue { UErrorCode&); }; // class AdjectiveValue -/* -class ResourceManagerFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; -class ResourceManager : public Formatter { +class ResourceManager : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); static Hashtable* properties(UErrorCode&); static UnicodeString propertiesAsString(const Hashtable&); static Hashtable* parseProperties(const UnicodeString&, UErrorCode&); + ResourceManager() {} + virtual ~ResourceManager(); +}; +class ResourceManagerValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + ResourceManagerValue(); + virtual ~ResourceManagerValue(); private: - friend class ResourceManagerFactory; - ResourceManager(const Locale& loc) : locale(loc) {} - const Locale& locale; -}; -*/ + friend class ResourceManager; + + UnicodeString formattedString; + ResourceManagerValue(FunctionValue&, + FunctionOptions&&, + UErrorCode&); +}; // class ResourceManagerValue class NounFunction : 
public Function { public: diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index dfbd0cdb199a..f0748b185205 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -240,7 +240,7 @@ void TestMessageFormat2::testCustomFunctions() { testCustomFunctionsComplexMessage(errorCode); testGrammarCasesFormatter(errorCode); testListFormatter(errorCode); - // testMessageRefFormatter(errorCode); + testMessageRefFormatter(errorCode); testComplexOptions(errorCode); } @@ -641,7 +641,6 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { See ICU4J: CustomFormatterMessageRefTest.java */ -#if false /* static */ Hashtable* message2::ResourceManager::properties(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -675,47 +674,54 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { return nullptr; } -Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - Formatter* result = new ResourceManager(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - using Arguments = MessageArguments; -// TODO: The next test is commented out because we need to write code -// to convert an options map to a MessageArguments (mapping FormattedPlaceholder -// back to Formattable) - static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { if (U_FAILURE(status)) { return {}; } std::map result; for (auto iter = opts.cbegin(); iter != opts.cend(); ++iter) { - result[iter->first] = iter->second->getSource(status); + result[iter->first] = iter->second->getOperand(); } return MessageArguments(result, status); } -message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& 
errorCode) const { +FunctionValue* ResourceManager::call(const FunctionContext&, + FunctionValue& arg, + FunctionOptions&& options, + UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer + result(new ResourceManagerValue(arg, std::move(options), errorCode)); + + if (U_SUCCESS(errorCode) && !result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +UnicodeString message2::ResourceManagerValue::formatToString(UErrorCode&) const { + return formattedString; +} + +message2::ResourceManagerValue::ResourceManagerValue(FunctionValue& arg, + FunctionOptions&& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("msgref"); + operand = arg.getOperand(); + opts = std::move(options); // Tests don't cover composition, so no need to merge options - const Formattable* toFormat = arg.getSource(errorCode); + const Formattable* toFormat = &operand; // Check for null or fallback if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } UnicodeString in; switch (toFormat->getType()) { @@ -725,22 +731,25 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar } default: { // Ignore non-strings - return errorVal; + return; } } - FunctionOptionsMap opt = FunctionOptions::getOptions(std::move(options)); - bool hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getValue().getType() == UFMT_OBJECT && opt["resbundle"].getValue().getObject(errorCode)->tag() == u"properties"; + FunctionOptionsMap opt = opts.getOptions(); + bool hasProperties = opt.count("resbundle") > 0 + && opt["resbundle"]->getOperand().getType() == UFMT_OBJECT + && opt["resbundle"]->getOperand().getObject(errorCode)->tag() == u"properties"; // If properties were provided, look up the given string in the properties, // yielding a message if 
(hasProperties) { - const FormattableProperties* properties = reinterpret_cast(opt["resbundle"].getValue().getObject(errorCode)); + const FormattableProperties* properties = reinterpret_cast + (opt["resbundle"]->getOperand().getObject(errorCode)); U_ASSERT(U_SUCCESS(errorCode)); UnicodeString* msg = static_cast(properties->properties->get(in)); if (msg == nullptr) { // No message given for this key -- error out errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } MessageFormatter::Builder mfBuilder(errorCode); UParseError parseErr; @@ -748,7 +757,7 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar MessageFormatter mf = mfBuilder.setPattern(*msg, parseErr, errorCode).build(errorCode); Arguments arguments = localToGlobal(opt, errorCode); if (U_FAILURE(errorCode)) { - return errorVal; + return; } UErrorCode savedStatus = errorCode; @@ -759,14 +768,16 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar if (U_FAILURE(errorCode)) { errorCode = savedStatus; } - return arg.withOutput(FormattedValue(std::move(result)), errorCode); + formattedString = result; } else { // Properties must be provided errorCode = U_MF_FORMATTING_ERROR; } - return errorVal; + return; } +ResourceManager::~ResourceManager() {} +ResourceManagerValue::~ResourceManagerValue() {} void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -779,7 +790,7 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { return; } MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("msgRef"), new ResourceManagerFactory(), errorCode) + .adoptFunction(FunctionName("msgRef"), new ResourceManager(), errorCode) .build(); CHECK_ERROR(errorCode); @@ -839,7 +850,6 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { .build(); TestUtils::runTestCase(*this, test, errorCode); } -#endif FunctionValue* 
NounFunction::call(const FunctionContext&, FunctionValue& arg, From c199c1cf1957ccf5c66f6cbf53ef88e99b97d4e2 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 17 Oct 2024 10:17:20 -0700 Subject: [PATCH 31/37] Make Function::call() return a LocalPointer --- .../i18n/messageformat2_function_registry.cpp | 38 +++--- ...essageformat2_function_registry_internal.h | 6 +- .../messageformat2_function_registry.h | 12 +- .../source/test/intltest/messageformat2test.h | 12 +- .../intltest/messageformat2test_custom.cpp | 110 ++++++++++-------- 5 files changed, 105 insertions(+), 73 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index c1321c03cac0..075b7989ccd9 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -248,17 +248,19 @@ StandardFunctions::Number::create(bool isInteger, UErrorCode& success) { return result.orphan(); } -FunctionValue* StandardFunctions::Number::call(const FunctionContext& context, +LocalPointer StandardFunctions::Number::call(const FunctionContext& context, FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) { - LocalPointer + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer val(new NumberValue(*this, context, operand, std::move(options), errorCode)); - if (val.isValid()) { - return val.orphan(); + if (!val.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } - errorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; + return val; } /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, @@ -719,15 +721,17 @@ StandardFunctions::DateTime::create(DateTime::DateTimeType type, return result.orphan(); } -FunctionValue* +LocalPointer StandardFunctions::DateTime::call(const FunctionContext& context, FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { - 
NULL_ON_ERROR(errorCode); - - auto result = new DateTimeValue(type, context, val, std::move(opts), errorCode); - if (result == nullptr) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new DateTimeValue(type, context, val, std::move(opts), errorCode)); + if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; @@ -1039,12 +1043,20 @@ extern UnicodeString formattableToString(const Locale&, const Formattable&, UErrorCode&); -FunctionValue* +LocalPointer StandardFunctions::String::call(const FunctionContext& context, FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) { - return new StringValue(context, val, std::move(opts), errorCode); + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new StringValue(context, val, std::move(opts), errorCode)); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; } UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCode) const { diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index b117510a4588..2ef20787357f 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -40,7 +40,7 @@ namespace message2 { static DateTime* time(UErrorCode&); static DateTime* dateTime(UErrorCode&); - FunctionValue* call(const FunctionContext& context, + LocalPointer call(const FunctionContext& context, FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) override; @@ -69,7 +69,7 @@ namespace message2 { static Number* integer(UErrorCode& success); static Number* number( UErrorCode& success); - FunctionValue* call(const FunctionContext& context, + LocalPointer call(const FunctionContext& context, FunctionValue& operand, FunctionOptions&& options, UErrorCode& errorCode) override; @@ -147,7 +147,7 
@@ namespace message2 { class String : public Function { public: - FunctionValue* call(const FunctionContext& context, + LocalPointer call(const FunctionContext& context, FunctionValue& val, FunctionOptions&& opts, UErrorCode& errorCode) override; diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 639958cd3100..1ace3e8274af 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -289,20 +289,22 @@ namespace message2 { public: /** * Calls this Function on a FunctionValue operand and its FunctionOptions options, - * returning a new pointer to a FunctionValue (which is adopted by the caller). + * returning a LocalPointer to a FunctionValue. * * @param context The context of this function, based on its contextual options * @param operand The unnamed argument to the function. * @param options Resolved options for this function. * @param status Input/output error code + * @return The function value that is the result of calling this function on + * the arguments. * * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - virtual FunctionValue* call(const FunctionContext& context, - FunctionValue& operand, - FunctionOptions&& options, - UErrorCode& status) = 0; + virtual LocalPointer call(const FunctionContext& context, + FunctionValue& operand, + FunctionOptions&& options, + UErrorCode& status) = 0; /** * Destructor. 
* diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 87b5b48a3d17..83f901c36283 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -114,7 +114,7 @@ class Person : public FormattableObject { class PersonNameFunction : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; virtual ~PersonNameFunction(); PersonNameFunction() {} }; @@ -145,7 +145,7 @@ class FormattableProperties : public FormattableObject { class GrammarCasesFunction : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); }; @@ -164,7 +164,7 @@ class GrammarCasesValue : public FunctionValue { class ListFunction : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); ListFunction() {} virtual ~ListFunction(); @@ -215,7 +215,7 @@ class AdjectiveValue : public FunctionValue { class ResourceManager : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); static Hashtable* properties(UErrorCode&); static UnicodeString propertiesAsString(const Hashtable&); @@ -240,14 +240,14 @@ class ResourceManagerValue : public 
FunctionValue { class NounFunction : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; NounFunction() { } virtual ~NounFunction(); }; class AdjectiveFunction : public Function { public: - FunctionValue* call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; AdjectiveFunction() { } virtual ~AdjectiveFunction(); }; diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index f0748b185205..75a1af019ea3 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -273,16 +273,17 @@ static bool hasStringOption(const FunctionOptionsMap& opt, return getStringOption(opt, k) == v; } -FunctionValue* PersonNameFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - +LocalPointer PersonNameFunction::call(const FunctionContext& context, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { (void) context; - PersonNameValue* v = new PersonNameValue(arg, std::move(opts), errorCode); - if (U_SUCCESS(errorCode) && v == nullptr) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer v(new PersonNameValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return v; @@ -390,16 +391,19 @@ PersonNameValue::~PersonNameValue() {} result += postfix; } -FunctionValue* GrammarCasesFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - +LocalPointer 
+GrammarCasesFunction::call(const FunctionContext& context, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { (void) context; - GrammarCasesValue* v = new GrammarCasesValue(arg, std::move(opts), errorCode); - if (U_SUCCESS(errorCode) && v == nullptr) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer v(new GrammarCasesValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return v; @@ -516,14 +520,18 @@ GrammarCasesValue::~GrammarCasesValue() {} See ICU4J: CustomFormatterListTest.java */ -FunctionValue* ListFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +LocalPointer +ListFunction::call(const FunctionContext& context, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } - ListValue* v = new ListValue(context.getLocale(), arg, std::move(opts), errorCode); - if (U_SUCCESS(errorCode) && v == nullptr) { + LocalPointer + v(new ListValue(context.getLocale(), arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return v; @@ -687,20 +695,22 @@ static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& statu return MessageArguments(result, status); } -FunctionValue* ResourceManager::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& options, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +LocalPointer +ResourceManager::call(const FunctionContext&, + FunctionValue& arg, + FunctionOptions&& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } - LocalPointer + LocalPointer result(new ResourceManagerValue(arg, std::move(options), errorCode)); - if (U_SUCCESS(errorCode) && !result.isValid()) { + if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return 
nullptr; } - return result.orphan(); + return result; } UnicodeString message2::ResourceManagerValue::formatToString(UErrorCode&) const { @@ -851,14 +861,18 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { TestUtils::runTestCase(*this, test, errorCode); } -FunctionValue* NounFunction::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +LocalPointer +NounFunction::call(const FunctionContext&, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } - NounValue* v = new NounValue(arg, std::move(opts), errorCode); - if (U_SUCCESS(errorCode) && v == nullptr) { + LocalPointer + v(new NounValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return v; @@ -907,14 +921,18 @@ NounValue::NounValue(FunctionValue& arg, } } -FunctionValue* AdjectiveFunction::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& opts, - UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +LocalPointer +AdjectiveFunction::call(const FunctionContext&, + FunctionValue& arg, + FunctionOptions&& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } - AdjectiveValue* v = new AdjectiveValue(arg, std::move(opts), errorCode); - if (U_SUCCESS(errorCode) && v == nullptr) { + LocalPointer + v(new AdjectiveValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return v; From f62fc644cbc246b3b9dc9c4edb08e668320fffbe Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 17 Oct 2024 10:35:28 -0700 Subject: [PATCH 32/37] Use an array of indices for `prefs` in `selectKeys()` --- icu4c/source/i18n/messageformat2.cpp | 7 ++++--- icu4c/source/i18n/messageformat2_function_registry.cpp | 10 +++++----- .../i18n/messageformat2_function_registry_internal.h | 4 ++-- 
.../i18n/unicode/messageformat2_function_registry.h | 8 +++++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index ec68fefdad5c..5a95ad63457f 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -420,12 +420,12 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, LocalArray adoptedKeys(keysArr); // Create an array to hold the output - UnicodeString* prefsArr = new UnicodeString[keysLen]; + int32_t* prefsArr = new int32_t[keysLen]; if (prefsArr == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } - LocalArray adoptedPrefs(prefsArr); + LocalArray adoptedPrefs(prefsArr); int32_t prefsLen = 0; // Call the selector @@ -450,7 +450,8 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Copy the resulting keys (if there was no error) keysOut.removeAllElements(); for (int32_t i = 0; i < prefsLen; i++) { - UnicodeString* k = message2::create(std::move(prefsArr[i]), status); + UnicodeString* k = + message2::create(std::move(keysArr[prefsArr[i]]), status); if (k == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 075b7989ccd9..13edabf92824 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -608,7 +608,7 @@ StandardFunctions::Number::pluralType(const FunctionOptions& opts) { void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, int32_t keysLen, - UnicodeString* prefs, + int32_t* prefs, int32_t& prefsLen, UErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -658,7 +658,7 @@ void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, // 5i(a). 
If key and exact consist of the same sequence of Unicode code points, then if (exact == keys[i]) { // 5i(a)(a) Append key as the last element of the list resultExact. - prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; break; } @@ -679,7 +679,7 @@ void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, // 5ii(a). If key and keyword consist of the same sequence of Unicode code points, then if (keyword == keys[i]) { // 5ii(a)(a) Append key as the last element of the list resultKeyword. - prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; } } @@ -1078,7 +1078,7 @@ StandardFunctions::StringValue::StringValue(const FunctionContext& context, void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, int32_t keysLen, - UnicodeString* prefs, + int32_t* prefs, int32_t& prefsLen, UErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -1093,7 +1093,7 @@ void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, for (int32_t i = 0; i < keysLen; i++) { if (keys[i] == formattedString) { - prefs[0] = keys[i]; + prefs[0] = i; prefsLen = 1; break; } diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 2ef20787357f..18c448b732b3 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -114,7 +114,7 @@ namespace message2 { UnicodeString formatToString(UErrorCode&) const override; void selectKeys(const UnicodeString* keys, int32_t keysLen, - UnicodeString* prefs, + int32_t* prefs, int32_t& prefsLen, UErrorCode& status) override; UBool isSelectable() const override { return true; } @@ -165,7 +165,7 @@ namespace message2 { UnicodeString formatToString(UErrorCode&) const override; void selectKeys(const UnicodeString* keys, int32_t keysLen, - UnicodeString* prefs, + int32_t* prefs, int32_t& prefsLen, UErrorCode& status) override; UBool 
isSelectable() const override { return true; } diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 1ace3e8274af..89b71b329977 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -406,9 +406,11 @@ namespace message2 { * * @param keys An array of strings to compare to the input. * @param keysLen The length of `keys`. - * @param prefs An array of strings with length `keysLen`. The contents of + * @param prefs An array of indices into `keys`. + * The initial contents of * the array is undefined. `selectKey()` should set the contents - * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. + * of `prefs` to a subset of the indices in `keys`, + * with the best match placed at the lowest index in `prefs`. * @param prefsLen A reference that `selectKey()` should set to the length of `prefs`, * which must be less than or equal to `keysLen`. * @param status Input/output error code. 
@@ -418,7 +420,7 @@ namespace message2 { */ virtual void selectKeys(const UnicodeString* keys, int32_t keysLen, - UnicodeString* prefs, + int32_t* prefs, int32_t& prefsLen, UErrorCode& status) { (void) keys; From 603bffab04de2c916f11c143b393ad7b6620fcbb Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 17 Oct 2024 10:38:01 -0700 Subject: [PATCH 33/37] Add comment about isSelectable() --- .../source/i18n/unicode/messageformat2_function_registry.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index 89b71b329977..b8f37d9986ab 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -385,7 +385,11 @@ namespace message2 { * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - virtual UBool isSelectable() const { return false; } + virtual UBool isSelectable() const { + // In the future, this function could return a capability + // indicating whether this function can format, select, or both. + return false; + } /** * Returns true if this value represents a null operand, that is, * the absence of an argument. This method should not be overridden. 
From 740ca4f4cd6ae9b0b9ac93ec453bb8ab53d34aac Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 17 Oct 2024 11:10:23 -0700 Subject: [PATCH 34/37] Remove use of std::string --- icu4c/source/i18n/messageformat2.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 5a95ad63457f..3f2c75c3733d 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -181,8 +181,10 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt localeToUse = locale; } else { UErrorCode localStatus = U_ZERO_ERROR; - std::string u8; - Locale l = Locale::forLanguageTag(localeStr.toUTF8String(u8), localStatus); + int32_t len = localeStr.length(); + LocalArray temp(new char[len + 1]); + localeStr.extract(0, len, temp.getAlias(), len); + Locale l = Locale::forLanguageTag(StringPiece(temp.getAlias(), len), localStatus); if (U_SUCCESS(localStatus)) { localeToUse = l; } else { From e15e62bf497d813145226831dd1823e995945e34 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 17 Oct 2024 11:39:53 -0700 Subject: [PATCH 35/37] Remove uses of new/delete --- icu4c/source/i18n/messageformat2.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 3f2c75c3733d..498a67550168 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -182,9 +182,10 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt } else { UErrorCode localStatus = U_ZERO_ERROR; int32_t len = localeStr.length(); - LocalArray temp(new char[len + 1]); - localeStr.extract(0, len, temp.getAlias(), len); - Locale l = Locale::forLanguageTag(StringPiece(temp.getAlias(), len), localStatus); + char* buf = static_cast(uprv_malloc(len + 1)); + localeStr.extract(0, len, buf, len); + Locale l = 
Locale::forLanguageTag(StringPiece(buf, len), localStatus); + uprv_free(buf); if (U_SUCCESS(localStatus)) { localeToUse = l; } else { @@ -422,18 +423,18 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, LocalArray adoptedKeys(keysArr); // Create an array to hold the output - int32_t* prefsArr = new int32_t[keysLen]; + int32_t* prefsArr = static_cast(uprv_malloc(keysLen * sizeof(int32_t))); if (prefsArr == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } - LocalArray adoptedPrefs(prefsArr); + int32_t prefsLen = 0; // Call the selector // Already checked for fallback, so it's safe to call takeValue() LocalPointer rvVal(rv.takeValue(status)); - rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, + rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, prefsArr, prefsLen, status); // Update errors @@ -461,6 +462,8 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, keysOut.adoptElement(k, status); CHECK_ERROR(status); } + + uprv_free(prefsArr); } // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences From 5343da5a23617166168317c708da4f07b5a10c1a Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 23 Oct 2024 15:05:49 -0700 Subject: [PATCH 36/37] Implement call-by-need; tests pass --- icu4c/source/i18n/messageformat2.cpp | 182 ++++++++----- icu4c/source/i18n/messageformat2_allocation.h | 12 + .../source/i18n/messageformat2_evaluation.cpp | 239 +++++++++++++----- icu4c/source/i18n/messageformat2_evaluation.h | 148 ++++++++--- .../i18n/messageformat2_function_registry.cpp | 38 +-- ...essageformat2_function_registry_internal.h | 26 +- icu4c/source/i18n/unicode/messageformat2.h | 16 +- .../i18n/unicode/messageformat2_formattable.h | 54 ++-- .../messageformat2_function_registry.h | 26 +- .../source/test/intltest/messageformat2test.h | 60 +++-- .../intltest/messageformat2test_custom.cpp | 121 ++++++--- testdata/message2/icu-test-functions.json | 9 + 12 files 
changed, 651 insertions(+), 280 deletions(-) diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 498a67550168..d98561fa461a 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -27,6 +27,25 @@ using namespace data_model; // ------------------------------------------------------ // Formatting +static UnicodeString varFallback(const VariableName& var) { + UnicodeString str(DOLLAR); + str += var; + return str; +} + +static UnicodeString functionFallback(const InternalValue& operand, + const FunctionName& functionName) { + UnicodeString fallbackStr; + // Create the fallback string for this function call + if (operand.isNullOperand()) { + fallbackStr = UnicodeString(COLON); + fallbackStr += functionName; + } else { + fallbackStr = operand.asFallback(); + } + return fallbackStr; +} + // Assumes that `var` is a message argument; returns the argument's value. [[nodiscard]] InternalValue MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, @@ -63,35 +82,60 @@ using namespace data_model; return {}; } -[[nodiscard]] InternalValue MessageFormatter::evalOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { +// InternalValues are passed as references into a global environment object +// that is live for the duration of one formatter call. +// They are mutable references so that they can be updated with a new value +// (when a closure is overwritten with the result of evaluating it), +// which can be shared across different references to the corresponding MF2 +// variable. 
+[[nodiscard]] InternalValue& MessageFormatter::evalOperand(Environment& env, + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { - return {}; + return env.bogus(); } // Three cases: absent operand; variable; or literal // Absent (null) operand if (rand.isNull()) { - return InternalValue::null(status); + return env.createNull(status); } // Variable reference if (rand.isVariable()) { // Check if it's local or global // Note: there is no name shadowing; this is enforced by the parser const VariableName& var = rand.asVariable(); - // Currently, this code implements lazy evaluation of locals. + + // This code implements lazy call-by-need evaluation of locals. // That is, the environment binds names to a closure, not a resolved value. // The spec does not require either eager or lazy evaluation. // Look up the variable in the environment if (env.has(var)) { - // `var` is a local -- look it up - const Closure& rhs = env.lookup(var); - // Evaluate the expression using the environment from the closure - return evalExpression(rhs.getEnv(), rhs.getExpr(), context, status); + // `var` is a local -- look it up + InternalValue& rhs = env.lookup(var); + // Evaluate the expression using the environment from the closure + if (!rhs.isEvaluated()) { + Closure& c = rhs.asClosure(); + InternalValue& result = evalExpression(c.getEnv(), + c.getExpr(), + context, + status); + // Overwrite the closure with the result of evaluation + if (result.isFallback()) { + rhs.update(result.asFallback()); + } else { + U_ASSERT(result.isEvaluated()); + // Create an indirection to the result returned + // by evalExpression() + rhs.update(result); + } + return rhs; + } + // If it's already evaluated, just return the value + return rhs; } // Variable wasn't found in locals -- check if it's global InternalValue result = evalArgument(var, context, status); @@ -101,22 +145,20 @@ using namespace data_model; context.getErrors().setUnresolvedVariable(var, 
status); // Use fallback per // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution - UnicodeString str(DOLLAR); - str += var; - return InternalValue::fallback(str); + return env.createFallback(varFallback(var), status); } // Looking up the global variable succeeded; return it - return result; + return env.createUnnamed(std::move(result), status); } // Literal else { U_ASSERT(rand.isLiteral()); - return evalLiteral(rand.asLiteral(), status); + return env.createUnnamed(evalLiteral(rand.asLiteral(), status), status); } } // Resolves a function's options -FunctionOptions MessageFormatter::resolveOptions(const Environment& env, +FunctionOptions MessageFormatter::resolveOptions(Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { @@ -136,15 +178,16 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const Operand& v = opt.getValue(); // ...evaluate its right-hand side... - InternalValue rhsVal = evalOperand(env, v, context, status); + InternalValue& rhsVal = evalOperand(env, v, context, status); + // ...giving a FunctionValue. + const FunctionValue* optVal = rhsVal.getValue(status); + // Ignore fallback values if (U_FAILURE(status)) { - return {}; + continue; } - // ...giving a FunctionValue. 
- FunctionValue* optVal = rhsVal.takeValue(status); // The option is resolved; add it to the vector - ResolvedFunctionOption resolvedOpt(k, optVal); + ResolvedFunctionOption resolvedOpt(k, *optVal); LocalPointer p(create(std::move(resolvedOpt), status)); EMPTY_ON_ERROR(status); @@ -201,33 +244,31 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt // Looks up `functionName` and applies it to an operand and options, // handling errors if the function is unbound -[[nodiscard]] InternalValue MessageFormatter::apply(const FunctionName& functionName, - InternalValue&& rand, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { - EMPTY_ON_ERROR(status); - - UnicodeString fallbackStr; - - // Create the fallback string for this function call - if (rand.isNullOperand()) { - fallbackStr = UnicodeString(COLON); - fallbackStr += functionName; - } else { - fallbackStr = rand.asFallback(); +[[nodiscard]] InternalValue& MessageFormatter::apply(Environment& env, + const FunctionName& functionName, + InternalValue& rand, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { + if (U_FAILURE(status)) { + return env.bogus(); } + // Create the fallback string to use in case of an error + // calling the function + UnicodeString fallbackStr = functionFallback(rand, functionName); + // Look up the function name Function* function = lookupFunction(functionName, status); if (U_FAILURE(status)) { // Function is unknown -- set error and use the fallback value status = U_ZERO_ERROR; context.getErrors().setUnknownFunction(functionName, status); - return InternalValue::fallback(fallbackStr); + return env.createFallback(fallbackStr, status); } - // Value is not a fallback, so we can safely call takeValue() - LocalPointer functionArg(rand.takeValue(status)); + // Value is not a fallback (checked by the caller), + // so we can safely call getValue() + const FunctionValue* 
functionArg(rand.getValue(status)); U_ASSERT(U_SUCCESS(status)); // Call the function LocalPointer @@ -240,32 +281,32 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt if (status == U_MF_OPERAND_MISMATCH_ERROR) { status = U_ZERO_ERROR; context.getErrors().setOperandMismatchError(functionName, status); - return InternalValue::fallback(fallbackStr); + return env.createFallback(fallbackStr, status); } if (status == U_MF_FORMATTING_ERROR) { status = U_ZERO_ERROR; context.getErrors().setFormattingError(functionName, status); - return InternalValue::fallback(fallbackStr); + return env.createFallback(fallbackStr, status); } if (U_FAILURE(status)) { - return {}; + return env.bogus(); } // Success; return the result - return InternalValue(functionResult.orphan(), fallbackStr); + return env.createUnnamed(InternalValue(functionResult.orphan(), fallbackStr), status); } // Evaluates an expression using `globalEnv` for the values of variables -[[nodiscard]] InternalValue MessageFormatter::evalExpression(const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { +[[nodiscard]] InternalValue& MessageFormatter::evalExpression(Environment& globalEnv, + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { - return {}; + return globalEnv.bogus(); } const Operand& rand = expr.getOperand(); // Evaluate the operand (evalOperand handles the case of a null operand) - InternalValue randVal = evalOperand(globalEnv, rand, context, status); + InternalValue& randVal = evalOperand(globalEnv, rand, context, status); // If there's no function, we check for an implicit formatter if (!expr.isFunctionCall()) { @@ -289,8 +330,9 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt return randVal; } // ... 
apply the implicit formatter - return apply(functionName, - std::move(randVal), + return apply(globalEnv, + functionName, + randVal, FunctionOptions(), context, status); @@ -313,14 +355,14 @@ FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& opt FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); // Call the function with the operand and arguments - return apply(functionName, - std::move(randVal), std::move(resolvedOptions), context, status); + return apply(globalEnv, functionName, + randVal, std::move(resolvedOptions), context, status); } } // Formats each text and expression part of a pattern, appending the results to `result` void MessageFormatter::formatPattern(MessageContext& context, - const Environment& globalEnv, + Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); @@ -333,14 +375,14 @@ void MessageFormatter::formatPattern(MessageContext& context, // Markup is ignored } else { // Format the expression - InternalValue partVal = evalExpression(globalEnv, part.contents(), context, status); + InternalValue& partVal = evalExpression(globalEnv, part.contents(), context, status); if (partVal.isFallback()) { result += LEFT_CURLY_BRACE; result += partVal.asFallback(); result += RIGHT_CURLY_BRACE; } else { // Do final formatting (e.g. 
formatting numbers as strings) - LocalPointer val(partVal.takeValue(status)); + const FunctionValue* val = partVal.getValue(status); // Shouldn't be null or a fallback U_ASSERT(U_SUCCESS(status)); result += val->formatToString(status); @@ -363,7 +405,7 @@ void MessageFormatter::formatPattern(MessageContext& context, // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors // `res` is a vector of ResolvedSelectors -void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { +void MessageFormatter::resolveSelectors(MessageContext& context, Environment& env, UErrorCode &status, UVector& res) const { CHECK_ERROR(status); U_ASSERT(!dataModel.hasPattern()); @@ -373,7 +415,7 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - InternalValue rv = evalExpression(env, selectors[i], context, status); + InternalValue& rv = evalExpression(env, selectors[i], context, status); if (rv.isSelectable()) { // 2ii. 
If selection is supported for rv: // (True if this code has been reached) @@ -432,8 +474,9 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, int32_t prefsLen = 0; // Call the selector - // Already checked for fallback, so it's safe to call takeValue() - LocalPointer rvVal(rv.takeValue(status)); + // Caller checked for fallback, so it's safe to call getValue() + const FunctionValue* rvVal = rv.getValue(status); + U_ASSERT(U_SUCCESS(status)); rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, prefsArr, prefsLen, status); @@ -469,7 +512,10 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences // `res` is a vector of FormattedPlaceholders; // `pref` is a vector of vectors of strings -void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const { +void MessageFormatter::resolvePreferences(MessageContext& context, + UVector& res, + UVector& pref, + UErrorCode &status) const { CHECK_ERROR(status); // 1. Let pref be a new empty list of lists of strings. @@ -640,7 +686,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo // 7. 
Select the pattern of `var` } -void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatSelectors(MessageContext& context, + Environment& env, + UErrorCode &status, + UnicodeString& result) const { CHECK_ERROR(status); // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection @@ -779,7 +828,12 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& // memoizing the value of localEnv up to this point // Add the LHS to the environment for checking the next declaration - env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status); + const VariableName& lhs = decl.getVariable(); + env = Environment::create(lhs, + Closure::create(rhs, *env, status), + varFallback(lhs), + env, + status); CHECK_ERROR(status); } } diff --git a/icu4c/source/i18n/messageformat2_allocation.h b/icu4c/source/i18n/messageformat2_allocation.h index 7be27e222520..e375ceca439b 100644 --- a/icu4c/source/i18n/messageformat2_allocation.h +++ b/icu4c/source/i18n/messageformat2_allocation.h @@ -131,6 +131,18 @@ namespace message2 { return result; } + template + inline T* create(const T& node, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + T* result = new T(node); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; + } + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 745f56bd2907..05abaf11c8a2 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -64,15 +64,10 @@ ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { } ResolvedFunctionOption::ResolvedFunctionOption(const UnicodeString& n, - FunctionValue* f) : name(n), value(f) { - U_ASSERT(f != 
nullptr); -} + const FunctionValue& f) : name(n), value(&f) {} ResolvedFunctionOption::~ResolvedFunctionOption() { - if (value != nullptr) { - delete value; - value = nullptr; - } + value = nullptr; // value is not owned } @@ -98,7 +93,7 @@ FunctionOptions::getFunctionOption(const UnicodeString& key, for (int32_t i = 0; i < functionOptionsLen; i++) { const ResolvedFunctionOption& opt = options[i]; if (opt.getName() == key) { - return opt.getValue(); + return &opt.getValue(); } } status = U_ILLEGAL_ARGUMENT_ERROR; @@ -128,16 +123,22 @@ UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) return result; } -FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept { - functionOptionsLen = other.functionOptionsLen; - options = other.options; - other.functionOptionsLen = 0; - other.options = nullptr; +FunctionOptions& FunctionOptions::operator=(FunctionOptions other) noexcept { + swap(*this, other); return *this; } -FunctionOptions::FunctionOptions(FunctionOptions&& other) { - *this = std::move(other); +FunctionOptions::FunctionOptions(const FunctionOptions& other) { + U_ASSERT(!other.bogus); + functionOptionsLen = other.functionOptionsLen; + options = nullptr; + if (functionOptionsLen != 0) { + UErrorCode localStatus = U_ZERO_ERROR; + options = copyArray(other.options, functionOptionsLen, localStatus); + if (U_FAILURE(localStatus)) { + bogus = true; + } + } } FunctionOptions::~FunctionOptions() { @@ -158,19 +159,22 @@ static bool containsOption(const UVector& opts, const ResolvedFunctionOption& op } // Options in `this` take precedence -// `this` can't be used after mergeOptions is called -FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, - UErrorCode& status) { +FunctionOptions FunctionOptions::mergeOptions(const FunctionOptions& other, + UErrorCode& status) const { UVector mergedOptions(status); mergedOptions.setDeleter(uprv_deleteUObject); if (U_FAILURE(status)) { return {}; } + if (bogus || 
other.bogus) { + status = U_MEMORY_ALLOCATION_ERROR; + return {}; + } // Create a new vector consisting of the options from this `FunctionOptions` for (int32_t i = 0; i < functionOptionsLen; i++) { - mergedOptions.adoptElement(create(std::move(options[i]), status), + mergedOptions.adoptElement(create(options[i], status), status); } @@ -178,16 +182,12 @@ FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, for (int i = 0; i < other.functionOptionsLen; i++) { // Note: this is quadratic in the length of `options` if (!containsOption(mergedOptions, other.options[i])) { - mergedOptions.adoptElement(create(std::move(other.options[i]), - status), + mergedOptions.adoptElement(create(other.options[i], + status), status); } } - delete[] options; - options = nullptr; - functionOptionsLen = 0; - return FunctionOptions(std::move(mergedOptions), status); } @@ -198,12 +198,8 @@ FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, InternalValue::~InternalValue() {} InternalValue& InternalValue::operator=(InternalValue&& other) { - isFallbackValue = other.isFallbackValue; fallbackString = other.fallbackString; - if (!isFallbackValue) { - U_ASSERT(other.val.isValid()); - val.adoptInstead(other.val.orphan()); - } + val = std::move(other.val); return *this; } @@ -214,48 +210,109 @@ InternalValue::InternalValue(InternalValue&& other) { InternalValue::InternalValue(UErrorCode& errorCode) { CHECK_ERROR(errorCode); - NullValue* nv = new NullValue(); - if (nv == nullptr) { + LocalPointer nv(new NullValue()); + if (!nv.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } - val.adoptInstead(nv); + val = std::move(nv); } InternalValue::InternalValue(FunctionValue* v, const UnicodeString& fb) - : fallbackString(fb), val(v) { + : fallbackString(fb) { U_ASSERT(v != nullptr); + val = LocalPointer(v); } -FunctionValue* InternalValue::takeValue(UErrorCode& status) { +const FunctionValue* InternalValue::getValue(UErrorCode& status) const { if 
(U_FAILURE(status)) { - return {}; + return nullptr; } - if (isFallback()) { + // If this is a closure or fallback, error out + if (!isEvaluated()) { status = U_ILLEGAL_ARGUMENT_ERROR; - return {}; + return nullptr; } - U_ASSERT(val.isValid()); - return val.orphan(); + // Follow the indirection to get the value + if (isIndirection()) { + const InternalValue* other = *std::get_if(&val); + U_ASSERT(other != nullptr); + return other->getValue(status); + } + // Otherwise, return the contained FunctionValue + const LocalPointer* result = std::get_if>(&val); + U_ASSERT(result->isValid()); + return (*result).getAlias(); } -const FunctionValue* InternalValue::getValue(UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; +bool InternalValue::isSelectable() const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { + return false; } - if (isFallback()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return {}; + return val->isSelectable(); +} + +/* static */ LocalPointer InternalValue::null(UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(status); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } } - U_ASSERT(val.isValid()); - return val.getAlias(); + return LocalPointer(); } -bool InternalValue::isSelectable() const { - if (isFallbackValue) { +/* static */ LocalPointer InternalValue::fallback(const UnicodeString& s, + UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(s); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } + } + return LocalPointer(); +} + +/* static */ InternalValue InternalValue::closure(Closure* c, const UnicodeString& fb) { + U_ASSERT(c != nullptr); + return InternalValue(c, fb); +} + +bool InternalValue::isClosure() const { + return std::holds_alternative>(val); +} + +bool InternalValue::isEvaluated() const { + return std::holds_alternative>(val) || isIndirection(); 
+} + +bool InternalValue::isIndirection() const { + return std::holds_alternative(val); +} + +bool InternalValue::isNullOperand() const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { return false; } - return val->isSelectable(); + return val->isNullOperand(); +} + +void InternalValue::update(InternalValue& newVal) { + fallbackString = newVal.fallbackString; + val = &newVal; +} + +void InternalValue::update(LocalPointer newVal) { + val = std::move(newVal); +} + +void InternalValue::update(const UnicodeString& fb) { + fallbackString = fb; + val = fb; } // PrioritizedVariant @@ -272,9 +329,11 @@ PrioritizedVariant::~PrioritizedVariant() {} // ---------------- Environments and closures - Environment* Environment::create(const VariableName& var, Closure&& c, Environment* parent, UErrorCode& errorCode) { + Environment* Environment::create(const VariableName& var, Closure* c, + const UnicodeString& fallbackStr, + Environment* parent, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new NonEmptyEnvironment(var, std::move(c), parent); + Environment* result = new NonEmptyEnvironment(var, InternalValue::closure(c, fallbackStr), parent); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -284,21 +343,20 @@ PrioritizedVariant::~PrioritizedVariant() {} Environment* Environment::create(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new EmptyEnvironment(); - if (result == nullptr) { + Environment* result = new EmptyEnvironment(errorCode); + if (U_SUCCESS(errorCode) && result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; } return result; } - const Closure& EmptyEnvironment::lookup(const VariableName& v) const { - (void) v; + InternalValue& EmptyEnvironment::lookup(const VariableName&) { U_ASSERT(false); UPRV_UNREACHABLE_EXIT; } - const Closure& NonEmptyEnvironment::lookup(const VariableName& v) 
const { + InternalValue& NonEmptyEnvironment::lookup(const VariableName& v) { if (v == var) { return rhs; } @@ -317,10 +375,75 @@ PrioritizedVariant::~PrioritizedVariant() {} return parent->has(v); } + InternalValue& EmptyEnvironment::createNull(UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(InternalValue::null(status)); + return addUnnamedValue(std::move(val), status); + } + + InternalValue& EmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(InternalValue::fallback(s, status)); + return addUnnamedValue(std::move(val), status); + } + + InternalValue& EmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(new InternalValue(std::move(v))); + if (!val.isValid()) { + return bogus(); + } + return addUnnamedValue(std::move(val), status); + } + + InternalValue& NonEmptyEnvironment::createNull(UErrorCode& status) { + return parent->createNull(status); + } + + InternalValue& NonEmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + return parent->createFallback(s, status); + } + + InternalValue& NonEmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + return parent->createUnnamed(std::move(v), status); + } + + InternalValue& EmptyEnvironment::addUnnamedValue(LocalPointer val, + UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + U_ASSERT(val.isValid()); + InternalValue* v = val.orphan(); + unnamedValues.adoptElement(v, status); + return *v; + } + + EmptyEnvironment::EmptyEnvironment(UErrorCode& status) : unnamedValues(UVector(status)) { + unnamedValues.setDeleter(uprv_deleteUObject); + } + Environment::~Environment() {} NonEmptyEnvironment::~NonEmptyEnvironment() {} EmptyEnvironment::~EmptyEnvironment() {} + /* static */ Closure* Closure::create(const Expression& expr, Environment& env, 
+ UErrorCode& status) { + NULL_ON_ERROR(status); + + Closure* result = new Closure(expr, env); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; + } + Closure::~Closure() {} // MessageContext methods diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index 6760b31c5d39..b3cae07c7b71 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,36 +32,86 @@ namespace message2 { using namespace data_model; + class Closure; + class Environment; + // InternalValue represents an intermediate value in the message // formatter. - // It can be either a FunctionValue or a "fallback value". A fallback value + // It has four possible states: + // 1. Fallback Value. A fallback value // is a string that serves as a replacement for expressions whose evaluation // caused an error. Fallback values are not passed to functions. + // 2. Closure, representing the unevaluated right-hand side of a declaration. + // 3. Evaluated Value (FunctionValue), representing an evaluated declaration. + // 4. Indirection (const InternalValue*), representing a shared reference to another + // InternalValue. Note that all InternalValues are owned by the global + // environment. + /* + Example: + + .local $x = {$y} + .local $z = {1 :number} + .local $a = {$z} + {{ {$x} {$z} {$a} }} + + If this message is formatted with no arguments, + initially, x, z and a are all bound to Closures. + When the value of x is demanded by the pattern, the contents of x's value + are updated to a Fallback Value (because its RHS contains an unbound variable). + When the value of z is demanded, the contents of z's value are updated to + an Evaluated Value representing the result of :number on the operand. + When the value of a is demanded, the contents of a's value are updated to + an Indirection, pointing to z's value. 
+ + Indirections are used so that a FunctionValue can be uniquely owned by an + InternalValue. Since all InternalValues are owned by the global Environment, + it's safe to use these non-owned pointers. + */ class InternalValue : public UObject { public: - bool isFallback() const { return isFallbackValue; } - bool isNullOperand() const { return isFallback() ? false : val->isNullOperand(); } + bool isFallback() const { return std::holds_alternative(val); } + bool isNullOperand() const; + bool isEvaluated() const; + bool isClosure() const; bool isSelectable() const; - InternalValue() : isFallbackValue(true), fallbackString("") {} - static InternalValue null(UErrorCode& status) { return InternalValue(status); } - static InternalValue fallback(const UnicodeString& s) { return InternalValue(s); } - explicit InternalValue(FunctionValue* v, const UnicodeString& fb); - // Error code is set if this is a fallback - FunctionValue* takeValue(UErrorCode& status); + + Closure& asClosure() { + U_ASSERT(isClosure()); + return **std::get_if>(&val); + } const FunctionValue* getValue(UErrorCode& status) const; UnicodeString asFallback() const { return fallbackString; } - virtual ~InternalValue(); + + static LocalPointer null(UErrorCode& status); + static LocalPointer fallback(const UnicodeString& s, UErrorCode& status); + // Adopts `c` + static InternalValue closure(Closure* c, const UnicodeString& s); + + // Updates the mutable contents of this InternalValue + void update(InternalValue&); + void update(LocalPointer); + void update(const UnicodeString&); + + InternalValue() : val(UnicodeString()) {} + explicit InternalValue(FunctionValue* v, const UnicodeString& fb); InternalValue& operator=(InternalValue&&); InternalValue(InternalValue&&); + virtual ~InternalValue(); private: - bool isFallbackValue = false; UnicodeString fallbackString; - LocalPointer val; + std::variant, // Unevaluated thunk + LocalPointer, // Evaluated value + const InternalValue*> val; // Indirection to another 
value -- Not owned // Null operand constructor explicit InternalValue(UErrorCode& status); // Fallback constructor explicit InternalValue(const UnicodeString& fb) - : isFallbackValue(true), fallbackString(fb) {} + : fallbackString(fb), val(fb) {} + // Closure (unevaluated) constructor + explicit InternalValue(Closure* c, UnicodeString fallbackStr) + : fallbackString(fallbackStr), val(LocalPointer(c)) {} + bool isIndirection() const; }; // class InternalValue @@ -132,66 +182,106 @@ namespace message2 { const Expression& getExpr() const { return expr; } - const Environment& getEnv() const { + Environment& getEnv() const { return env; } - Closure(const Expression& expression, const Environment& environment) : expr(expression), env(environment) {} Closure(Closure&&) = default; + static Closure* create(const Expression&, Environment&, UErrorCode&); virtual ~Closure(); private: + Closure(const Expression& expression, Environment& environment) : expr(expression), env(environment) {} + // An unevaluated expression const Expression& expr; // The environment mapping names used in this // expression to other expressions - const Environment& env; + Environment& env; }; + class NonEmptyEnvironment; + // An environment is represented as a linked chain of // non-empty environments, terminating at an empty environment. // It's searched using linear search. 
class Environment : public UMemory { - public: - virtual bool has(const VariableName&) const = 0; - virtual const Closure& lookup(const VariableName&) const = 0; - static Environment* create(UErrorCode&); - static Environment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); - virtual ~Environment(); + public: + virtual bool has(const VariableName&) const = 0; + virtual InternalValue& lookup(const VariableName&) = 0; + virtual InternalValue& bogus() = 0; + // For convenience so that InternalValue::getValue() can return a reference + // in error cases + FunctionValue& bogusFunctionValue() { return bogusFunctionVal; } + virtual InternalValue& createFallback(const UnicodeString&, UErrorCode&) = 0; + virtual InternalValue& createNull(UErrorCode&) = 0; + virtual InternalValue& createUnnamed(InternalValue&&, UErrorCode&) = 0; + static Environment* create(UErrorCode&); + static Environment* create(const VariableName&, Closure*, const UnicodeString&, + Environment*, UErrorCode&); + virtual ~Environment(); + + private: + FunctionValue bogusFunctionVal; }; - class NonEmptyEnvironment; + // The empty environment includes a "bogus" value to use when an + // InternalValue& is needed (e.g. error conditions), + // and a vector of "unnamed" values, so that the environment can + // own all InternalValues (even those arising from expressions + // that appear directly in a pattern and are not named). 
class EmptyEnvironment : public Environment { public: - EmptyEnvironment() = default; + EmptyEnvironment(UErrorCode& status); virtual ~EmptyEnvironment(); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return bogusValue; } static EmptyEnvironment* create(UErrorCode&); - static NonEmptyEnvironment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); + static NonEmptyEnvironment* create(const VariableName&, InternalValue, + Environment*, UErrorCode&); + + // Creates a fallback value owned by this Environment + InternalValue& createFallback(const UnicodeString&, UErrorCode&) override; + // Creates a null operand owned by this Environment + InternalValue& createNull(UErrorCode&) override; + // Creates an arbitrary value owned by this Environment + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; + + InternalValue& addUnnamedValue(LocalPointer, UErrorCode&); + + InternalValue bogusValue; // Used in place of `nullptr` in error conditions + UVector unnamedValues; }; class NonEmptyEnvironment : public Environment { + public: + InternalValue* update(const VariableName&, InternalValue&&); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return parent->bogus(); } static NonEmptyEnvironment* create(const VariableName&, Closure&&, const Environment*, UErrorCode&); virtual ~NonEmptyEnvironment(); private: friend class Environment; - NonEmptyEnvironment(const VariableName& v, Closure&& c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + NonEmptyEnvironment(const VariableName& v, InternalValue c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + + InternalValue& 
createFallback(const UnicodeString&, UErrorCode&) override; + InternalValue& createNull(UErrorCode&) override; + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; // Maps VariableName onto Closure* // Chain of linked environments VariableName var; - Closure rhs; + InternalValue rhs; const LocalPointer parent; }; diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 13edabf92824..65bd1406dce7 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -249,14 +249,14 @@ StandardFunctions::Number::create(bool isInteger, UErrorCode& success) { } LocalPointer StandardFunctions::Number::call(const FunctionContext& context, - FunctionValue& operand, - FunctionOptions&& options, - UErrorCode& errorCode) { + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); } LocalPointer - val(new NumberValue(*this, context, operand, std::move(options), errorCode)); + val(new NumberValue(*this, context, operand, options, errorCode)); if (!val.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -527,8 +527,8 @@ bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { StandardFunctions::NumberValue::NumberValue(const Number& parent, const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& options, + const FunctionValue& arg, + const FunctionOptions& options, UErrorCode& errorCode) { CHECK_ERROR(errorCode); // Must have an argument @@ -610,7 +610,7 @@ void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, int32_t keysLen, int32_t* prefs, int32_t& prefsLen, - UErrorCode& errorCode) { + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); Number::PluralType type = Number::pluralType(opts); @@ -723,14 +723,14 @@ StandardFunctions::DateTime::create(DateTime::DateTimeType 
type, LocalPointer StandardFunctions::DateTime::call(const FunctionContext& context, - FunctionValue& val, - FunctionOptions&& opts, + const FunctionValue& val, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); } LocalPointer - result(new DateTimeValue(type, context, val, std::move(opts), errorCode)); + result(new DateTimeValue(type, context, val, opts, errorCode)); if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -771,8 +771,8 @@ UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& statu StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, - FunctionValue& val, - FunctionOptions&& options, + const FunctionValue& val, + const FunctionOptions& options, UErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -1045,14 +1045,14 @@ extern UnicodeString formattableToString(const Locale&, LocalPointer StandardFunctions::String::call(const FunctionContext& context, - FunctionValue& val, - FunctionOptions&& opts, + const FunctionValue& val, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); } LocalPointer - result(new StringValue(context, val, std::move(opts), errorCode)); + result(new StringValue(context, val, opts, errorCode)); if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -1066,12 +1066,12 @@ UnicodeString StandardFunctions::StringValue::formatToString(UErrorCode& errorCo } StandardFunctions::StringValue::StringValue(const FunctionContext& context, - FunctionValue& val, - FunctionOptions&& options, + const FunctionValue& val, + const FunctionOptions&, UErrorCode& status) { CHECK_ERROR(status); operand = val.getOperand(); - opts = std::move(options); // No options + // No options // Convert to string formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status); } @@ -1080,7 +1080,7 @@ void 
StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, int32_t keysLen, int32_t* prefs, int32_t& prefsLen, - UErrorCode& errorCode) { + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); // Just compares the key and value as strings diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 18c448b732b3..c99bc91340de 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -41,9 +41,9 @@ namespace message2 { static DateTime* dateTime(UErrorCode&); LocalPointer call(const FunctionContext& context, - FunctionValue& operand, - FunctionOptions&& options, - UErrorCode& errorCode) override; + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; virtual ~DateTime(); private: @@ -70,8 +70,8 @@ namespace message2 { static Number* number( UErrorCode& success); LocalPointer call(const FunctionContext& context, - FunctionValue& operand, - FunctionOptions&& options, + const FunctionValue& operand, + const FunctionOptions& options, UErrorCode& errorCode) override; virtual ~Number(); @@ -116,7 +116,7 @@ namespace message2 { int32_t keysLen, int32_t* prefs, int32_t& prefsLen, - UErrorCode& status) override; + UErrorCode& status) const override; UBool isSelectable() const override { return true; } NumberValue(); virtual ~NumberValue(); @@ -127,8 +127,8 @@ namespace message2 { number::FormattedNumber formattedNumber; NumberValue(const Number&, const FunctionContext&, - FunctionValue&, - FunctionOptions&&, + const FunctionValue&, + const FunctionOptions&, UErrorCode&); }; // class NumberValue @@ -142,14 +142,14 @@ namespace message2 { UnicodeString formattedDate; DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, - FunctionValue&, FunctionOptions&&, UErrorCode&); + const FunctionValue&, const FunctionOptions&, UErrorCode&); 
}; // class DateTimeValue class String : public Function { public: LocalPointer call(const FunctionContext& context, - FunctionValue& val, - FunctionOptions&& opts, + const FunctionValue& val, + const FunctionOptions& opts, UErrorCode& errorCode) override; static String* string(UErrorCode& status); virtual ~String(); @@ -167,14 +167,14 @@ namespace message2 { int32_t keysLen, int32_t* prefs, int32_t& prefsLen, - UErrorCode& status) override; + UErrorCode& status) const override; UBool isSelectable() const override { return true; } virtual ~StringValue(); private: friend class String; UnicodeString formattedString; - StringValue(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&); + StringValue(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&); }; // class StringValue }; diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index a725a37898df..a4f223029f1e 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -338,7 +338,7 @@ namespace message2 { // Selection methods // Takes a vector of FormattedPlaceholders - void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; + void resolveSelectors(MessageContext&, Environment& env, UErrorCode&, UVector&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) void filterVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) @@ -351,15 +351,15 @@ namespace message2 { // Formatting methods [[nodiscard]] InternalValue evalLiteral(const data_model::Literal&, UErrorCode&) const; - void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; + void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, 
UnicodeString&) const; FunctionContext makeFunctionContext(const FunctionOptions&) const; - [[nodiscard]] InternalValue apply(const FunctionName&, InternalValue&&, FunctionOptions&&, - MessageContext&, UErrorCode&) const; - [[nodiscard]] InternalValue evalExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; - [[nodiscard]] InternalValue evalOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&, + MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalExpression(Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] FunctionOptions resolveOptions(Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalOperand(Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; [[nodiscard]] InternalValue evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; - void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; + void formatSelectors(MessageContext& context, Environment& env, UErrorCode &status, UnicodeString& result) const; // Function registry methods bool hasCustomMFFunctionRegistry() const { diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index dcd9f5535f51..0298040f2f3b 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -439,7 +439,7 @@ namespace message2 { * a single named function option. 
It pairs the given name with the `Formattable` * value resulting from evaluating the option's value. * - * `ResolvedFunctionOption` is immutable and movable. It is not copyable. + * `ResolvedFunctionOption` is immutable, movable, and copyable. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. @@ -450,25 +450,19 @@ class U_I18N_API ResolvedFunctionOption : public UObject { private: /* const */ UnicodeString name; - // This is a pointer because FunctionValue is an abstract class, - // and is a raw pointer because FunctionValue is forward-declared - /* const */ FunctionValue* value; + // owned by the global environment + const FunctionValue* value; public: const UnicodeString& getName() const { return name; } - FunctionValue* getValue() const { return value; } + const FunctionValue& getValue() const { return *value; } // Adopts `f` - ResolvedFunctionOption(const UnicodeString& n, FunctionValue* f); + ResolvedFunctionOption(const UnicodeString& n, const FunctionValue& f); ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); - ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { - name = std::move(other.name); - value = std::move(other.value); - other.value = nullptr; - return *this; - } - ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = delete; - ResolvedFunctionOption(const ResolvedFunctionOption&) = delete; + ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) = default; + ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = default; + ResolvedFunctionOption(const ResolvedFunctionOption&) = default; virtual ~ResolvedFunctionOption(); }; // class ResolvedFunctionOption #endif @@ -485,7 +479,7 @@ using FunctionOptionsMap = std::map call(const FunctionContext& context, - FunctionValue& operand, - FunctionOptions&& options, + const FunctionValue& operand, + const FunctionOptions& options, UErrorCode& status) = 0; /** * 
Destructor. @@ -321,6 +319,12 @@ namespace message2 { * Adding a new custom function requires adding a new class that * implements this interface. * + * FunctionValues are assumed to be immutable (the call() method on + * Function takes a const FunctionValue&, and the formatToString() + * and selectKeys() methods are const.) Feedback on whether classes + * implementing FunctionValue need internal mutable state is welcome + * during the Technology Preview period. + * * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ @@ -354,18 +358,6 @@ namespace message2 { * @deprecated This API is for technology preview only. */ virtual const Formattable& getOperand() const { return operand; } - /** - * Returns the resolved options that were used to construct this value. - * `this` may not be used after calling this method. This overload - * is provided so that mergeOptions(), which passes its `this` argument - * by move, can be called. - * - * @return The resolved options for this value. - * - * @internal ICU 77 technology preview - * @deprecated This API is for technology preview only. - */ - virtual FunctionOptions getResolvedOptions() { return std::move(opts); } /** * Returns a reference to the resolved options for this value.
* @@ -426,7 +418,7 @@ namespace message2 { int32_t keysLen, int32_t* prefs, int32_t& prefsLen, - UErrorCode& status) { + UErrorCode& status) const { (void) keys; (void) keysLen; (void) prefs; diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 83f901c36283..d4f6b2dce2e9 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -63,6 +63,7 @@ class TestMessageFormat2: public IntlTest { void testListFormatter(IcuTestErrorCode&); void testMessageRefFormatter(IcuTestErrorCode&); void testComplexOptions(IcuTestErrorCode&); + void testSingleEvaluation(IcuTestErrorCode&); // Feature tests void testEmptyMessage(message2::TestCase::Builder&, IcuTestErrorCode&); @@ -114,7 +115,10 @@ class Person : public FormattableObject { class PersonNameFunction : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) override; virtual ~PersonNameFunction(); PersonNameFunction() {} }; @@ -128,7 +132,7 @@ class PersonNameValue : public FunctionValue { friend class PersonNameFunction; UnicodeString formattedString; - PersonNameValue(FunctionValue&, FunctionOptions&&, UErrorCode&); + PersonNameValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); }; // class PersonNameValue class FormattableProperties : public FormattableObject { @@ -145,7 +149,7 @@ class FormattableProperties : public FormattableObject { class GrammarCasesFunction : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); }; @@ -158,13 +162,13 @@ class GrammarCasesValue : 
public FunctionValue { friend class GrammarCasesFunction; UnicodeString formattedString; - GrammarCasesValue(FunctionValue&, FunctionOptions&&, UErrorCode&); + GrammarCasesValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; }; // class GrammarCasesValue class ListFunction : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); ListFunction() {} virtual ~ListFunction(); @@ -179,8 +183,8 @@ class ListValue : public FunctionValue { UnicodeString formattedString; ListValue(const Locale&, - FunctionValue&, - FunctionOptions&&, + const FunctionValue&, + const FunctionOptions&, UErrorCode&); }; // class ListValue @@ -193,8 +197,8 @@ class NounValue : public FunctionValue { friend class NounFunction; UnicodeString formattedString; - NounValue(FunctionValue&, - FunctionOptions&&, + NounValue(const FunctionValue&, + const FunctionOptions&, UErrorCode&); }; // class NounValue @@ -207,15 +211,15 @@ class AdjectiveValue : public FunctionValue { friend class AdjectiveFunction; UnicodeString formattedString; - AdjectiveValue(FunctionValue&, - FunctionOptions&&, + AdjectiveValue(const FunctionValue&, + const FunctionOptions&, UErrorCode&); }; // class AdjectiveValue class ResourceManager : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); static Hashtable* properties(UErrorCode&); static UnicodeString propertiesAsString(const Hashtable&); @@ -233,25 +237,49 @@ class ResourceManagerValue : public FunctionValue { 
friend class ResourceManager; UnicodeString formattedString; - ResourceManagerValue(FunctionValue&, - FunctionOptions&&, + ResourceManagerValue(const FunctionValue&, + const FunctionOptions&, UErrorCode&); }; // class ResourceManagerValue class NounFunction : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; NounFunction() { } virtual ~NounFunction(); }; class AdjectiveFunction : public Function { public: - LocalPointer call(const FunctionContext&, FunctionValue&, FunctionOptions&&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; AdjectiveFunction() { } virtual ~AdjectiveFunction(); }; +class CounterFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + CounterFunction() { } + virtual ~CounterFunction(); + private: + int32_t count = 0; // Number of times the function was called +}; + +class CounterFunctionValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + CounterFunctionValue(); + virtual ~CounterFunctionValue(); + private: + friend class CounterFunction; + int32_t& count; + + CounterFunctionValue(int32_t&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class CounterFunctionValue + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index 75a1af019ea3..1d031560e231 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -9,6 +9,7 @@ #include "plurrule_impl.h" #include "unicode/listformatter.h" +#include
"unicode/numberformatter.h" #include "messageformat2test.h" #include "hash.h" #include "intltest.h" @@ -242,6 +243,7 @@ void TestMessageFormat2::testCustomFunctions() { testListFormatter(errorCode); testMessageRefFormatter(errorCode); testComplexOptions(errorCode); + testSingleEvaluation(errorCode); } @@ -274,8 +276,8 @@ static bool hasStringOption(const FunctionOptionsMap& opt, } LocalPointer PersonNameFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, + const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { (void) context; @@ -294,14 +296,14 @@ UnicodeString PersonNameValue::formatToString(UErrorCode& status) const { return formattedString; } -PersonNameValue::PersonNameValue(FunctionValue& arg, - FunctionOptions&& options, +PersonNameValue::PersonNameValue(const FunctionValue& arg, + const FunctionOptions& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = arg.getOperand(); - opts = std::move(options); // Tests don't cover composition, so no need to merge options + opts = options; const Formattable* toFormat = &operand; if (U_FAILURE(errorCode)) { @@ -393,8 +395,8 @@ PersonNameValue::~PersonNameValue() {} LocalPointer GrammarCasesFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, + const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { (void) context; @@ -414,15 +416,15 @@ UnicodeString GrammarCasesValue::formatToString(UErrorCode& status) const { return formattedString; } -GrammarCasesValue::GrammarCasesValue(FunctionValue& val, - FunctionOptions&& options, +GrammarCasesValue::GrammarCasesValue(const FunctionValue& val, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = val.getOperand(); - opts = std::move(options); // Tests don't cover composition, so no need to merge options + // Tests don't cover composition, so no need to merge options 
const Formattable* toFormat = &operand; UnicodeString result; @@ -522,8 +524,8 @@ GrammarCasesValue::~GrammarCasesValue() {} LocalPointer ListFunction::call(const FunctionContext& context, - FunctionValue& arg, - FunctionOptions&& opts, + const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); @@ -544,15 +546,15 @@ UnicodeString ListValue::formatToString(UErrorCode& errorCode) const { } message2::ListValue::ListValue(const Locale& locale, - FunctionValue& val, - FunctionOptions&& options, + const FunctionValue& val, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = val.getOperand(); - opts = std::move(options); // Tests don't cover composition, so no need to merge options + // Tests don't cover composition, so no need to merge options const Formattable* toFormat = &operand; if (U_FAILURE(errorCode)) { @@ -697,8 +699,8 @@ static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& statu LocalPointer ResourceManager::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& options, + const FunctionValue& arg, + const FunctionOptions& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); @@ -717,15 +719,15 @@ UnicodeString message2::ResourceManagerValue::formatToString(UErrorCode&) const return formattedString; } -message2::ResourceManagerValue::ResourceManagerValue(FunctionValue& arg, - FunctionOptions&& options, +message2::ResourceManagerValue::ResourceManagerValue(const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = arg.getOperand(); - opts = std::move(options); // Tests don't cover composition, so no need to merge options + // Tests don't cover composition, so no need to merge options const Formattable* toFormat = &operand; // Check for null or fallback @@ -863,8 +865,8 @@ void 
TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { LocalPointer NounFunction::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& opts, + const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); @@ -884,15 +886,15 @@ UnicodeString NounValue::formatToString(UErrorCode& status) const { return formattedString; } -NounValue::NounValue(FunctionValue& arg, - FunctionOptions&& options, +NounValue::NounValue(const FunctionValue& arg, + const FunctionOptions& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = arg.getOperand(); - opts = std::move(options); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); const Formattable* toFormat = &operand; FunctionOptionsMap opt = opts.getOptions(); @@ -923,8 +925,8 @@ NounValue::NounValue(FunctionValue& arg, LocalPointer AdjectiveFunction::call(const FunctionContext&, - FunctionValue& arg, - FunctionOptions&& opts, + const FunctionValue& arg, + const FunctionOptions& opts, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return LocalPointer(); @@ -944,15 +946,15 @@ UnicodeString AdjectiveValue::formatToString(UErrorCode& status) const { return formattedString; } -AdjectiveValue::AdjectiveValue(FunctionValue& arg, - FunctionOptions&& options, +AdjectiveValue::AdjectiveValue(const FunctionValue& arg, + const FunctionOptions& options, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } operand = arg.getOperand(); - opts = std::move(options); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); const Formattable* toFormat = &operand; @@ -996,6 +998,65 @@ AdjectiveFunction::~AdjectiveFunction() {} NounValue::~NounValue() {} AdjectiveValue::~AdjectiveValue() {} +void TestMessageFormat2::testSingleEvaluation(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + 
.adoptFunction(FunctionName("counter"), + new CounterFunction(), + errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testSingleEvaluation"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + + // Test that the RHS of each declaration is evaluated at most once + TestCase test = testBuilder.setPattern(".local $x = {:counter}\ + {{{$x} {$x}}}") + .setExpected("1 1") + .build(); + TestUtils::runTestCase(*this, test, errorCode); +} + +LocalPointer +CounterFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new CounterFunctionValue(count, arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + count++; + return v; +} + +CounterFunctionValue::CounterFunctionValue(int32_t& c, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) : count(c) { + // No operand, no options +} + +UnicodeString CounterFunctionValue::formatToString(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; + } + number::UnlocalizedNumberFormatter nf = number::NumberFormatter::with(); + number::FormattedNumber formattedNumber = nf.locale("en-US").formatInt(count, status); + return formattedNumber.toString(status); +} + +CounterFunction::~CounterFunction() {} +CounterFunctionValue::~CounterFunctionValue() {} + #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/testdata/message2/icu-test-functions.json b/testdata/message2/icu-test-functions.json index 2d2185c71b05..4f4be286537f 100644 --- a/testdata/message2/icu-test-functions.json +++ b/testdata/message2/icu-test-functions.json @@ -119,6 +119,15 @@ "exp": "Expires at 7:23:45 PM GMT+03:30", "ignoreCpp": "ICU-22754 Time zones not working yet (bug)" }, + { + "comment": "Horibly long, but I really 
wanted to test multiple declarations with overrides, and you can't join strings in JSON", + "src": [ + ".input {$exp :datetime timeStyle=short}\n", + "{{Hello John, or even '{$exp :datetime dateStyle=full}'?}}" + ], + "exp": "Hello John, or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}] + }, { "comment": "Horibly long, but I really wanted to test multiple declarations with overrides, and you can't join strings in JSON", "src": [ From 745987233d11cf95c91f504e8d71ed752bae1fab Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 25 Oct 2024 15:49:42 -0700 Subject: [PATCH 37/37] depstest: add messageformat2_evaluation.o to ignore list (for spurious reference to std::exception) --- icu4c/source/test/depstest/depstest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icu4c/source/test/depstest/depstest.py b/icu4c/source/test/depstest/depstest.py index fba45a079815..f993308fbd38 100755 --- a/icu4c/source/test/depstest/depstest.py +++ b/icu4c/source/test/depstest/depstest.py @@ -123,6 +123,9 @@ def _ReadLibrary(root_path, library_name): ("i18n/messageformat2_data_model.o", "typeinfo for std::exception"), ("i18n/messageformat2_data_model.o", "vtable for std::exception"), ("i18n/messageformat2_data_model.o", "std::exception::~exception()"), + ("i18n/messageformat2_evaluation.o", "typeinfo for std::exception"), + ("i18n/messageformat2_evaluation.o", "vtable for std::exception"), + ("i18n/messageformat2_evaluation.o", "std::exception::~exception()"), ("i18n/messageformat2_formattable.o", "typeinfo for std::exception"), ("i18n/messageformat2_formattable.o", "vtable for std::exception"), ("i18n/messageformat2_formattable.o", "std::exception::~exception()"),