diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 73f7fa45e69f..d98561fa461a 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -11,6 +11,7 @@ #include "unicode/messageformat2_data_model.h" #include "unicode/messageformat2_formattable.h" #include "unicode/messageformat2.h" +#include "unicode/ubidi.h" #include "unicode/unistr.h" #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" @@ -26,84 +27,148 @@ using namespace data_model; // ------------------------------------------------------ // Formatting -// The result of formatting a literal is just itself. -static Formattable evalLiteral(const Literal& lit) { - return Formattable(lit.unquoted()); +static UnicodeString varFallback(const VariableName& var) { + UnicodeString str(DOLLAR); + str += var; + return str; +} + +static UnicodeString functionFallback(const InternalValue& operand, + const FunctionName& functionName) { + UnicodeString fallbackStr; + // Create the fallback string for this function call + if (operand.isNullOperand()) { + fallbackStr = UnicodeString(COLON); + fallbackStr += functionName; + } else { + fallbackStr = operand.asFallback(); + } + return fallbackStr; } // Assumes that `var` is a message argument; returns the argument's value. -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const { +[[nodiscard]] InternalValue MessageFormatter::evalArgument(const VariableName& var, + MessageContext& context, + UErrorCode& errorCode) const { if (U_SUCCESS(errorCode)) { // The fallback for a variable name is itself. 
UnicodeString str(DOLLAR); str += var; + // Look up the variable in the global environment const Formattable* val = context.getGlobal(var, errorCode); if (U_SUCCESS(errorCode)) { - return (FormattedPlaceholder(*val, str)); + // If it exists, create a BaseValue (FunctionValue) for it + LocalPointer result(BaseValue::create(locale, *val, errorCode)); + // Add fallback and return an InternalValue + if (U_SUCCESS(errorCode)) { + return InternalValue(result.orphan(), str); + } } } return {}; } // Returns the contents of the literal -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const { - // The fallback for a literal is itself. - return FormattedPlaceholder(evalLiteral(lit), lit.quoted()); +[[nodiscard]] InternalValue MessageFormatter::evalLiteral(const Literal& lit, + UErrorCode& errorCode) const { + // Create a BaseValue (FunctionValue) that wraps the literal + LocalPointer val(BaseValue::create(locale, + Formattable(lit.unquoted()), + errorCode)); + if (U_SUCCESS(errorCode)) { + // The fallback for a literal is itself. + return InternalValue(val.orphan(), lit.quoted()); + } + return {}; } -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { +// InternalValues are passed as references into a global environment object +// that is live for the duration of one formatter call. +// They are mutable references so that they can be updated with a new value +// (when a closure is overwritten with the result of evaluating it), +// which can be shared across different references to the corresponding MF2 +// variable. 
+[[nodiscard]] InternalValue& MessageFormatter::evalOperand(Environment& env, + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { - return {}; + return env.bogus(); } + // Three cases: absent operand; variable; or literal + + // Absent (null) operand if (rand.isNull()) { - return FormattedPlaceholder(); + return env.createNull(status); } + // Variable reference if (rand.isVariable()) { // Check if it's local or global // Note: there is no name shadowing; this is enforced by the parser const VariableName& var = rand.asVariable(); - // TODO: Currently, this code implements lazy evaluation of locals. + + // This code implements lazy call-by-need evaluation of locals. // That is, the environment binds names to a closure, not a resolved value. - // Eager vs. lazy evaluation is an open issue: - // see https://github.com/unicode-org/message-format-wg/issues/299 + // The spec does not require either eager or lazy evaluation. // Look up the variable in the environment if (env.has(var)) { - // `var` is a local -- look it up - const Closure& rhs = env.lookup(var); - // Format the expression using the environment from the closure - return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status); + // `var` is a local -- look it up + InternalValue& rhs = env.lookup(var); + // Evaluate the expression using the environment from the closure + if (!rhs.isEvaluated()) { + Closure& c = rhs.asClosure(); + InternalValue& result = evalExpression(c.getEnv(), + c.getExpr(), + context, + status); + // Overwrite the closure with the result of evaluation + if (result.isFallback()) { + rhs.update(result.asFallback()); + } else { + U_ASSERT(result.isEvaluated()); + // Create an indirection to the result returned + // by evalExpression() + rhs.update(result); + } + return rhs; + } + // If it's already evaluated, just return the value + return rhs; } // Variable wasn't found in locals -- check if it's global - FormattedPlaceholder result 
= evalArgument(var, context, status); + InternalValue result = evalArgument(var, context, status); if (status == U_ILLEGAL_ARGUMENT_ERROR) { status = U_ZERO_ERROR; // Unbound variable -- set a resolution error context.getErrors().setUnresolvedVariable(var, status); // Use fallback per // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution - UnicodeString str(DOLLAR); - str += var; - return FormattedPlaceholder(str); + return env.createFallback(varFallback(var), status); } - return result; - } else { + // Looking up the global variable succeeded; return it + return env.createUnnamed(std::move(result), status); + } + // Literal + else { U_ASSERT(rand.isLiteral()); - return formatLiteral(rand.asLiteral()); + return env.createUnnamed(evalLiteral(rand.asLiteral(), status), status); } } // Resolves a function's options -FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const { +FunctionOptions MessageFormatter::resolveOptions(Environment& env, + const OptionMap& options, + MessageContext& context, + UErrorCode& status) const { + // Create a vector of options LocalPointer optionsVector(createUVector(status)); if (U_FAILURE(status)) { return {}; } LocalPointer resolvedOpt; + // For each option... for (int i = 0; i < options.size(); i++) { const Option& opt = options.getOption(i, status); if (U_FAILURE(status)) { @@ -112,159 +177,176 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O const UnicodeString& k = opt.getName(); const Operand& v = opt.getValue(); - // Options are fully evaluated before calling the function - // Format the operand - FormattedPlaceholder rhsVal = formatOperand(env, v, context, status); + // ...evaluate its right-hand side... + InternalValue& rhsVal = evalOperand(env, v, context, status); + // ...giving a FunctionValue. 
+ const FunctionValue* optVal = rhsVal.getValue(status); + // Ignore fallback values if (U_FAILURE(status)) { - return {}; - } - if (!rhsVal.isFallback()) { - resolvedOpt.adoptInstead(create(ResolvedFunctionOption(k, rhsVal.asFormattable()), status)); - if (U_FAILURE(status)) { - return {}; - } - optionsVector->adoptElement(resolvedOpt.orphan(), status); + continue; } - } + // The option is resolved; add it to the vector + ResolvedFunctionOption resolvedOpt(k, *optVal); + LocalPointer + p(create(std::move(resolvedOpt), status)); + EMPTY_ON_ERROR(status); + optionsVector->adoptElement(p.orphan(), status); + } + // Return a new FunctionOptions constructed from the vector of options return FunctionOptions(std::move(*optionsVector), status); } -// Overload that dispatches on argument type. Syntax doesn't provide for options in this case. -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const { - if (U_FAILURE(status)) { - return {}; +static UBiDiDirection getBiDiDirection(const Locale& locale, + const UnicodeString& s) { + if (s.isEmpty()) { + return locale.isRightToLeft() ? 
UBIDI_RTL : UBIDI_LTR; + } + if (s == u"ltr") { + return UBIDI_LTR; + } + if (s == u"rtl") { + return UBIDI_RTL; + } + if (s == u"auto") { + return UBIDI_MIXED; } + return UBIDI_NEUTRAL; +} - // These cases should have been checked for already - U_ASSERT(!argument.isFallback() && !argument.isNullOperand()); +FunctionContext MessageFormatter::makeFunctionContext(const FunctionOptions& options) const { + // Look up "u:locale", "u:dir", and "u:id" in the options + UnicodeString localeStr = options.getStringFunctionOption(UnicodeString("u:locale")); - const Formattable& toFormat = argument.asFormattable(); - switch (toFormat.getType()) { - case UFMT_OBJECT: { - const FormattableObject* obj = toFormat.getObject(status); - U_ASSERT(U_SUCCESS(status)); - U_ASSERT(obj != nullptr); - const UnicodeString& type = obj->tag(); - FunctionName functionName; - if (!getDefaultFormatterNameByType(type, functionName)) { - // No formatter for this type -- follow default behavior - break; + // Use default locale from context, unless "u:locale" is provided + Locale localeToUse; + if (localeStr.isEmpty()) { + localeToUse = locale; + } else { + UErrorCode localStatus = U_ZERO_ERROR; + int32_t len = localeStr.length(); + char* buf = static_cast(uprv_malloc(len + 1)); + localeStr.extract(0, len, buf, len); + Locale l = Locale::forLanguageTag(StringPiece(buf, len), localStatus); + uprv_free(buf); + if (U_SUCCESS(localStatus)) { + localeToUse = l; + } else { + localeToUse = locale; } - return evalFormatterCall(functionName, - std::move(argument), - FunctionOptions(), - context, - status); - } - default: { - // TODO: The array case isn't handled yet; not sure whether it's desirable - // to have a default list formatter - break; - } } - // No formatter for this type, or it's a primitive type (which will be formatted later) - // -- just return the argument itself - return std::move(argument); + UBiDiDirection dir = getBiDiDirection(localeToUse, + 
options.getStringFunctionOption(UnicodeString("u:dir"))); + UnicodeString id = options.getStringFunctionOption(UnicodeString("u:id")); + + return FunctionContext(localeToUse, dir, id); } -// Overload that dispatches on function name -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { +// Looks up `functionName` and applies it to an operand and options, +// handling errors if the function is unbound +[[nodiscard]] InternalValue& MessageFormatter::apply(Environment& env, + const FunctionName& functionName, + InternalValue& rand, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { - return {}; + return env.bogus(); } - DynamicErrors& errs = context.getErrors(); + // Create the fallback string to use in case of an error + // calling the function + UnicodeString fallbackStr = functionFallback(rand, functionName); - UnicodeString fallback(COLON); - fallback += functionName; - if (!argument.isNullOperand()) { - fallback = argument.fallback; + // Look up the function name + Function* function = lookupFunction(functionName, status); + if (U_FAILURE(status)) { + // Function is unknown -- set error and use the fallback value + status = U_ZERO_ERROR; + context.getErrors().setUnknownFunction(functionName, status); + return env.createFallback(fallbackStr, status); + } + // Value is not a fallback (checked by the caller), + // so we can safely call getValue() + const FunctionValue* functionArg(rand.getValue(status)); + U_ASSERT(U_SUCCESS(status)); + // Call the function + LocalPointer + functionResult(function->call(makeFunctionContext(options), + *functionArg, + std::move(options), + status)); + // Handle any errors signaled by the function + // (and use the fallback value) + if (status == U_MF_OPERAND_MISMATCH_ERROR) { + status = U_ZERO_ERROR; + 
context.getErrors().setOperandMismatchError(functionName, status); + return env.createFallback(fallbackStr, status); } - - if (isFormatter(functionName)) { - LocalPointer formatterImpl(getFormatter(functionName, status)); - if (U_FAILURE(status)) { - if (status == U_MF_FORMATTING_ERROR) { - errs.setFormattingError(functionName, status); - status = U_ZERO_ERROR; - return {}; - } - if (status == U_MF_UNKNOWN_FUNCTION_ERROR) { - errs.setUnknownFunction(functionName, status); - status = U_ZERO_ERROR; - return {}; - } - // Other errors are non-recoverable - return {}; - } - U_ASSERT(formatterImpl != nullptr); - - UErrorCode savedStatus = status; - FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status); - // Update errors - if (savedStatus != status) { - if (U_FAILURE(status)) { - if (status == U_MF_OPERAND_MISMATCH_ERROR) { - status = U_ZERO_ERROR; - errs.setOperandMismatchError(functionName, status); - } else { - status = U_ZERO_ERROR; - // Convey any error generated by the formatter - // as a formatting error, except for operand mismatch errors - errs.setFormattingError(functionName, status); - } - return FormattedPlaceholder(fallback); - } else { - // Ignore warnings - status = savedStatus; - } - } - // Ignore the output if any errors occurred - if (errs.hasFormattingError()) { - return FormattedPlaceholder(fallback); - } - return result; + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + context.getErrors().setFormattingError(functionName, status); + return env.createFallback(fallbackStr, status); } - // No formatter with this name -- set error - if (isSelector(functionName)) { - errs.setFormattingError(functionName, status); - } else { - errs.setUnknownFunction(functionName, status); + if (U_FAILURE(status)) { + return env.bogus(); } - return FormattedPlaceholder(fallback); + // Success; return the result + return env.createUnnamed(InternalValue(functionResult.orphan(), fallbackStr), status); } -// 
Formats an expression using `globalEnv` for the values of variables -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { +// Evaluates an expression using `globalEnv` for the values of variables +[[nodiscard]] InternalValue& MessageFormatter::evalExpression(Environment& globalEnv, + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { - return {}; + return globalEnv.bogus(); } const Operand& rand = expr.getOperand(); - // Format the operand (formatOperand handles the case of a null operand) - FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status); - - // Don't call the function on error values - if (randVal.isFallback()) { - return randVal; - } + // Evaluate the operand (evalOperand handles the case of a null operand) + InternalValue& randVal = evalOperand(globalEnv, rand, context, status); + // If there's no function, we check for an implicit formatter if (!expr.isFunctionCall()) { - // Dispatch based on type of `randVal` - return evalFormatterCall(std::move(randVal), - context, - status); + const FunctionValue* contained = randVal.getValue(status); + if (U_FAILURE(status)) { + // Fallback or null -- no implicit formatter + status = U_ZERO_ERROR; + return randVal; + } + const Formattable& toFormat = contained->getOperand(); + // If it has an object type, there might be an implicit formatter for it... + switch (toFormat.getType()) { + case UFMT_OBJECT: { + const FormattableObject* obj = toFormat.getObject(status); + U_ASSERT(U_SUCCESS(status)); + U_ASSERT(obj != nullptr); + const UnicodeString& type = obj->tag(); + FunctionName functionName; + if (!getDefaultFormatterNameByType(type, functionName)) { + // No formatter for this type -- follow default behavior + return randVal; + } + // ... 
apply the implicit formatter + return apply(globalEnv, + functionName, + randVal, + FunctionOptions(), + context, + status); + } + default: + // No formatters for other types, so just return the evaluated operand + return randVal; + } } else { + // Don't call the function on error values + if (randVal.isFallback()) { + return randVal; + } + // Get the function name and options from the operator const Operator* rator = expr.getOperator(status); U_ASSERT(U_SUCCESS(status)); const FunctionName& functionName = rator->getFunctionName(); @@ -272,25 +354,17 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // Resolve the options FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); - // Call the formatter function - // The fallback for a nullary function call is the function name - UnicodeString fallback; - if (rand.isNull()) { - fallback = UnicodeString(COLON); - fallback += functionName; - } else { - fallback = randVal.fallback; - } - return evalFormatterCall(functionName, - std::move(randVal), - std::move(resolvedOptions), - context, - status); + // Call the function with the operand and arguments + return apply(globalEnv, functionName, + randVal, std::move(resolvedOptions), context, status); } } // Formats each text and expression part of a pattern, appending the results to `result` -void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatPattern(MessageContext& context, + Environment& globalEnv, + const Pattern& pat, + UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); for (int32_t i = 0; i < pat.numParts(); i++) { @@ -301,19 +375,26 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // Markup is ignored } else { // Format the expression - FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), 
context, status); - // Force full evaluation, e.g. applying default formatters to - // unformatted input (or formatting numbers as strings) - UnicodeString partResult = partVal.formatToString(locale, status); - result += partResult; - // Handle formatting errors. `formatToString()` can't take a context and thus can't - // register an error directly - if (status == U_MF_FORMATTING_ERROR) { - status = U_ZERO_ERROR; - // TODO: The name of the formatter that failed is unavailable. - // Not ideal, but it's hard for `formatToString()` - // to pass along more detailed diagnostics - context.getErrors().setFormattingError(status); + InternalValue& partVal = evalExpression(globalEnv, part.contents(), context, status); + if (partVal.isFallback()) { + result += LEFT_CURLY_BRACE; + result += partVal.asFallback(); + result += RIGHT_CURLY_BRACE; + } else { + // Do final formatting (e.g. formatting numbers as strings) + const FunctionValue* val = partVal.getValue(status); + // Shouldn't be null or a fallback + U_ASSERT(U_SUCCESS(status)); + result += val->formatToString(status); + // Handle formatting errors. `formatToString()` can't take a context and thus can't + // register an error directly + if (status == U_MF_FORMATTING_ERROR) { + status = U_ZERO_ERROR; + // TODO: The name of the formatter that failed is unavailable. 
+ // Not ideal, but it's hard for `formatToString()` + // to pass along more detailed diagnostics + context.getErrors().setFormattingError(status); + } } } } @@ -324,7 +405,7 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors // `res` is a vector of ResolvedSelectors -void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const { +void MessageFormatter::resolveSelectors(MessageContext& context, Environment& env, UErrorCode &status, UVector& res) const { CHECK_ERROR(status); U_ASSERT(!dataModel.hasPattern()); @@ -334,8 +415,8 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status); - if (rv.hasSelector()) { + InternalValue& rv = evalExpression(env, selectors[i], context, status); + if (rv.isSelectable()) { // 2ii. If selection is supported for rv: // (True if this code has been reached) } else { @@ -344,15 +425,11 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // Append nomatch as the last element of the list res. // Emit a Selection Error. // (Note: in this case, rv, being a fallback, serves as `nomatch`) - #if U_DEBUG - const DynamicErrors& err = context.getErrors(); - U_ASSERT(err.hasError()); - U_ASSERT(rv.argument().isFallback()); - #endif + context.getErrors().setSelectorError({}, status); } // 2ii(a). Append rv as the last element of the list res. 
// (Also fulfills 2iii) - LocalPointer v(create(std::move(rv), status)); + LocalPointer v(create(std::move(rv), status)); CHECK_ERROR(status); res.adoptElement(v.orphan(), status); } @@ -362,18 +439,15 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // `keys` and `matches` are vectors of strings void MessageFormatter::matchSelectorKeys(const UVector& keys, MessageContext& context, - ResolvedSelector&& rv, + InternalValue&& rv, UVector& keysOut, UErrorCode& status) const { CHECK_ERROR(status); - if (!rv.hasSelector()) { - // Return an empty list of matches + if (!rv.isSelectable()) { return; } - auto selectorImpl = rv.getSelector(); - U_ASSERT(selectorImpl != nullptr); UErrorCode savedStatus = status; // Convert `keys` to an array @@ -391,24 +465,26 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, LocalArray adoptedKeys(keysArr); // Create an array to hold the output - UnicodeString* prefsArr = new UnicodeString[keysLen]; + int32_t* prefsArr = static_cast(uprv_malloc(keysLen * sizeof(int32_t))); if (prefsArr == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } - LocalArray adoptedPrefs(prefsArr); + int32_t prefsLen = 0; // Call the selector - selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(), - adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, - status); + // Caller checked for fallback, so it's safe to call getValue() + const FunctionValue* rvVal = rv.getValue(status); + U_ASSERT(U_SUCCESS(status)); + rvVal->selectKeys(adoptedKeys.getAlias(), keysLen, prefsArr, prefsLen, + status); // Update errors if (savedStatus != status) { if (U_FAILURE(status)) { status = U_ZERO_ERROR; - context.getErrors().setSelectorError(rv.getSelectorName(), status); + context.getErrors().setSelectorError({}, status); } else { // Ignore warnings status = savedStatus; @@ -420,7 +496,8 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, // Copy the resulting keys (if there was no error) 
keysOut.removeAllElements(); for (int32_t i = 0; i < prefsLen; i++) { - UnicodeString* k = message2::create(std::move(prefsArr[i]), status); + UnicodeString* k = + message2::create(std::move(keysArr[prefsArr[i]]), status); if (k == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; @@ -428,12 +505,17 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, keysOut.adoptElement(k, status); CHECK_ERROR(status); } + + uprv_free(prefsArr); } // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences // `res` is a vector of FormattedPlaceholders; // `pref` is a vector of vectors of strings -void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const { +void MessageFormatter::resolvePreferences(MessageContext& context, + UVector& res, + UVector& pref, + UErrorCode &status) const { CHECK_ERROR(status); // 1. Let pref be a new empty list of lists of strings. @@ -471,7 +553,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, } // 2iii. Let `rv` be the resolved value at index `i` of `res`. U_ASSERT(i < res.size()); - ResolvedSelector rv = std::move(*(static_cast(res[i]))); + InternalValue rv = std::move(*(static_cast(res[i]))); // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) LocalPointer matches(createUVector(status)); matchSelectorKeys(*keys, context, std::move(rv), *matches, status); @@ -604,117 +686,10 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo // 7. 
Select the pattern of `var` } - -// Evaluate the operand -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - if (rand.isNull()) { - return ResolvedSelector(FormattedPlaceholder()); - } - - if (rand.isLiteral()) { - return ResolvedSelector(formatLiteral(rand.asLiteral())); - } - - // Must be variable - const VariableName& var = rand.asVariable(); - // Resolve the variable - if (env.has(var)) { - const Closure& referent = env.lookup(var); - // Resolve the referent - return resolveVariables(referent.getEnv(), referent.getExpr(), context, status); - } - // Either this is a global var or an unbound var -- - // either way, it can't be bound to a function call. - // Check globals - FormattedPlaceholder val = evalArgument(var, context, status); - if (status == U_ILLEGAL_ARGUMENT_ERROR) { - status = U_ZERO_ERROR; - // Unresolved variable -- could be a previous warning. 
Nothing to resolve - U_ASSERT(context.getErrors().hasUnresolvedVariableError()); - return ResolvedSelector(FormattedPlaceholder(var)); - } - // Pass through other errors - return ResolvedSelector(std::move(val)); -} - -// Evaluate the expression except for not performing the top-level function call -// (which is expected to be a selector, but may not be, in error cases) -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Function call -- resolve the operand and options - if (expr.isFunctionCall()) { - const Operator* rator = expr.getOperator(status); - U_ASSERT(U_SUCCESS(status)); - // Already checked that rator is non-reserved - const FunctionName& selectorName = rator->getFunctionName(); - if (isSelector(selectorName)) { - auto selector = getSelector(context, selectorName, status); - if (U_SUCCESS(status)) { - FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status); - // Operand may be the null argument, but resolveVariables() handles that - FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status); - return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument)); - } - } else if (isFormatter(selectorName)) { - context.getErrors().setSelectorError(selectorName, status); - } else { - context.getErrors().setUnknownFunction(selectorName, status); - } - // Non-selector used as selector; an error would have been recorded earlier - UnicodeString fallback(COLON); - fallback += selectorName; - if (!expr.getOperand().isNull()) { - fallback = formatOperand(env, expr.getOperand(), context, status).fallback; - } - return ResolvedSelector(FormattedPlaceholder(fallback)); - } else { - // Might be a variable reference, so expand one more level of variable - return resolveVariables(env, expr.getOperand(), context, 
status); - } -} - -ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Resolve expression to determine if it's a function call - ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status); - - DynamicErrors& err = context.getErrors(); - - // If there is a selector, then `resolveVariables()` recorded it in the context - if (exprResult.hasSelector()) { - // Check if there was an error - if (exprResult.argument().isFallback()) { - // Use a null expression if it's a syntax or data model warning; - // create a valid (non-fallback) formatted placeholder from the - // fallback string otherwise - if (err.hasSyntaxError() || err.hasDataModelError()) { - return ResolvedSelector(FormattedPlaceholder()); // Null operand - } else { - return ResolvedSelector(exprResult.takeArgument()); - } - } - return exprResult; - } - - // No selector was found; error should already have been set - U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError()); - return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback)); -} - -void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { +void MessageFormatter::formatSelectors(MessageContext& context, + Environment& env, + UErrorCode &status, + UnicodeString& result) const { CHECK_ERROR(status); // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection @@ -770,19 +745,17 @@ UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments checkDeclarations(context, env, status); LocalPointer globalEnv(env); + DynamicErrors& err = context.getErrors(); UnicodeString result; - if (dataModel.hasPattern()) { - formatPattern(context, *globalEnv, 
dataModel.getPattern(), status, result); - } else { - // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value - // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection - const DynamicErrors& err = context.getErrors(); - if (err.hasSyntaxError() || err.hasDataModelError()) { - result += REPLACEMENT; + + if (!(err.hasSyntaxError() || err.hasDataModelError())) { + if (dataModel.hasPattern()) { + formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); } else { formatSelectors(context, *globalEnv, status, result); } } + // Update status according to all errors seen while formatting if (signalErrors) { context.checkErrors(status); @@ -855,7 +828,12 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& // memoizing the value of localEnv up to this point // Add the LHS to the environment for checking the next declaration - env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status); + const VariableName& lhs = decl.getVariable(); + env = Environment::create(lhs, + Closure::create(rhs, *env, status), + varFallback(lhs), + env, + status); CHECK_ERROR(status); } } diff --git a/icu4c/source/i18n/messageformat2_allocation.h b/icu4c/source/i18n/messageformat2_allocation.h index 7be27e222520..e375ceca439b 100644 --- a/icu4c/source/i18n/messageformat2_allocation.h +++ b/icu4c/source/i18n/messageformat2_allocation.h @@ -131,6 +131,18 @@ namespace message2 { return result; } + template + inline T* create(const T& node, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + T* result = new T(node); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; + } + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 41e4c9a8020a..05abaf11c8a2 100644 --- 
a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -7,6 +7,7 @@ #if !UCONFIG_NO_MF2 +#include "unicode/ubidi.h" #include "messageformat2_allocation.h" #include "messageformat2_evaluation.h" #include "messageformat2_macros.h" @@ -20,15 +21,54 @@ namespace message2 { using namespace data_model; +// BaseValue +// --------- + +BaseValue::BaseValue(const Locale& loc, const Formattable& source) + : locale(loc) { + operand = source; +} + +/* static */ BaseValue* BaseValue::create(const Locale& locale, + const Formattable& source, + UErrorCode& errorCode) { + return message2::create(BaseValue(locale, source), errorCode); +} + +extern UnicodeString formattableToString(const Locale&, const UBiDiDirection, const Formattable&, UErrorCode&); + +UnicodeString BaseValue::formatToString(UErrorCode& errorCode) const { + return formattableToString(locale, + UBIDI_NEUTRAL, + operand, + errorCode); +} + +BaseValue& BaseValue::operator=(BaseValue&& other) noexcept { + operand = std::move(other.operand); + opts = std::move(other.opts); + locale = other.locale; + + return *this; +} + +BaseValue::BaseValue(BaseValue&& other) { + *this = std::move(other); +} + // Functions // ------------- ResolvedFunctionOption::ResolvedFunctionOption(ResolvedFunctionOption&& other) { - name = std::move(other.name); - value = std::move(other.value); + *this = std::move(other); } -ResolvedFunctionOption::~ResolvedFunctionOption() {} +ResolvedFunctionOption::ResolvedFunctionOption(const UnicodeString& n, + const FunctionValue& f) : name(n), value(&f) {} + +ResolvedFunctionOption::~ResolvedFunctionOption() { + value = nullptr; // value is not owned +} const ResolvedFunctionOption* FunctionOptions::getResolvedFunctionOptions(int32_t& len) const { @@ -44,79 +84,236 @@ FunctionOptions::FunctionOptions(UVector&& optionsVector, UErrorCode& status) { options = moveVectorToArray(optionsVector, status); } -UBool FunctionOptions::getFunctionOption(const 
UnicodeString& key, Formattable& option) const { +const FunctionValue* +FunctionOptions::getFunctionOption(const UnicodeString& key, + UErrorCode& status) const { if (options == nullptr) { U_ASSERT(functionOptionsLen == 0); } for (int32_t i = 0; i < functionOptionsLen; i++) { const ResolvedFunctionOption& opt = options[i]; if (opt.getName() == key) { - option = opt.getValue(); - return true; + return &opt.getValue(); } } - return false; + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; } -UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const { - Formattable option; - if (getFunctionOption(key, option)) { - if (option.getType() == UFMT_STRING) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = option.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); - return val; - } - } - // For anything else, including non-string values, return "". - // Alternately, could try to stringify the non-string option. - // (Currently, no tests require that.) 
+ +UnicodeString +FunctionOptions::getStringFunctionOption(const UnicodeString& k, UErrorCode& errorCode) const { + const FunctionValue* option = getFunctionOption(k, errorCode); + if (U_SUCCESS(errorCode)) { + UnicodeString result = option->formatToString(errorCode); + if (U_SUCCESS(errorCode)) { + return result; + } + } return {}; } -FunctionOptions& FunctionOptions::operator=(FunctionOptions&& other) noexcept { - functionOptionsLen = other.functionOptionsLen; - options = other.options; - other.functionOptionsLen = 0; - other.options = nullptr; +UnicodeString FunctionOptions::getStringFunctionOption(const UnicodeString& key) const { + UErrorCode localStatus = U_ZERO_ERROR; + + UnicodeString result = getStringFunctionOption(key, localStatus); + if (U_FAILURE(localStatus)) { + return {}; + } + return result; +} + +FunctionOptions& FunctionOptions::operator=(FunctionOptions other) noexcept { + swap(*this, other); return *this; } -FunctionOptions::FunctionOptions(FunctionOptions&& other) { - *this = std::move(other); +FunctionOptions::FunctionOptions(const FunctionOptions& other) { + U_ASSERT(!other.bogus); + functionOptionsLen = other.functionOptionsLen; + options = nullptr; + if (functionOptionsLen != 0) { + UErrorCode localStatus = U_ZERO_ERROR; + options = copyArray(other.options, functionOptionsLen, localStatus); + if (U_FAILURE(localStatus)) { + bogus = true; + } + } } FunctionOptions::~FunctionOptions() { if (options != nullptr) { delete[] options; + options = nullptr; } } -// ResolvedSelector -// ---------------- -ResolvedSelector::ResolvedSelector(const FunctionName& fn, - Selector* sel, - FunctionOptions&& opts, - FormattedPlaceholder&& val) - : selectorName(fn), selector(sel), options(std::move(opts)), value(std::move(val)) { - U_ASSERT(sel != nullptr); +static bool containsOption(const UVector& opts, const ResolvedFunctionOption& opt) { + for (int32_t i = 0; i < opts.size(); i++) { + if (static_cast(opts[i])->getName() + == opt.getName()) { + return true; 
+ } + } + return false; +} + +// Options in `this` take precedence +FunctionOptions FunctionOptions::mergeOptions(const FunctionOptions& other, + UErrorCode& status) const { + UVector mergedOptions(status); + mergedOptions.setDeleter(uprv_deleteUObject); + + if (U_FAILURE(status)) { + return {}; + } + if (bogus || other.bogus) { + status = U_MEMORY_ALLOCATION_ERROR; + return {}; + } + + // Create a new vector consisting of the options from this `FunctionOptions` + for (int32_t i = 0; i < functionOptionsLen; i++) { + mergedOptions.adoptElement(create(options[i], status), + status); + } + + // Add each option from `other` that doesn't appear in this `FunctionOptions` + for (int i = 0; i < other.functionOptionsLen; i++) { + // Note: this is quadratic in the length of `options` + if (!containsOption(mergedOptions, other.options[i])) { + mergedOptions.adoptElement(create(other.options[i], + status), + status); + } + } + + return FunctionOptions(std::move(mergedOptions), status); } -ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move(val)) {} +// InternalValue +// ------------- + -ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept { - selectorName = std::move(other.selectorName); - selector.adoptInstead(other.selector.orphan()); - options = std::move(other.options); - value = std::move(other.value); +InternalValue::~InternalValue() {} + +InternalValue& InternalValue::operator=(InternalValue&& other) { + fallbackString = other.fallbackString; + val = std::move(other.val); return *this; } -ResolvedSelector::ResolvedSelector(ResolvedSelector&& other) { +InternalValue::InternalValue(InternalValue&& other) { *this = std::move(other); } -ResolvedSelector::~ResolvedSelector() {} +InternalValue::InternalValue(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer nv(new NullValue()); + if (!nv.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + val = std::move(nv); +} + 
+InternalValue::InternalValue(FunctionValue* v, const UnicodeString& fb) + : fallbackString(fb) { + U_ASSERT(v != nullptr); + val = LocalPointer(v); +} + +const FunctionValue* InternalValue::getValue(UErrorCode& status) const { + if (U_FAILURE(status)) { + return nullptr; + } + // If this is a closure or fallback, error out + if (!isEvaluated()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + // Follow the indirection to get the value + if (isIndirection()) { + const InternalValue* other = *std::get_if(&val); + U_ASSERT(other != nullptr); + return other->getValue(status); + } + // Otherwise, return the contained FunctionValue + const LocalPointer* result = std::get_if>(&val); + U_ASSERT(result->isValid()); + return (*result).getAlias(); +} + +bool InternalValue::isSelectable() const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { + return false; + } + return val->isSelectable(); +} + +/* static */ LocalPointer InternalValue::null(UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(status); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } + } + return LocalPointer(); +} + +/* static */ LocalPointer InternalValue::fallback(const UnicodeString& s, + UErrorCode& status) { + if (U_SUCCESS(status)) { + InternalValue* result = new InternalValue(s); + if (U_SUCCESS(status)) { + return LocalPointer(result); + } + } + return LocalPointer(); +} + +/* static */ InternalValue InternalValue::closure(Closure* c, const UnicodeString& fb) { + U_ASSERT(c != nullptr); + return InternalValue(c, fb); +} + +bool InternalValue::isClosure() const { + return std::holds_alternative>(val); +} + +bool InternalValue::isEvaluated() const { + return std::holds_alternative>(val) || isIndirection(); +} + +bool InternalValue::isIndirection() const { + return std::holds_alternative(val); +} + +bool InternalValue::isNullOperand() const { + UErrorCode 
localStatus = U_ZERO_ERROR; + const FunctionValue* val = getValue(localStatus); + if (U_FAILURE(localStatus)) { + return false; + } + return val->isNullOperand(); +} + +void InternalValue::update(InternalValue& newVal) { + fallbackString = newVal.fallbackString; + val = &newVal; +} + +void InternalValue::update(LocalPointer newVal) { + val = std::move(newVal); +} + +void InternalValue::update(const UnicodeString& fb) { + fallbackString = fb; + val = fb; +} // PrioritizedVariant // ------------------ @@ -132,9 +329,11 @@ PrioritizedVariant::~PrioritizedVariant() {} // ---------------- Environments and closures - Environment* Environment::create(const VariableName& var, Closure&& c, Environment* parent, UErrorCode& errorCode) { + Environment* Environment::create(const VariableName& var, Closure* c, + const UnicodeString& fallbackStr, + Environment* parent, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new NonEmptyEnvironment(var, std::move(c), parent); + Environment* result = new NonEmptyEnvironment(var, InternalValue::closure(c, fallbackStr), parent); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -144,21 +343,20 @@ PrioritizedVariant::~PrioritizedVariant() {} Environment* Environment::create(UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Environment* result = new EmptyEnvironment(); - if (result == nullptr) { + Environment* result = new EmptyEnvironment(errorCode); + if (U_SUCCESS(errorCode) && result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; } return result; } - const Closure& EmptyEnvironment::lookup(const VariableName& v) const { - (void) v; + InternalValue& EmptyEnvironment::lookup(const VariableName&) { U_ASSERT(false); UPRV_UNREACHABLE_EXIT; } - const Closure& NonEmptyEnvironment::lookup(const VariableName& v) const { + InternalValue& NonEmptyEnvironment::lookup(const VariableName& v) { if (v == var) { return rhs; } @@ -177,10 +375,75 @@ 
PrioritizedVariant::~PrioritizedVariant() {} return parent->has(v); } + InternalValue& EmptyEnvironment::createNull(UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(InternalValue::null(status)); + return addUnnamedValue(std::move(val), status); + } + + InternalValue& EmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(InternalValue::fallback(s, status)); + return addUnnamedValue(std::move(val), status); + } + + InternalValue& EmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + LocalPointer val(new InternalValue(std::move(v))); + if (!val.isValid()) { + return bogus(); + } + return addUnnamedValue(std::move(val), status); + } + + InternalValue& NonEmptyEnvironment::createNull(UErrorCode& status) { + return parent->createNull(status); + } + + InternalValue& NonEmptyEnvironment::createFallback(const UnicodeString& s, UErrorCode& status) { + return parent->createFallback(s, status); + } + + InternalValue& NonEmptyEnvironment::createUnnamed(InternalValue&& v, UErrorCode& status) { + return parent->createUnnamed(std::move(v), status); + } + + InternalValue& EmptyEnvironment::addUnnamedValue(LocalPointer val, + UErrorCode& status) { + if (U_FAILURE(status)) { + return bogus(); + } + U_ASSERT(val.isValid()); + InternalValue* v = val.orphan(); + unnamedValues.adoptElement(v, status); + return *v; + } + + EmptyEnvironment::EmptyEnvironment(UErrorCode& status) : unnamedValues(UVector(status)) { + unnamedValues.setDeleter(uprv_deleteUObject); + } + Environment::~Environment() {} NonEmptyEnvironment::~NonEmptyEnvironment() {} EmptyEnvironment::~EmptyEnvironment() {} + /* static */ Closure* Closure::create(const Expression& expr, Environment& env, + UErrorCode& status) { + NULL_ON_ERROR(status); + + Closure* result = new Closure(expr, env); + if (result == nullptr) { + 
status = U_MEMORY_ALLOCATION_ERROR; + } + return result; + } + Closure::~Closure() {} // MessageContext methods diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index b8ae0242367d..b3cae07c7b71 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -32,6 +32,112 @@ namespace message2 { using namespace data_model; + class Closure; + class Environment; + + // InternalValue represents an intermediate value in the message + // formatter. + // It has four possible states: + // 1. Fallback Value. A fallback value + // is a string that serves as a replacement for expressions whose evaluation + // caused an error. Fallback values are not passed to functions. + // 2. Closure, representing the unevaluated right-hand side of a declaration. + // 3. Evaluated Value (FunctionValue), representing an evaluated declaration. + // 4. Indirection (const InternalValue*), representing a shared reference to another + // InternalValue. Note that all InternalValues are owned by the global + // environment. + /* + Example: + + .local $x = {$y} + .local $z = {1 :number} + .local $a = {$z} + {{ {$x} {$z} {$a} }} + + If this message is formatted with no arguments, + initially, x, z and a are all bound to Closures. + When the value of x is demanded by the pattern, the contents of x's value + are updated to a Fallback Value (because its RHS contains an unbound variable). + When the value of z is demanded, the contents of z's value are updated to + an Evaluated Value representing the result of :number on the operand. + When the value of a is demanded, the contents of a's value are updated to + an Indirection, pointing to z's value. + + Indirections are used so that a FunctionValue can be uniquely owned by an + InternalValue. Since all InternalValues are owned by the global Environment, + it's safe to use these non-owned pointers. 
+ */ + class InternalValue : public UObject { + public: + bool isFallback() const { return std::holds_alternative(val); } + bool isNullOperand() const; + bool isEvaluated() const; + bool isClosure() const; + bool isSelectable() const; + + Closure& asClosure() { + U_ASSERT(isClosure()); + return **std::get_if>(&val); + } + const FunctionValue* getValue(UErrorCode& status) const; + UnicodeString asFallback() const { return fallbackString; } + + static LocalPointer null(UErrorCode& status); + static LocalPointer fallback(const UnicodeString& s, UErrorCode& status); + // Adopts `c` + static InternalValue closure(Closure* c, const UnicodeString& s); + + // Updates the mutable contents of this InternalValue + void update(InternalValue&); + void update(LocalPointer); + void update(const UnicodeString&); + + InternalValue() : val(UnicodeString()) {} + explicit InternalValue(FunctionValue* v, const UnicodeString& fb); + InternalValue& operator=(InternalValue&&); + InternalValue(InternalValue&&); + virtual ~InternalValue(); + private: + UnicodeString fallbackString; + std::variant, // Unevaluated thunk + LocalPointer, // Evaluated value + const InternalValue*> val; // Indirection to another value -- Not owned + // Null operand constructor + explicit InternalValue(UErrorCode& status); + // Fallback constructor + explicit InternalValue(const UnicodeString& fb) + : fallbackString(fb), val(fb) {} + // Closure (unevaluated) constructor + explicit InternalValue(Closure* c, UnicodeString fallbackStr) + : fallbackString(fallbackStr), val(LocalPointer(c)) {} + bool isIndirection() const; + }; // class InternalValue + + + // A BaseValue wraps a literal value or argument value so it can be used + // in a context that expects a FunctionValue. 
+ class BaseValue : public FunctionValue { + public: + static BaseValue* create(const Locale&, const Formattable&, UErrorCode&); + // Apply default formatters to the argument value + UnicodeString formatToString(UErrorCode&) const override; + UBool isSelectable() const override { return true; } + BaseValue() {} + BaseValue(BaseValue&&); + BaseValue& operator=(BaseValue&&) noexcept; + private: + Locale locale; + + BaseValue(const Locale&, const Formattable&); + }; // class BaseValue + + // A NullValue represents the absence of an argument. + class NullValue : public FunctionValue { + public: + virtual UBool isNullOperand() const { return true; } + }; // class NullValue + // PrioritizedVariant // For how this class is used, see the references to (integer, variant) tuples @@ -63,38 +169,6 @@ namespace message2 { return 1; } - // Encapsulates a value to be scrutinized by a `match` with its resolved - // options and the name of the selector - class ResolvedSelector : public UObject { - public: - ResolvedSelector() {} - ResolvedSelector(const FunctionName& fn, - Selector* selector, - FunctionOptions&& options, - FormattedPlaceholder&& value); - // Used either for errors, or when selector isn't yet known - explicit ResolvedSelector(FormattedPlaceholder&& value); - bool hasSelector() const { return selector.isValid(); } - const FormattedPlaceholder& argument() const { return value; } - FormattedPlaceholder&& takeArgument() { return std::move(value); } - const Selector* getSelector() { - U_ASSERT(selector.isValid()); - return selector.getAlias(); - } - FunctionOptions&& takeOptions() { - return std::move(options); - } - const FunctionName& getSelectorName() const { return selectorName; } - virtual ~ResolvedSelector(); - ResolvedSelector& operator=(ResolvedSelector&&) noexcept; - ResolvedSelector(ResolvedSelector&&); - private: - FunctionName selectorName; // For error reporting - LocalPointer selector; - FunctionOptions options; - FormattedPlaceholder value; - }; // class 
ResolvedSelector - // Closures and environments // ------------------------- @@ -108,66 +182,106 @@ namespace message2 { const Expression& getExpr() const { return expr; } - const Environment& getEnv() const { + Environment& getEnv() const { return env; } - Closure(const Expression& expression, const Environment& environment) : expr(expression), env(environment) {} Closure(Closure&&) = default; + static Closure* create(const Expression&, Environment&, UErrorCode&); virtual ~Closure(); private: + Closure(const Expression& expression, Environment& environment) : expr(expression), env(environment) {} + // An unevaluated expression const Expression& expr; // The environment mapping names used in this // expression to other expressions - const Environment& env; + Environment& env; }; + class NonEmptyEnvironment; + // An environment is represented as a linked chain of // non-empty environments, terminating at an empty environment. // It's searched using linear search. class Environment : public UMemory { - public: - virtual bool has(const VariableName&) const = 0; - virtual const Closure& lookup(const VariableName&) const = 0; - static Environment* create(UErrorCode&); - static Environment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); - virtual ~Environment(); + public: + virtual bool has(const VariableName&) const = 0; + virtual InternalValue& lookup(const VariableName&) = 0; + virtual InternalValue& bogus() = 0; + // For convenience so that InternalValue::getValue() can return a reference + // in error cases + FunctionValue& bogusFunctionValue() { return bogusFunctionVal; } + virtual InternalValue& createFallback(const UnicodeString&, UErrorCode&) = 0; + virtual InternalValue& createNull(UErrorCode&) = 0; + virtual InternalValue& createUnnamed(InternalValue&&, UErrorCode&) = 0; + static Environment* create(UErrorCode&); + static Environment* create(const VariableName&, Closure*, const UnicodeString&, + Environment*, UErrorCode&); + virtual 
~Environment(); + + private: + FunctionValue bogusFunctionVal; }; - class NonEmptyEnvironment; + // The empty environment includes a "bogus" value to use when an + // InternalValue& is needed (e.g. error conditions), + // and a vector of "unnamed" values, so that the environment can + // own all InternalValues (even those arising from expressions + // that appear directly in a pattern and are not named). class EmptyEnvironment : public Environment { public: - EmptyEnvironment() = default; + EmptyEnvironment(UErrorCode& status); virtual ~EmptyEnvironment(); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return bogusValue; } static EmptyEnvironment* create(UErrorCode&); - static NonEmptyEnvironment* create(const VariableName&, Closure&&, Environment*, UErrorCode&); + static NonEmptyEnvironment* create(const VariableName&, InternalValue, + Environment*, UErrorCode&); + + // Creates a fallback value owned by this Environment + InternalValue& createFallback(const UnicodeString&, UErrorCode&) override; + // Creates a null operand owned by this Environment + InternalValue& createNull(UErrorCode&) override; + // Creates an arbitrary value owned by this Environment + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; + + InternalValue& addUnnamedValue(LocalPointer, UErrorCode&); + + InternalValue bogusValue; // Used in place of `nullptr` in error conditions + UVector unnamedValues; }; class NonEmptyEnvironment : public Environment { + public: + InternalValue* update(const VariableName&, InternalValue&&); private: friend class Environment; bool has(const VariableName&) const override; - const Closure& lookup(const VariableName&) const override; + InternalValue& lookup(const VariableName&) override; + InternalValue& bogus() override { return parent->bogus(); } static 
NonEmptyEnvironment* create(const VariableName&, Closure&&, const Environment*, UErrorCode&); virtual ~NonEmptyEnvironment(); private: friend class Environment; - NonEmptyEnvironment(const VariableName& v, Closure&& c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + NonEmptyEnvironment(const VariableName& v, InternalValue c, Environment* e) : var(v), rhs(std::move(c)), parent(e) {} + + InternalValue& createFallback(const UnicodeString&, UErrorCode&) override; + InternalValue& createNull(UErrorCode&) override; + InternalValue& createUnnamed(InternalValue&&, UErrorCode&) override; // Maps VariableName onto Closure* // Chain of linked environments VariableName var; - Closure rhs; + InternalValue rhs; const LocalPointer parent; }; diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 3152ccb44fd8..9a2daaec8881 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -9,7 +9,10 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/smpdtfmt.h" +#include "unicode/ubidi.h" +#include "messageformat2_allocation.h" #include "messageformat2_macros.h" +#include "ubidiimp.h" #include "limits.h" @@ -17,17 +20,6 @@ U_NAMESPACE_BEGIN namespace message2 { - // Fallback values are enclosed in curly braces; - // see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#formatting-fallback-values - - static UnicodeString fallbackToString(const UnicodeString& s) { - UnicodeString result; - result += LEFT_CURLY_BRACE; - result += s; - result += RIGHT_CURLY_BRACE; - return result; - } - Formattable& Formattable::operator=(Formattable other) noexcept { swap(*this, other); return *this; @@ -157,43 +149,6 @@ namespace message2 { FormattedMessage::~FormattedMessage() {} - FormattedValue::FormattedValue(const UnicodeString& s) { - type = kString; - stringOutput = std::move(s); - } - - 
FormattedValue::FormattedValue(number::FormattedNumber&& n) { - type = kNumber; - numberOutput = std::move(n); - } - - FormattedValue& FormattedValue::operator=(FormattedValue&& other) noexcept { - type = other.type; - if (type == kString) { - stringOutput = std::move(other.stringOutput); - } else { - numberOutput = std::move(other.numberOutput); - } - return *this; - } - - FormattedValue::~FormattedValue() {} - - FormattedPlaceholder& FormattedPlaceholder::operator=(FormattedPlaceholder&& other) noexcept { - type = other.type; - source = other.source; - if (type == kEvaluated) { - formatted = std::move(other.formatted); - previousOptions = std::move(other.previousOptions); - } - fallback = other.fallback; - return *this; - } - - const Formattable& FormattedPlaceholder::asFormattable() const { - return source; - } - // Default formatters // ------------------ @@ -231,15 +186,39 @@ namespace message2 { df->format(date, result, 0, errorCode); } - // Called when output is required and the contents are an unevaluated `Formattable`; - // formats the source `Formattable` to a string with defaults, if it can be - // formatted with a default formatter - static FormattedPlaceholder formatWithDefaults(const Locale& locale, const FormattedPlaceholder& input, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; + static UnicodeString& handleBiDi(const Locale& locale, + UBiDiDirection dir, + UnicodeString& result) { + switch (dir) { + case UBIDI_LTR: + if (locale.isRightToLeft()) { + result.insert(0, LRI_CHAR); + result.insert(result.length(), PDI_CHAR); + } + break; + case UBIDI_RTL: + result.insert(0, RLI_CHAR); + result.insert(result.length(), PDI_CHAR); + break; + case UBIDI_NEUTRAL: + // Do nothing + break; + case UBIDI_MIXED: + // mixed = auto + result.insert(0, FSI_CHAR); + result.insert(result.length(), PDI_CHAR); + break; } - const Formattable& toFormat = input.asFormattable(); + return result; + } + + UnicodeString formattableToString(const Locale& locale, + 
UBiDiDirection dir, + const Formattable& toFormat, + UErrorCode& status) { + EMPTY_ON_ERROR(status); + // Try as decimal number first if (toFormat.isNumeric()) { // Note: the ICU Formattable has to be created here since the StringPiece @@ -251,38 +230,42 @@ namespace message2 { return {}; } if (asDecimal != nullptr) { - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, asDecimal, status))); + return formatNumberWithDefaults(locale, asDecimal, status).toString(status); } } UFormattableType type = toFormat.getType(); + UnicodeString result; + switch (type) { case UFMT_DATE: { - UnicodeString result; UDate d = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); formatDateWithDefaults(locale, d, result, status); - return FormattedPlaceholder(input, FormattedValue(std::move(result))); + break; } case UFMT_DOUBLE: { double d = toFormat.getDouble(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, d, status))); + result = formatNumberWithDefaults(locale, d, status).toString(status); + break; } case UFMT_LONG: { int32_t l = toFormat.getLong(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, l, status))); + result = formatNumberWithDefaults(locale, l, status).toString(status); + break; } case UFMT_INT64: { int64_t i = toFormat.getInt64Value(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(formatNumberWithDefaults(locale, i, status))); + result = formatNumberWithDefaults(locale, i, status).toString(status); + break; } case UFMT_STRING: { - const UnicodeString& s = toFormat.getString(status); + result = toFormat.getString(status); U_ASSERT(U_SUCCESS(status)); - return FormattedPlaceholder(input, FormattedValue(UnicodeString(s))); + break; } default: { // No default formatters for other types; use fallback @@ -290,43 +273,11 @@ namespace message2 { // Note: it 
would be better to set an internal formatting error so that a string // (e.g. the type tag) can be provided. However, this method is called by the // public method formatToString() and thus can't take a MessageContext - return FormattedPlaceholder(input.getFallback()); - } - } - } - - // Called when string output is required; forces output to be produced - // if none is present (including formatting number output as a string) - UnicodeString FormattedPlaceholder::formatToString(const Locale& locale, - UErrorCode& status) const { - if (U_FAILURE(status)) { return {}; } - if (isFallback() || isNullOperand()) { - return fallbackToString(fallback); } - // Evaluated value: either just return the string, or format the number - // as a string and return it - if (isEvaluated()) { - if (formatted.isString()) { - return formatted.getString(); - } else { - return formatted.getNumber().toString(status); - } - } - // Unevaluated value: first evaluate it fully, then format - UErrorCode savedStatus = status; - FormattedPlaceholder evaluated = formatWithDefaults(locale, *this, status); - if (status == U_MF_FORMATTING_ERROR) { - U_ASSERT(evaluated.isFallback()); - return evaluated.getFallback(); - } - // Ignore U_USING_DEFAULT_WARNING - if (status == U_USING_DEFAULT_WARNING) { - status = savedStatus; - } - return evaluated.formatToString(locale, status); + return handleBiDi(locale, dir, result); } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 8d17ae49b99a..b4fe6bd55ae6 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -122,19 +122,23 @@ namespace message2 { // Set up the standard function registry MFFunctionRegistry::Builder standardFunctionsBuilder(success); - FormatterFactory* dateTime = StandardFunctions::DateTimeFactory::dateTime(success); - FormatterFactory* date = StandardFunctions::DateTimeFactory::date(success); - 
FormatterFactory* time = StandardFunctions::DateTimeFactory::time(success); - FormatterFactory* number = new StandardFunctions::NumberFactory(); - FormatterFactory* integer = new StandardFunctions::IntegerFactory(); - standardFunctionsBuilder.adoptFormatter(FunctionName(UnicodeString("datetime")), dateTime, success) - .adoptFormatter(FunctionName(UnicodeString("date")), date, success) - .adoptFormatter(FunctionName(UnicodeString("time")), time, success) - .adoptFormatter(FunctionName(UnicodeString("number")), number, success) - .adoptFormatter(FunctionName(UnicodeString("integer")), integer, success) - .adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success) - .adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success) - .adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success); + LocalPointer dateTime(StandardFunctions::DateTime::dateTime(success)); + LocalPointer date(StandardFunctions::DateTime::date(success)); + LocalPointer time(StandardFunctions::DateTime::time(success)); + LocalPointer number(StandardFunctions::Number::number(success)); + LocalPointer integer(StandardFunctions::Number::integer(success)); + LocalPointer string(StandardFunctions::String::string(success)); + CHECK_ERROR(success); + standardFunctionsBuilder.adoptFunction(FunctionName(UnicodeString("datetime")), + dateTime.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("date")), date.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("time")), time.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("number")), + number.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("integer")), + integer.orphan(), success) + .adoptFunction(FunctionName(UnicodeString("string")), + string.orphan(), success); CHECK_ERROR(success); standardMFFunctionRegistry = 
standardFunctionsBuilder.build(); CHECK_ERROR(success); @@ -214,135 +218,47 @@ namespace message2 { cleanup(); } - // Selector and formatter lookup - // ----------------------------- - - // Postcondition: selector != nullptr || U_FAILURE(status) - Selector* MessageFormatter::getSelector(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - U_ASSERT(isSelector(functionName)); - - const SelectorFactory* selectorFactory = lookupSelectorFactory(context, functionName, status); - NULL_ON_ERROR(status); - if (selectorFactory == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - // Create a specific instance of the selector - auto result = selectorFactory->createSelector(getLocale(), status); - NULL_ON_ERROR(status); - return result; - } - - // Returns an owned pointer - Formatter* MessageFormatter::getFormatter(const FunctionName& functionName, UErrorCode& status) const { - NULL_ON_ERROR(status); - - // Create the formatter - - // First, look up the formatter factory for this function - FormatterFactory* formatterFactory = lookupFormatterFactory(functionName, status); - NULL_ON_ERROR(status); - - U_ASSERT(formatterFactory != nullptr); - - // Create a specific instance of the formatter - Formatter* formatter = formatterFactory->createFormatter(locale, status); - NULL_ON_ERROR(status); - if (formatter == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return formatter; - } - - bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(hasCustomMFFunctionRegistry()); - const MFFunctionRegistry& reg = getCustomMFFunctionRegistry(); - return reg.getDefaultFormatterNameByType(type, name); - } - // --------------------------------------------------- // Function registry - bool MessageFormatter::isBuiltInSelector(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasSelector(functionName); 
- } - - bool MessageFormatter::isBuiltInFormatter(const FunctionName& functionName) const { - return standardMFFunctionRegistry.hasFormatter(functionName); + bool MessageFormatter::isBuiltInFunction(const FunctionName& functionName) const { + return standardMFFunctionRegistry.hasFunction(functionName); } - // https://github.com/unicode-org/message-format-wg/issues/409 - // Unknown function = unknown function error - // Formatter used as selector = selector error - // Selector used as formatter = formatting error - const SelectorFactory* MessageFormatter::lookupSelectorFactory(MessageContext& context, const FunctionName& functionName, UErrorCode& status) const { - DynamicErrors& err = context.getErrors(); + Function* + MessageFormatter::lookupFunction(const FunctionName& functionName, + UErrorCode& status) const { + NULL_ON_ERROR(status); - if (isBuiltInSelector(functionName)) { - return standardMFFunctionRegistry.getSelector(functionName); - } - if (isBuiltInFormatter(functionName)) { - err.setSelectorError(functionName, status); - return nullptr; + if (isBuiltInFunction(functionName)) { + return standardMFFunctionRegistry.getFunction(functionName); } if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - const SelectorFactory* selectorFactory = customMFFunctionRegistry.getSelector(functionName); - if (selectorFactory != nullptr) { - return selectorFactory; - } - if (customMFFunctionRegistry.getFormatter(functionName) != nullptr) { - err.setSelectorError(functionName, status); - return nullptr; + Function* function = customMFFunctionRegistry.getFunction(functionName); + if (function != nullptr) { + return function; } } // Either there is no custom function registry and the function // isn't built-in, or the function doesn't exist in either the built-in // or custom registry. 
// Unknown function error - err.setUnknownFunction(functionName, status); + status = U_MF_UNKNOWN_FUNCTION_ERROR; return nullptr; } - FormatterFactory* MessageFormatter::lookupFormatterFactory(const FunctionName& functionName, - UErrorCode& status) const { - NULL_ON_ERROR(status); - - if (isBuiltInFormatter(functionName)) { - return standardMFFunctionRegistry.getFormatter(functionName); - } - if (isBuiltInSelector(functionName)) { - status = U_MF_FORMATTING_ERROR; - return nullptr; - } + bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& tag, + FunctionName& result) const { if (hasCustomMFFunctionRegistry()) { const MFFunctionRegistry& customMFFunctionRegistry = getCustomMFFunctionRegistry(); - FormatterFactory* formatterFactory = customMFFunctionRegistry.getFormatter(functionName); - if (formatterFactory != nullptr) { - return formatterFactory; - } - if (customMFFunctionRegistry.getSelector(functionName) != nullptr) { - status = U_MF_FORMATTING_ERROR; - return nullptr; - } + return customMFFunctionRegistry.getDefaultFormatterNameByType(tag, result); } - // Either there is no custom function registry and the function - // isn't built-in, or the function doesn't exist in either the built-in - // or custom registry. 
- // Unknown function error - status = U_MF_UNKNOWN_FUNCTION_ERROR; - return nullptr; + return false; } - bool MessageFormatter::isCustomFormatter(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFormatter(fn) != nullptr; - } - - - bool MessageFormatter::isCustomSelector(const FunctionName& fn) const { - return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getSelector(fn) != nullptr; + bool MessageFormatter::isCustomFunction(const FunctionName& fn) const { + return hasCustomMFFunctionRegistry() && getCustomMFFunctionRegistry().getFunction(fn) != nullptr; } } // namespace message2 diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 17955760ecfb..65bd1406dce7 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -38,82 +38,74 @@ namespace message2 { // Function registry implementation -Formatter::~Formatter() {} -Selector::~Selector() {} -FormatterFactory::~FormatterFactory() {} -SelectorFactory::~SelectorFactory() {} +Function::~Function() {} +FunctionValue::~FunctionValue() {} MFFunctionRegistry MFFunctionRegistry::Builder::build() { - U_ASSERT(formatters != nullptr && selectors != nullptr && formattersByType != nullptr); - MFFunctionRegistry result = MFFunctionRegistry(formatters, selectors, formattersByType); - formatters = nullptr; - selectors = nullptr; + U_ASSERT(functions != nullptr); + U_ASSERT(formattersByType != nullptr); + MFFunctionRegistry result = MFFunctionRegistry(functions, formattersByType); + functions = nullptr; formattersByType = nullptr; return result; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) { +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::adoptFunction(const FunctionName& 
functionName, + Function* function, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { - U_ASSERT(selectors != nullptr); - selectors->put(selectorName, selectorFactory, errorCode); + U_ASSERT(functions != nullptr); + functions->put(functionName, function, errorCode); } return *this; } -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - U_ASSERT(formatters != nullptr); - formatters->put(formatterName, formatterFactory, errorCode); - } - return *this; -} - -MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, const FunctionName& functionName, UErrorCode& errorCode) { +MFFunctionRegistry::Builder& +MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, + const FunctionName& functionName, + UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { U_ASSERT(formattersByType != nullptr); FunctionName* f = create(FunctionName(functionName), errorCode); formattersByType->put(type, f, errorCode); - } - return *this; -} + } + return *this; + } MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) { CHECK_ERROR(errorCode); - formatters = new Hashtable(); - selectors = new Hashtable(); + functions = new Hashtable(); formattersByType = new Hashtable(); - if (!(formatters != nullptr && selectors != nullptr && formattersByType != nullptr)) { + if (functions == nullptr || formattersByType == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - formatters->setValueDeleter(uprv_deleteUObject); - selectors->setValueDeleter(uprv_deleteUObject); + + functions->setValueDeleter(uprv_deleteUObject); formattersByType->setValueDeleter(uprv_deleteUObject); } MFFunctionRegistry::Builder::~Builder() { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; + if (functions != nullptr) { + delete 
functions; + functions = nullptr; } if (formattersByType != nullptr) { delete formattersByType; + formattersByType = nullptr; } } // Returns non-owned pointer. Returns pointer rather than reference because it can fail. -// Returns non-const because FormatterFactory is mutable. -// TODO: This is unsafe because of the cached-formatters map -// (the caller could delete the resulting pointer) -FormatterFactory* MFFunctionRegistry::getFormatter(const FunctionName& formatterName) const { - U_ASSERT(formatters != nullptr); - return static_cast(formatters->get(formatterName)); +// Returns non-const because Function is mutable. +Function* MFFunctionRegistry::getFunction(const FunctionName& functionName) const { + U_ASSERT(functions != nullptr); + return static_cast(functions->get(functionName)); } UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(formatters != nullptr); + U_ASSERT(formattersByType != nullptr); const FunctionName* f = static_cast(formattersByType->get(type)); if (f != nullptr) { name = *f; @@ -122,48 +114,29 @@ UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& typ return false; } -const SelectorFactory* MFFunctionRegistry::getSelector(const FunctionName& selectorName) const { - U_ASSERT(selectors != nullptr); - return static_cast(selectors->get(selectorName)); -} - -bool MFFunctionRegistry::hasFormatter(const FunctionName& f) const { - return getFormatter(f) != nullptr; +bool MFFunctionRegistry::hasFunction(const FunctionName& f) const { + return getFunction(f) != nullptr; } -bool MFFunctionRegistry::hasSelector(const FunctionName& s) const { - return getSelector(s) != nullptr; -} - -void MFFunctionRegistry::checkFormatter(const char* s) const { +void MFFunctionRegistry::checkFunction(const char* s) const { #if U_DEBUG - U_ASSERT(hasFormatter(FunctionName(UnicodeString(s)))); + U_ASSERT(hasFunction(FunctionName(UnicodeString(s)))); #else (void) s; #endif } 
-void MFFunctionRegistry::checkSelector(const char* s) const { -#if U_DEBUG - U_ASSERT(hasSelector(FunctionName(UnicodeString(s)))); -#else - (void) s; -#endif -} - // Debugging void MFFunctionRegistry::checkStandard() const { - checkFormatter("datetime"); - checkFormatter("date"); - checkFormatter("time"); - checkFormatter("number"); - checkFormatter("integer"); - checkSelector("number"); - checkSelector("integer"); - checkSelector("string"); + checkFunction("datetime"); + checkFunction("date"); + checkFunction("time"); + checkFunction("number"); + checkFunction("integer"); + checkFunction("string"); } -// Formatter/selector helpers +// Function/selector helpers // Converts `s` to a double, indicating failure via `errorCode` static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) { @@ -215,33 +188,32 @@ static int64_t getInt64Value(const Locale& locale, const Formattable& value, UEr return 0; } -// Adopts its arguments -MFFunctionRegistry::MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType) : formatters(f), selectors(s), formattersByType(byType) { - U_ASSERT(f != nullptr && s != nullptr && byType != nullptr); +// Adopts its argument +MFFunctionRegistry::MFFunctionRegistry(FunctionMap* f, Hashtable* byType) + : functions(f), formattersByType(byType) { + U_ASSERT(f != nullptr); + U_ASSERT(byType != nullptr); } MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept { cleanup(); - formatters = other.formatters; - selectors = other.selectors; + functions = other.functions; + other.functions = nullptr; formattersByType = other.formattersByType; - other.formatters = nullptr; - other.selectors = nullptr; other.formattersByType = nullptr; return *this; } void MFFunctionRegistry::cleanup() noexcept { - if (formatters != nullptr) { - delete formatters; - } - if (selectors != nullptr) { - delete selectors; + if (functions != nullptr) { + delete functions; + functions = nullptr; } if 
(formattersByType != nullptr) { delete formattersByType; + formattersByType = nullptr; } } @@ -250,11 +222,49 @@ MFFunctionRegistry::~MFFunctionRegistry() { cleanup(); } -// Specific formatter implementations +// Specific function implementations // --------- Number +/* static */ StandardFunctions::Number* +StandardFunctions::Number::integer(UErrorCode& success) { + return create(true, success); +} + +/* static */ StandardFunctions::Number* +StandardFunctions::Number::number(UErrorCode& success) { + return create(false, success); +} + +/* static */ StandardFunctions::Number* +StandardFunctions::Number::create(bool isInteger, UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new Number(isInteger)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +LocalPointer StandardFunctions::Number::call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + val(new NumberValue(*this, context, operand, options, errorCode)); + if (!val.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return val; +} + /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& status) { number::UnlocalizedNumberFormatter nf; @@ -344,7 +354,9 @@ MFFunctionRegistry::~MFFunctionRegistry() { // All other options apply to both `:number` and `:integer` int32_t minIntegerDigits = number.minimumIntegerDigits(opts); - nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + if (minIntegerDigits != -1) { + nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits)); + } // signDisplay UnicodeString sd = opts.getStringFunctionOption(UnicodeString("signDisplay")); @@ -395,44 +407,10 @@ MFFunctionRegistry::~MFFunctionRegistry() { } } } - 
return nf.locale(number.locale); -} - -Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - Formatter* result = new Number(Number::integer(locale)); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -StandardFunctions::IntegerFactory::~IntegerFactory() {} - -static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { - return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); + return nf.locale(locale); } -static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return {}; - } - - // Copying string to avoid GCC dangling-reference warning - // (although the reference is safe) - UnicodeString inputStr = input.asFormattable().getString(errorCode); - // Precondition: `input`'s source Formattable has type string +static double parseNumberLiteral(const UnicodeString& inputStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } @@ -463,10 +441,12 @@ static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& return result; } -static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) { +static number::FormattedNumber tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, + const UnicodeString& input, + UErrorCode& errorCode) { double numberValue = parseNumberLiteral(input, errorCode); if (U_FAILURE(errorCode)) { - return notANumber(input); + return {}; } UErrorCode savedStatus = errorCode; @@ -475,20 +455,27 @@ static 
FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumbe if (errorCode == U_USING_DEFAULT_WARNING) { errorCode = savedStatus; } - return FormattedPlaceholder(input, FormattedValue(std::move(result))); + return result; } -int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (isInteger) { - return 0; - } - - if (opts.getFunctionOption(UnicodeString("maximumFractionDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { +int32_t StandardFunctions::Number::digitSizeOption(const FunctionOptions& opts, + const UnicodeString& k) const { + UErrorCode localStatus = U_ZERO_ERROR; + const FunctionValue* opt = opts.getFunctionOption(k, + localStatus); + if (U_SUCCESS(localStatus)) { + // First try the formatted value + UnicodeString formatted = opt->formatToString(localStatus); + int64_t val = 0; + if (U_SUCCESS(localStatus)) { + val = getInt64Value(Locale("en-US"), Formattable(formatted), localStatus); + } + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + } + // Next try the operand + val = getInt64Value(Locale("en-US"), opt->getOperand(), localStatus); + if (U_SUCCESS(localStatus)) { return static_cast(val); } } @@ -498,207 +485,140 @@ int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& return -1; } +int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const { + if (isInteger) { + return 0; + } + + return digitSizeOption(opts, UnicodeString("maximumFractionDigits")); +} + int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const { Formattable opt; - if (!isInteger) { - if (opts.getFunctionOption(UnicodeString("minimumFractionDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - 
return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minFraction() - // is an error. - return -1; + return digitSizeOption(opts, UnicodeString("minimumFractionDigits")); } int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(UnicodeString("minimumIntegerDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - return 0; + return digitSizeOption(opts, UnicodeString("minimumIntegerDigits")); } int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (!isInteger) { - if (opts.getFunctionOption(UnicodeString("minimumSignificantDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } + if (isInteger) { + return -1; } - // Returning -1 indicates that the option wasn't provided or was a non-integer. - // The caller needs to check for that case, since passing -1 to Precision::minSignificantDigits() - // is an error. - return -1; + return digitSizeOption(opts, UnicodeString("minimumSignificantDigits")); } int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const { - Formattable opt; - - if (opts.getFunctionOption(UnicodeString("maximumSignificantDigits"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - int64_t val = getInt64Value(locale, opt, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return static_cast(val); - } - } - // Returning -1 indicates that the option wasn't provided or was a non-integer. 
- // The caller needs to check for that case, since passing -1 to Precision::maxSignificantDigits() - // is an error. - return -1; // Not a valid value for Precision; has to be checked + return digitSizeOption(opts, UnicodeString("maximumSignificantDigits")); } bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const { - Formattable opt; - if (isInteger - || !opts.getFunctionOption(UnicodeString("style"), opt) - || opt.getType() != UFMT_STRING) { + const UnicodeString& style = opts.getStringFunctionOption(UnicodeString("style")); + if (isInteger || style.length() == 0) { return false; } - UErrorCode localErrorCode = U_ZERO_ERROR; - const UnicodeString& style = opt.getString(localErrorCode); - U_ASSERT(U_SUCCESS(localErrorCode)); return (style == UnicodeString("percent")); } -/* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) { - return StandardFunctions::Number(loc, true); -} - -FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& arg, FunctionOptions&& opts, UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } - - // No argument => return "NaN" - if (!arg.canFormat()) { +StandardFunctions::NumberValue::NumberValue(const Number& parent, + const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + // Must have an argument + if (arg.isNullOperand()) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + return; } + locale = context.getLocale(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + operand = arg.getOperand(); + number::LocalizedNumberFormatter realFormatter; - realFormatter = formatterForOptions(*this, opts, errorCode); + realFormatter = formatterForOptions(parent, locale, opts, errorCode); - number::FormattedNumber numberResult; if (U_SUCCESS(errorCode)) { - // Already checked that contents can be formatted - const 
Formattable& toFormat = arg.asFormattable(); - switch (toFormat.getType()) { + switch (operand.getType()) { case UFMT_DOUBLE: { - double d = toFormat.getDouble(errorCode); + double d = operand.getDouble(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatDouble(d, errorCode); + formattedNumber = realFormatter.formatDouble(d, errorCode); break; } case UFMT_LONG: { - int32_t l = toFormat.getLong(errorCode); + int32_t l = operand.getLong(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(l, errorCode); + formattedNumber = realFormatter.formatInt(l, errorCode); break; } case UFMT_INT64: { - int64_t i = toFormat.getInt64(errorCode); + int64_t i = operand.getInt64(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - numberResult = realFormatter.formatInt(i, errorCode); + formattedNumber = realFormatter.formatInt(i, errorCode); break; } case UFMT_STRING: { // Try to parse the string as a number - return tryParsingNumberLiteral(realFormatter, arg, errorCode); + formattedNumber = tryParsingNumberLiteral(realFormatter, + operand.getString(errorCode), + errorCode); + break; } default: { // Other types can't be parsed as a number errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return notANumber(arg); + break; } } } - - return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult))); } -StandardFunctions::Number::~Number() {} -StandardFunctions::NumberFactory::~NumberFactory() {} +UnicodeString StandardFunctions::NumberValue::formatToString(UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return {}; + } -// --------- PluralFactory + return formattedNumber.toString(errorCode); +} +StandardFunctions::Number::~Number() {} +StandardFunctions::NumberValue::~NumberValue() {} -StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const { - Formattable opt; +/* static */ StandardFunctions::Number::PluralType +StandardFunctions::Number::pluralType(const 
FunctionOptions& opts) { + const UnicodeString& select = opts.getStringFunctionOption(UnicodeString("select")); - if (opts.getFunctionOption(UnicodeString("select"), opt)) { - UErrorCode localErrorCode = U_ZERO_ERROR; - UnicodeString val = opt.getString(localErrorCode); - if (U_SUCCESS(localErrorCode)) { - if (val == UnicodeString("ordinal")) { - return PluralType::PLURAL_ORDINAL; - } - if (val == UnicodeString("exact")) { - return PluralType::PLURAL_EXACT; - } + if (select.length() > 0) { + if (select == UnicodeString("ordinal")) { + return PluralType::PLURAL_ORDINAL; + } + if (select == UnicodeString("exact")) { + return PluralType::PLURAL_EXACT; } } return PluralType::PLURAL_CARDINAL; } -Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - - Selector* result; - if (isInteger) { - result = new Plural(Plural::integer(locale, errorCode)); - } else { - result = new Plural(locale, errorCode); - } - NULL_ON_ERROR(errorCode); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) const { +void StandardFunctions::NumberValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); - // No argument => return "NaN" - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } + Number::PluralType type = Number::pluralType(opts); - // Handle any formatting options - PluralType type = pluralType(opts); - FormattedPlaceholder resolvedSelector = numberFormatter->format(std::move(toFormat), - std::move(opts), - errorCode); - CHECK_ERROR(errorCode); - - U_ASSERT(resolvedSelector.isEvaluated() && 
resolvedSelector.output().isNumber()); + // (resolvedSelector is `this`) // See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection // 1. Let exact be the JSON string representation of the numeric value of resolvedSelector - const number::FormattedNumber& formattedNumber = resolvedSelector.output().getNumber(); UnicodeString exact = formattedNumber.toString(errorCode); if (U_FAILURE(errorCode)) { @@ -710,8 +630,8 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // Step 2. Let keyword be a string which is the result of rule selection on resolvedSelector. // If the option select is set to exact, rule-based selection is not used. Return the empty string. UnicodeString keyword; - if (type != PluralType::PLURAL_EXACT) { - UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; + if (type != Number::PluralType::PLURAL_EXACT) { + UPluralType t = type == Number::PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL; // Look up plural rules by locale and type LocalPointer rules(PluralRules::forLocale(locale, t, errorCode)); CHECK_ERROR(errorCode); @@ -738,7 +658,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // 5i(a). If key and exact consist of the same sequence of Unicode code points, then if (exact == keys[i]) { // 5i(a)(a) Append key as the last element of the list resultExact. - prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; break; } @@ -746,7 +666,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, } // Return immediately if exact matching was requested - if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) { + if (prefsLen == keysLen || type == Number::PluralType::PLURAL_EXACT) { return; } @@ -759,7 +679,7 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // 5ii(a). 
If key and keyword consist of the same sequence of Unicode code points, then if (keyword == keys[i]) { // 5ii(a)(a) Append key as the last element of the list resultKeyword. - prefs[prefsLen] = keys[i]; + prefs[prefsLen] = i; prefsLen++; } } @@ -771,116 +691,58 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat, // (Implicit, since `prefs` is an out-parameter) } -StandardFunctions::Plural::Plural(const Locale& loc, UErrorCode& status) : locale(loc) { - CHECK_ERROR(status); +// --------- DateTime - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::date(UErrorCode& success) { + return DateTime::create(DateTimeType::kDate, success); } -StandardFunctions::Plural::Plural(const Locale& loc, bool isInt, UErrorCode& status) : locale(loc), isInteger(isInt) { - CHECK_ERROR(status); - - if (isInteger) { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc, true)); - } else { - numberFormatter.adoptInstead(new StandardFunctions::Number(loc)); - } - - if (!numberFormatter.isValid()) { - status = U_MEMORY_ALLOCATION_ERROR; - } +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::time(UErrorCode& success) { + return DateTime::create(DateTimeType::kTime, success); } -StandardFunctions::Plural::~Plural() {} - -StandardFunctions::PluralFactory::~PluralFactory() {} - -// --------- DateTimeFactory - -/* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) { - if (U_SUCCESS(errorCode)) { - Formattable opt; - if (opts.getFunctionOption(optionName, opt)) { - return opt.getString(errorCode); // In case it's not a string, error code will be set - } else { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - } - // Default is empty string - return {}; +/* static */ 
StandardFunctions::DateTime* +StandardFunctions::DateTime::dateTime(UErrorCode& success) { + return DateTime::create(DateTimeType::kDateTime, success); } -// Date/time options only -static UnicodeString defaultForOption(const UnicodeString& optionName) { - if (optionName == UnicodeString("dateStyle") - || optionName == UnicodeString("timeStyle") - || optionName == UnicodeString("style")) { - return UnicodeString("short"); - } - return {}; // Empty string is default -} +/* static */ StandardFunctions::DateTime* +StandardFunctions::DateTime::create(DateTime::DateTimeType type, + UErrorCode& success) { + NULL_ON_ERROR(success); -// TODO -// Only DateTime currently uses the function options stored in the placeholder. -// It also doesn't use them very consistently (it looks at the previous set of options, -// and others aren't preserved). This needs to be generalized, -// but that depends on https://github.com/unicode-org/message-format-wg/issues/515 -// Finally, the option value is assumed to be a string, -// which works for datetime options but not necessarily in general. 
-UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName) const { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; + LocalPointer result(new DateTime(type)); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } - // Finally, use default - return defaultForOption(optionName); + return result.orphan(); } -// Used for options that don't have defaults -UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) const { - if (U_SUCCESS(errorCode)) { - // Options passed to the current function invocation take priority - Formattable opt; - UnicodeString s; - UErrorCode localErrorCode = U_ZERO_ERROR; - s = getStringOption(opts, optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - // Next try the set of options used to construct `toFormat` - localErrorCode = U_ZERO_ERROR; - s = getStringOption(toFormat.options(), optionName, localErrorCode); - if (U_SUCCESS(localErrorCode)) { - return s; - } - errorCode = U_ILLEGAL_ARGUMENT_ERROR; +LocalPointer +StandardFunctions::DateTime::call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new DateTimeValue(type, context, val, opts, errorCode)); + if (!result.isValid()) { + 
errorCode = U_MEMORY_ALLOCATION_ERROR; } - return {}; + return result; } static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) { if (U_SUCCESS(errorCode)) { UnicodeString upper = option.toUpper(); + if (upper.isEmpty()) { + return DateFormat::EStyle::kShort; + } if (upper == UnicodeString("FULL")) { return DateFormat::EStyle::kFull; } @@ -893,7 +755,7 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC if (upper == UnicodeString("SHORT")) { return DateFormat::EStyle::kShort; } - if (upper.isEmpty() || upper == UnicodeString("DEFAULT")) { + if (upper == UnicodeString("DEFAULT")) { return DateFormat::EStyle::kDefault; } errorCode = U_ILLEGAL_ARGUMENT_ERROR; @@ -901,58 +763,30 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC return DateFormat::EStyle::kNone; } -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::dateTime(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +UnicodeString StandardFunctions::DateTimeValue::formatToString(UErrorCode& status) const { + (void) status; - DateTimeFactory* result = new StandardFunctions::DateTimeFactory(DateTimeType::DateTime); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::date(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Date); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - -/* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::time(UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - DateTimeFactory* result = new DateTimeFactory(DateTimeType::Time); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; + return formattedDate; } -Formatter* 
StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +StandardFunctions::DateTimeValue::DateTimeValue(DateTime::DateTimeType type, + const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& options, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); - Formatter* result = new StandardFunctions::DateTime(locale, type); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + // Must have an argument + if (val.isNullOperand()) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - return result; -} -FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - UErrorCode& errorCode) const { - if (U_FAILURE(errorCode)) { - return {}; - } + const Locale& locale = context.getLocale(); + operand = val.getOperand(); + opts = options.mergeOptions(val.getResolvedOptions(), errorCode); - // Argument must be present - if (!toFormat.canFormat()) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - return std::move(toFormat); - } + const Formattable* source = &operand; LocalPointer df; Formattable opt; @@ -964,30 +798,34 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& UnicodeString timeStyleName("timeStyle"); UnicodeString styleName("style"); - bool hasDateStyleOption = opts.getFunctionOption(dateStyleName, opt); - bool hasTimeStyleOption = opts.getFunctionOption(timeStyleName, opt); + UnicodeString dateStyleOption = opts.getStringFunctionOption(dateStyleName); + UnicodeString timeStyleOption = opts.getStringFunctionOption(timeStyleName); + bool hasDateStyleOption = dateStyleOption.length() > 0; + bool hasTimeStyleOption = timeStyleOption.length() > 0; bool noOptions = opts.optionsCount() == 0; - bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime + using DateTimeType = DateTime::DateTimeType; + + bool useStyle = (type == DateTimeType::kDateTime && 
(hasDateStyleOption || hasTimeStyleOption || noOptions)) - || (type != DateTimeFactory::DateTimeType::DateTime); + || (type != DateTimeType::kDateTime); - bool useDate = type == DateTimeFactory::DateTimeType::Date - || (type == DateTimeFactory::DateTimeType::DateTime + bool useDate = type == DateTimeType::kDate + || (type == DateTimeType::kDateTime && hasDateStyleOption); - bool useTime = type == DateTimeFactory::DateTimeType::Time - || (type == DateTimeFactory::DateTimeType::DateTime + bool useTime = type == DateTimeType::kTime + || (type == DateTimeType::kDateTime && hasTimeStyleOption); if (useStyle) { // Extract style options - if (type == DateTimeFactory::DateTimeType::DateTime) { + if (type == DateTimeType::kDateTime) { // Note that the options-getting has to be repeated across the three cases, // since `:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time` // use "style" - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, dateStyleName), errorCode); - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, timeStyleName), errorCode); + dateStyle = stringToStyle(opts.getStringFunctionOption(dateStyleName), errorCode); + timeStyle = stringToStyle(opts.getStringFunctionOption(timeStyleName), errorCode); if (useDate && !useTime) { df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); @@ -996,12 +834,12 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } else { df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale)); } - } else if (type == DateTimeFactory::DateTimeType::Date) { - dateStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + } else if (type == DateTimeType::kDate) { + dateStyle = stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale)); } else { // :time - timeStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode); + timeStyle = 
stringToStyle(opts.getStringFunctionOption(styleName), errorCode); df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale)); } } else { @@ -1012,7 +850,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& #define ADD_PATTERN(s) skeleton += UnicodeString(s) if (U_SUCCESS(errorCode)) { // Year - UnicodeString year = getFunctionOption(toFormat, opts, UnicodeString("year"), errorCode); + UnicodeString year = opts.getStringFunctionOption(UnicodeString("year"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1024,7 +862,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Month - UnicodeString month = getFunctionOption(toFormat, opts, UnicodeString("month"), errorCode); + UnicodeString month = opts.getStringFunctionOption(UnicodeString("month"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1043,7 +881,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Weekday - UnicodeString weekday = getFunctionOption(toFormat, opts, UnicodeString("weekday"), errorCode); + UnicodeString weekday = opts.getStringFunctionOption(UnicodeString("weekday"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1057,7 +895,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Day - UnicodeString day = getFunctionOption(toFormat, opts, UnicodeString("day"), errorCode); + UnicodeString day = opts.getStringFunctionOption(UnicodeString("day"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1069,7 +907,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Hour - UnicodeString hour = getFunctionOption(toFormat, opts, UnicodeString("hour"), errorCode); + UnicodeString hour = opts.getStringFunctionOption(UnicodeString("hour"), errorCode); if (U_FAILURE(errorCode)) { errorCode = 
U_ZERO_ERROR; } else { @@ -1081,7 +919,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Minute - UnicodeString minute = getFunctionOption(toFormat, opts, UnicodeString("minute"), errorCode); + UnicodeString minute = opts.getStringFunctionOption(UnicodeString("minute"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1093,7 +931,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } // Second - UnicodeString second = getFunctionOption(toFormat, opts, UnicodeString("second"), errorCode); + UnicodeString second = opts.getStringFunctionOption(UnicodeString("second"), errorCode); if (U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; } else { @@ -1116,18 +954,17 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } if (U_FAILURE(errorCode)) { - return {}; + return; } if (!df.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } UnicodeString result; - const Formattable& source = toFormat.asFormattable(); - switch (source.getType()) { + switch (source->getType()) { case UFMT_STRING: { - const UnicodeString& sourceStr = source.getString(errorCode); + const UnicodeString& sourceStr = source->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); // Pattern for ISO 8601 format - datetime UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss"); @@ -1154,14 +991,13 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& // Use the parsed date as the source value // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted - toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback()); + operand = message2::Formattable::forDate(d); df->format(d, result, 0, errorCode); } break; } case UFMT_DATE: { - df->format(source.asICUFormattable(errorCode), result, 0, errorCode); + df->format(source->asICUFormattable(errorCode), result, 0, errorCode); 
if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; @@ -1176,64 +1012,96 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } } if (U_FAILURE(errorCode)) { - return {}; + return; + } + // Ignore U_USING_DEFAULT_WARNING + if (errorCode == U_USING_DEFAULT_WARNING) { + errorCode = U_ZERO_ERROR; } - return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result))); + formattedDate = result; } -StandardFunctions::DateTimeFactory::~DateTimeFactory() {} StandardFunctions::DateTime::~DateTime() {} +StandardFunctions::DateTimeValue::~DateTimeValue() {} -// --------- TextFactory +// --------- String -Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { - Selector* result = new TextSelector(locale); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; +/* static */ StandardFunctions::String* +StandardFunctions::String::string(UErrorCode& success) { + NULL_ON_ERROR(success); + + LocalPointer result(new String()); + if (!result.isValid()) { + success = U_MEMORY_ALLOCATION_ERROR; return nullptr; } + return result.orphan(); +} + +extern UnicodeString formattableToString(const Locale&, + const UBiDiDirection, + const Formattable&, + UErrorCode&); + +LocalPointer +StandardFunctions::String::call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer + result(new StringValue(context, val, opts, errorCode)); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } return result; } -void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& opts, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& errorCode) const { +UnicodeString 
StandardFunctions::StringValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; +} + +StandardFunctions::StringValue::StringValue(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions&, + UErrorCode& status) { + CHECK_ERROR(status); + operand = val.getOperand(); // No options - (void) opts; + // Convert to string + formattedString = formattableToString(context.getLocale(), context.getDirection(), operand, status); +} +void StandardFunctions::StringValue::selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) const { CHECK_ERROR(errorCode); // Just compares the key and value as strings - // Argument must be present - if (!toFormat.canFormat()) { - errorCode = U_MF_SELECTOR_ERROR; - return; - } - prefsLen = 0; - // Convert to string - const UnicodeString& formattedValue = toFormat.formatToString(locale, errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t i = 0; i < keysLen; i++) { - if (keys[i] == formattedValue) { - prefs[0] = keys[i]; + if (keys[i] == formattedString) { + prefs[0] = i; prefsLen = 1; break; } } } -StandardFunctions::TextFactory::~TextFactory() {} -StandardFunctions::TextSelector::~TextSelector() {} +StandardFunctions::String::~String() {} +StandardFunctions::StringValue::~StringValue() {} } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 733fc5e945d5..c99bc91340de 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -31,97 +31,65 @@ namespace message2 { class StandardFunctions { friend class MessageFormatter; - static UnicodeString getStringOption(const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode); - class DateTime; + class DateTimeValue; 
- class DateTimeFactory : public FormatterFactory { + class DateTime : public Function { public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - static DateTimeFactory* date(UErrorCode&); - static DateTimeFactory* time(UErrorCode&); - static DateTimeFactory* dateTime(UErrorCode&); - DateTimeFactory() = delete; - virtual ~DateTimeFactory(); + static DateTime* date(UErrorCode&); + static DateTime* time(UErrorCode&); + static DateTime* dateTime(UErrorCode&); + + LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; + virtual ~DateTime(); private: - friend class DateTime; + friend class DateTimeFactory; + friend class DateTimeValue; typedef enum DateTimeType { - Date, - Time, - DateTime + kDate, + kTime, + kDateTime } DateTimeType; - DateTimeType type; - DateTimeFactory(DateTimeType t) : type(t) {} - }; - - class DateTime : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; - virtual ~DateTime(); - - private: - const Locale& locale; - const DateTimeFactory::DateTimeType type; - friend class DateTimeFactory; - DateTime(const Locale& l, DateTimeFactory::DateTimeType t) : locale(l), type(t) {} + const DateTimeType type; + static DateTime* create(DateTimeType, UErrorCode&); + DateTime(DateTimeType t) : type(t) {} const LocalPointer icuFormatter; - - /* - Looks up an option by name, first checking `opts`, then the cached options - in `toFormat` if applicable, and finally using a default - - Ignores any options with non-string values - */ - UnicodeString getFunctionOption(const FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName) const; - // Version for options that don't have defaults; sets the error - // code instead of returning a default value - UnicodeString getFunctionOption(const 
FormattedPlaceholder& toFormat, - const FunctionOptions& opts, - const UnicodeString& optionName, - UErrorCode& errorCode) const; - }; - // Note: IntegerFactory doesn't implement SelectorFactory; - // instead, an instance of PluralFactory is registered to the integer - // selector - // TODO - class IntegerFactory : public FormatterFactory { - public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~IntegerFactory(); - }; + class NumberValue; - class NumberFactory : public FormatterFactory { + class Number : public Function { public: - Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; - virtual ~NumberFactory(); - private: - friend class IntegerFactory; - static NumberFactory integer(const Locale& locale, UErrorCode& status); - }; + static Number* integer(UErrorCode& success); + static Number* number( UErrorCode& success); - class Number : public Formatter { - public: - FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; + LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& errorCode) override; virtual ~Number(); private: - friend class NumberFactory; + friend class NumberValue; friend class StandardFunctions; - Number(const Locale& loc) : locale(loc), icuFormatter(number::NumberFormatter::withLocale(loc)) {} - Number(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt), icuFormatter(number::NumberFormatter::withLocale(loc)) {} - static Number integer(const Locale& loc); + typedef enum PluralType { + PLURAL_ORDINAL, + PLURAL_CARDINAL, + PLURAL_EXACT + } PluralType; + + static Number* create(bool, UErrorCode&); + Number(bool isInt) : isInteger(isInt) /*, icuFormatter(number::NumberFormatter::withLocale(loc))*/ {} // These options have their own accessor methods, since they have different default values. 
+ int32_t digitSizeOption(const FunctionOptions&, const UnicodeString&) const; int32_t maximumFractionDigits(const FunctionOptions& options) const; int32_t minimumFractionDigits(const FunctionOptions& options) const; int32_t minimumSignificantDigits(const FunctionOptions& options) const; @@ -129,86 +97,86 @@ namespace message2 { int32_t minimumIntegerDigits(const FunctionOptions& options) const; bool usePercent(const FunctionOptions& options) const; - const Locale& locale; const bool isInteger = false; const number::LocalizedNumberFormatter icuFormatter; + + static PluralType pluralType(const FunctionOptions& opts); }; static number::LocalizedNumberFormatter formatterForOptions(const Number& number, + const Locale& locale, const FunctionOptions& opts, UErrorCode& status); - class PluralFactory : public SelectorFactory { - public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~PluralFactory(); + class NumberValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const override; + UBool isSelectable() const override { return true; } + NumberValue(); + virtual ~NumberValue(); private: - friend class IntegerFactory; - friend class MessageFormatter; + friend class Number; + + Locale locale; + number::FormattedNumber formattedNumber; + NumberValue(const Number&, + const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); + }; // class NumberValue + + class DateTimeValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const; + DateTimeValue(); + virtual ~DateTimeValue(); + private: + friend class DateTime; - PluralFactory() {} - PluralFactory(bool isInt) : isInteger(isInt) {} - static PluralFactory integer() { return PluralFactory(true);} - const bool isInteger = false; - }; + UnicodeString 
formattedDate; + DateTimeValue(DateTime::DateTimeType type, const FunctionContext& context, + const FunctionValue&, const FunctionOptions&, UErrorCode&); + }; // class DateTimeValue - class Plural : public Selector { + class String : public Function { public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~Plural(); + LocalPointer call(const FunctionContext& context, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) override; + static String* string(UErrorCode& status); + virtual ~String(); private: - friend class IntegerFactory; - friend class PluralFactory; + friend class StringFactory; - // Can't use UPluralType for this since we want to include - // exact matching as an option - typedef enum PluralType { - PLURAL_ORDINAL, - PLURAL_CARDINAL, - PLURAL_EXACT - } PluralType; - Plural(const Locale& loc, UErrorCode& errorCode); - Plural(const Locale& loc, bool isInt, UErrorCode& errorCode); - static Plural integer(const Locale& loc, UErrorCode& errorCode) { return Plural(loc, true, errorCode); } - PluralType pluralType(const FunctionOptions& opts) const; - const Locale& locale; - const bool isInteger = false; - LocalPointer numberFormatter; + String() {} }; - class TextFactory : public SelectorFactory { + class StringValue : public FunctionValue { public: - Selector* createSelector(const Locale& locale, UErrorCode& status) const override; - virtual ~TextFactory(); - }; - - class TextSelector : public Selector { - public: - void selectKey(FormattedPlaceholder&& val, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const override; - virtual ~TextSelector(); - + UnicodeString formatToString(UErrorCode&) const override; + void selectKeys(const UnicodeString* keys, + int32_t 
keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const override; + UBool isSelectable() const override { return true; } + virtual ~StringValue(); private: - friend class TextFactory; + friend class String; - // Formatting `value` to a string might require the locale - const Locale& locale; + UnicodeString formattedString; + StringValue(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&); + }; // class StringValue - TextSelector(const Locale& l) : locale(l) {} - }; }; extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode); diff --git a/icu4c/source/i18n/messageformat2_macros.h b/icu4c/source/i18n/messageformat2_macros.h index f06ed1a5a977..d4504eec40f9 100644 --- a/icu4c/source/i18n/messageformat2_macros.h +++ b/icu4c/source/i18n/messageformat2_macros.h @@ -17,6 +17,7 @@ #include "unicode/format.h" #include "unicode/unistr.h" #include "plurrule_impl.h" +#include "ubidiimp.h" U_NAMESPACE_BEGIN @@ -30,8 +31,6 @@ using namespace pluralimpl; #define LEFT_CURLY_BRACE ((UChar32)0x007B) #define RIGHT_CURLY_BRACE ((UChar32)0x007D) #define HTAB ((UChar32)0x0009) -#define CR ((UChar32)0x000D) -#define LF ((UChar32)0x000A) #define IDEOGRAPHIC_SPACE ((UChar32)0x3000) #define PIPE ((UChar32)0x007C) diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index c5459f042f40..a4f223029f1e 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -30,8 +30,9 @@ namespace message2 { class Environment; class MessageContext; - class ResolvedSelector; class StaticErrors; + class InternalValue; + class BaseValue; /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. @@ -334,45 +335,31 @@ namespace message2 { // Do not define default assignment operator const MessageFormatter &operator=(const MessageFormatter &) = delete; - ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const; - ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const; - // Selection methods // Takes a vector of FormattedPlaceholders - void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const; + void resolveSelectors(MessageContext&, Environment& env, UErrorCode&, UVector&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output) void filterVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) void sortVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of strings (input) and a vector of strings (output) - void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const; + void matchSelectorKeys(const UVector&, MessageContext&, InternalValue&& rv, UVector&, UErrorCode&) const; // Takes a vector of FormattedPlaceholders (input), // and a vector of vectors of strings (output) void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods - [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; - void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; - // Formats a call to a formatting function - // Dispatches on argument type - [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const; - // Dispatches 
on function name - [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const; - // Formats an expression that appears as a selector - ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const; - // Formats an expression that appears in a pattern or as the definition of a local variable - [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; - void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; + [[nodiscard]] InternalValue evalLiteral(const data_model::Literal&, UErrorCode&) const; + void formatPattern(MessageContext&, Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; + FunctionContext makeFunctionContext(const FunctionOptions&) const; + [[nodiscard]] InternalValue& apply(Environment&, const FunctionName&, InternalValue&, FunctionOptions&&, + MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalExpression(Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] FunctionOptions resolveOptions(Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue& evalOperand(Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue evalArgument(const 
data_model::VariableName&, MessageContext&, UErrorCode&) const; + void formatSelectors(MessageContext& context, Environment& env, UErrorCode &status, UnicodeString& result) const; // Function registry methods bool hasCustomMFFunctionRegistry() const { @@ -384,18 +371,12 @@ namespace message2 { // (a FormatterFactory can have mutable state) const MFFunctionRegistry& getCustomMFFunctionRegistry() const; - bool isCustomFormatter(const FunctionName&) const; - FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const; - bool isBuiltInSelector(const FunctionName&) const; - bool isBuiltInFormatter(const FunctionName&) const; - bool isCustomSelector(const FunctionName&) const; - const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const; - bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } - bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } - const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const; - - Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const; - Formatter* getFormatter(const FunctionName&, UErrorCode&) const; + bool isCustomFunction(const FunctionName&) const; + bool isBuiltInFunction(const FunctionName&) const; + bool isFunction(const FunctionName& fn) const { return isBuiltInFunction(fn) || isCustomFunction(fn); } + void setNotSelectableError(MessageContext&, const InternalValue&, UErrorCode&) const; + // Result is not adopted + Function* lookupFunction(const FunctionName&, UErrorCode&) const; bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const; // Checking for resolution errors diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 8a779adb9ab3..0298040f2f3b 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ 
b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -28,10 +28,6 @@ class UVector; namespace message2 { - class Formatter; - class MessageContext; - class Selector; - // Formattable // ---------- @@ -443,29 +439,30 @@ namespace message2 { * a single named function option. It pairs the given name with the `Formattable` * value resulting from evaluating the option's value. * - * `ResolvedFunctionOption` is immutable and is not copyable or movable. + * `ResolvedFunctionOption` is immutable, movable, and copyable. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ #ifndef U_IN_DOXYGEN +class FunctionValue; class U_I18N_API ResolvedFunctionOption : public UObject { private: /* const */ UnicodeString name; - /* const */ Formattable value; + // owned by the global environment + const FunctionValue* value; public: const UnicodeString& getName() const { return name; } - const Formattable& getValue() const { return value; } - ResolvedFunctionOption(const UnicodeString& n, const Formattable& f) : name(n), value(f) {} + const FunctionValue& getValue() const { return *value; } + // Adopts `f` + ResolvedFunctionOption(const UnicodeString& n, const FunctionValue& f); ResolvedFunctionOption() {} ResolvedFunctionOption(ResolvedFunctionOption&&); - ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) noexcept { - name = std::move(other.name); - value = std::move(other.value); - return *this; - } + ResolvedFunctionOption& operator=(ResolvedFunctionOption&& other) = default; + ResolvedFunctionOption& operator=(const ResolvedFunctionOption& other) = default; + ResolvedFunctionOption(const ResolvedFunctionOption&) = default; virtual ~ResolvedFunctionOption(); }; // class ResolvedFunctionOption #endif @@ -477,11 +474,13 @@ class U_I18N_API ResolvedFunctionOption : public UObject { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. 
*/ -using FunctionOptionsMap = std::map; +using FunctionOptionsMap = std::map; /** * Structure encapsulating named options passed to a custom selector or formatter. * + * This class is immutable, movable and copyable. + * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ @@ -492,24 +491,31 @@ class U_I18N_API FunctionOptions : public UObject { * The syntactic order of options is not guaranteed to * be preserved. * - * This class is immutable and movable but not copyable. - * - * @return A map from strings to `message2::Formattable` objects representing + * @return A map from strings to FormattedPlaceholder objects representing * the results of resolving each option value. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ FunctionOptionsMap getOptions() const { - int32_t len; - const ResolvedFunctionOption* resolvedOptions = getResolvedFunctionOptions(len); FunctionOptionsMap result; - for (int32_t i = 0; i < len; i++) { - const ResolvedFunctionOption& opt = resolvedOptions[i]; - result[opt.getName()] = opt.getValue(); + for (int32_t i = 0; i < functionOptionsLen; i++) { + ResolvedFunctionOption& opt = options[i]; + result[opt.getName()] = &opt.getValue(); } return result; } + /** + * Returns a new FunctionOptions object containing all the key-value + * pairs from `this` and `other`. When `this` and `other` define options with + * the same name, `this` takes preference. + * + * @return The result of merging `this` and `other`. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionOptions mergeOptions(const FunctionOptions& other, UErrorCode&) const; /** * Default constructor. * Returns an empty mapping. 
@@ -526,28 +532,37 @@ class U_I18N_API FunctionOptions : public UObject { */ virtual ~FunctionOptions(); /** - * Move assignment operator: - * The source FunctionOptions will be left in a valid but undefined state. + * Non-member swap function. + * @param f1 will get f2's contents + * @param f2 will get f1's contents * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FunctionOptions& operator=(FunctionOptions&&) noexcept; + friend inline void swap(FunctionOptions& f1, FunctionOptions& f2) noexcept { + using std::swap; + + if (f1.bogus || f2.bogus) { + f1.bogus = f2.bogus = true; + return; + } + swap(f1.options, f2.options); + swap(f1.functionOptionsLen, f2.functionOptionsLen); + } /** - * Move constructor: - * The source FunctionOptions will be left in a valid but undefined state. + * Assignment operator * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - FunctionOptions(FunctionOptions&&); + FunctionOptions& operator=(FunctionOptions) noexcept; /** * Copy constructor. * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. 
*/ - FunctionOptions& operator=(const FunctionOptions&) = delete; + FunctionOptions(const FunctionOptions&); private: friend class MessageFormatter; friend class StandardFunctions; @@ -555,11 +570,14 @@ class U_I18N_API FunctionOptions : public UObject { explicit FunctionOptions(UVector&&, UErrorCode&); const ResolvedFunctionOption* getResolvedFunctionOptions(int32_t& len) const; - UBool getFunctionOption(const UnicodeString&, Formattable&) const; + const FunctionValue* getFunctionOption(const UnicodeString&, UErrorCode&) const; // Returns empty string if option doesn't exist UnicodeString getStringFunctionOption(const UnicodeString&) const; + // Sets error code if option doesn't exist + UnicodeString getStringFunctionOption(const UnicodeString&, UErrorCode&) const; int32_t optionsCount() const { return functionOptionsLen; } + bool bogus = false; // Used in case a copy fails // Named options passed to functions // This is not a Hashtable in order to make it possible for code in a public header file // to construct a std::map from it, on-the-fly. Otherwise, it would be impossible to put @@ -568,306 +586,6 @@ class U_I18N_API FunctionOptions : public UObject { int32_t functionOptionsLen = 0; }; // class FunctionOptions - - // TODO doc comments - // Encapsulates either a formatted string or formatted number; - // more output types could be added in the future. - - /** - * A `FormattedValue` represents the result of formatting a `message2::Formattable`. - * It contains either a string or a formatted number. (More types could be added - * in the future.) - * - * `FormattedValue` is immutable and movable. It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormattedValue : public UObject { - public: - /** - * Formatted string constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - explicit FormattedValue(const UnicodeString&); - /** - * Formatted number constructor. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedValue(number::FormattedNumber&&); - /** - * Default constructor. Leaves the FormattedValue in - * a valid but undefined state. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue() : type(kString) {} - /** - * Returns true iff this is a formatted string. - * - * @return True if and only if this value is a formatted string. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isString() const { return type == kString; } - /** - * Returns true iff this is a formatted number. - * - * @return True if and only if this value is a formatted number. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isNumber() const { return type == kNumber; } - /** - * Gets the string contents of this value. If !isString(), then - * the result is undefined. - * @return A reference to a formatted string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getString() const { return stringOutput; } - /** - * Gets the number contents of this value. If !isNumber(), then - * the result is undefined. - * @return A reference to a formatted number. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const number::FormattedNumber& getNumber() const { return numberOutput; } - /** - * Move assignment operator: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - FormattedValue& operator=(FormattedValue&&) noexcept; - /** - * Move constructor: - * The source FormattedValue will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedValue(FormattedValue&& other) { *this = std::move(other); } - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormattedValue(); - private: - enum Type { - kString, - kNumber - }; - Type type; - UnicodeString stringOutput; - number::FormattedNumber numberOutput; - }; // class FormattedValue - - /** - * A `FormattablePlaceholder` encapsulates an input value (a `message2::Formattable`) - * together with an optional output value (a `message2::FormattedValue`). - * More information, such as source line/column numbers, could be added to the class - * in the future. - * - * `FormattablePlaceholder` is immutable (not deeply immutable) and movable. - * It is not copyable. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormattedPlaceholder : public UObject { - public: - /** - * Fallback constructor. Constructs a value that represents a formatting error, - * without recording an input `Formattable` as the source. - * - * @param s An error string. (See the MessageFormat specification for details - * on fallback strings.) - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - explicit FormattedPlaceholder(const UnicodeString& s) : fallback(s), type(kFallback) {} - /** - * Constructor for fully formatted placeholders. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(FunctionOptions()), type(kEvaluated) {} - /** - * Constructor for fully formatted placeholders with options. - * - * @param input A `FormattedPlaceholder` containing the fallback string and source - * `Formattable` used to construct the formatted value. - * @param opts Function options that were used to construct `output`. May be the empty map. - * @param output A `FormattedValue` representing the formatted output of `input`. - * Passed by move. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const FormattedPlaceholder& input, FunctionOptions&& opts, FormattedValue&& output) - : fallback(input.fallback), source(input.source), - formatted(std::move(output)), previousOptions(std::move(opts)), type(kEvaluated) {} - /** - * Constructor for unformatted placeholders. - * - * @param input A `Formattable` object. - * @param fb Fallback string to use if an error occurs while formatting the input. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(const Formattable& input, const UnicodeString& fb) - : fallback(fb), source(input), type(kUnevaluated) {} - /** - * Default constructor. Leaves the FormattedPlaceholder in a - * valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder() : type(kNull) {} - /** - * Returns the source `Formattable` value for this placeholder. - * The result is undefined if this is a null operand. - * - * @return A message2::Formattable value. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const message2::Formattable& asFormattable() const; - /** - * Returns true iff this is a fallback placeholder. - * - * @return True if and only if this placeholder was constructed from a fallback string, - * with no `Formattable` source or formatting output. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isFallback() const { return type == kFallback; } - /** - * Returns true iff this is a null placeholder. - * - * @return True if and only if this placeholder represents the absent argument to a formatter - * that was invoked without an argument. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isNullOperand() const { return type == kNull; } - /** - * Returns true iff this has formatting output. - * - * @return True if and only if this was constructed from both an input `Formattable` and - * output `FormattedValue`. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool isEvaluated() const { return (type == kEvaluated); } - /** - * Returns true iff this represents a valid argument to the formatter. - * - * @return True if and only if this is neither the null argument nor a fallback placeholder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - bool canFormat() const { return !(isFallback() || isNullOperand()); } - /** - * Gets the fallback value of this placeholder, to be used in its place if an error occurs while - * formatting it. - * @return A reference to this placeholder's fallback string. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const UnicodeString& getFallback() const { return fallback; } - /** - * Returns the options of this placeholder. 
The result is the empty map if !isEvaluated(). - * @return A reference to an option map, capturing the options that were used - * in producing the output of this `FormattedPlaceholder` - * (or empty if there is no output) - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const FunctionOptions& options() const { return previousOptions; } - - /** - * Returns the formatted output of this placeholder. The result is undefined if !isEvaluated(). - * @return A fully formatted `FormattedPlaceholder`. - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - const FormattedValue& output() const { return formatted; } - /** - * Move assignment operator: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder& operator=(FormattedPlaceholder&&) noexcept; - /** - * Move constructor: - * The source FormattedPlaceholder will be left in a valid but undefined state. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormattedPlaceholder(FormattedPlaceholder&& other) { *this = std::move(other); } - /** - * Formats this as a string, using defaults. If this is - * either the null operand or is a fallback value, the return value is the result of formatting the - * fallback value (which is the default fallback string if this is the null operand). - * If there is no formatted output and the input is object- or array-typed, - * then the argument is treated as a fallback value, since there is no default formatter - * for objects or arrays. - * - * @param locale The locale to use for formatting numbers or dates - * @param status Input/output error code - * @return The result of formatting this placeholder. 
- * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - UnicodeString formatToString(const Locale& locale, - UErrorCode& status) const; - - private: - friend class MessageFormatter; - - enum Type { - kFallback, // Represents the result of formatting that encountered an error - kNull, // Represents the absence of both an output and an input (not necessarily an error) - kUnevaluated, // `source` should be valid, but there's no result yet - kEvaluated, // `formatted` exists - }; - UnicodeString fallback; - Formattable source; - FormattedValue formatted; - FunctionOptions previousOptions; // Ignored unless type is kEvaluated - Type type; - }; // class FormattedPlaceholder - /** * Not yet implemented: The result of a message formatting operation. Based on * ICU4J's FormattedMessage.java. diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index b8429e3b83aa..c411c47a4ca8 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -14,6 +14,7 @@ #include "unicode/messageformat2_data_model_names.h" #include "unicode/messageformat2_formattable.h" +#include "unicode/ubidi.h" #ifndef U_HIDE_DEPRECATED_API @@ -28,81 +29,7 @@ namespace message2 { using namespace data_model; - /** - * Interface that factory classes for creating formatters must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API FormatterFactory : public UObject { - // TODO: the coding guidelines say that interface classes - // shouldn't inherit from UObject, but if I change it so these - // classes don't, and the individual formatter factory classes - // inherit from public FormatterFactory, public UObject, then - // memory leaks ensue - public: - /** - * Constructs a new formatter object. 
This method is not const; - * formatter factories with local state may be defined. - * - * @param locale Locale to be used by the formatter. - * @param status Input/output error code. - * @return The new Formatter, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Formatter* createFormatter(const Locale& locale, UErrorCode& status) = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~FormatterFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - FormatterFactory& operator=(const FormatterFactory&) = delete; - }; // class FormatterFactory - - /** - * Interface that factory classes for creating selectors must implement. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - class U_I18N_API SelectorFactory : public UObject { - public: - /** - * Constructs a new selector object. - * - * @param locale Locale to be used by the selector. - * @param status Input/output error code. - * @return The new selector, which is non-null if U_SUCCESS(status). - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual Selector* createSelector(const Locale& locale, UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~SelectorFactory(); - /** - * Copy constructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - SelectorFactory& operator=(const SelectorFactory&) = delete; - }; // class SelectorFactory + class Function; /** * Defines mappings from names of formatters and selectors to functions implementing them. 
@@ -117,38 +44,25 @@ namespace message2 { class U_I18N_API MFFunctionRegistry : public UObject { private: - using FormatterMap = Hashtable; // Map from stringified function names to FormatterFactory* - using SelectorMap = Hashtable; // Map from stringified function names to SelectorFactory* + using FunctionMap = Hashtable; // Map from function names to FunctionFactory* public: /** - * Looks up a formatter factory by the name of the formatter. The result is non-const, - * since formatter factories may have local state. Returns the result by pointer + * Looks up a function by the name of the function. The result is non-const, + * since functions may have local state. Returns the result by pointer * rather than by reference since it can fail. * - * @param formatterName Name of the desired formatter. - * @return A pointer to the `FormatterFactory` registered under `formatterName`, or null - * if no formatter was registered under that name. The pointer is not owned + * @param functionName Name of the desired function. + * @return A pointer to the function registered under `functionName`, or null + * if no function was registered under that name. The pointer is not owned * by the caller. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - FormatterFactory* getFormatter(const FunctionName& formatterName) const; + Function* getFunction(const FunctionName& functionName) const; /** - * Looks up a selector factory by the name of the selector. (This returns the result by pointer - * rather than by reference since `FormatterFactory` is an abstract class.) - * - * @param selectorName Name of the desired selector. - * @return A pointer to the `SelectorFactory` registered under `selectorName`, or null - * if no formatter was registered under that name. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - const SelectorFactory* getSelector(const FunctionName& selectorName) const; - /** - * Looks up a formatter factory by a type tag. This method gets the name of the default formatter registered + * Looks up a function by a type tag. This method gets the name of the default formatter registered * for that type. If no formatter was explicitly registered for this type, it returns false. * * @param formatterType Type tag for the desired `FormattableObject` type to be formatted. @@ -174,9 +88,9 @@ namespace message2 { class U_I18N_API Builder : public UObject { private: // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters; - SelectorMap* selectors; - Hashtable* formattersByType; + FunctionMap* functions; + // Mapping from strings (type tags) to FunctionNames + Hashtable* formattersByType = nullptr; // Do not define copy constructor/assignment operator Builder& operator=(const Builder&) = delete; @@ -200,18 +114,20 @@ namespace message2 { be re-thought. */ /** - * Registers a formatter factory to a given formatter name. + * Registers a function to a given name. * - * @param formatterName Name of the formatter being registered. - * @param formatterFactory A pointer to a FormatterFactory object to use - * for creating `formatterName` formatters. This argument is adopted. + * @param functionName Name of the formatter being registered. + * @param function A pointer to a Function object. + * This argument is adopted. * @param errorCode Input/output error code * @return A reference to the builder. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. 
*/ - Builder& adoptFormatter(const data_model::FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode); + Builder& adoptFunction(const data_model::FunctionName& functionName, + Function* function, + UErrorCode& errorCode); /** * Registers a formatter factory to a given type tag. * (See `FormattableObject` for details on type tags.) @@ -225,21 +141,9 @@ namespace message2 { * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - Builder& setDefaultFormatterNameByType(const UnicodeString& type, const data_model::FunctionName& functionName, UErrorCode& errorCode); - - /** - * Registers a selector factory to a given selector name. Adopts `selectorFactory`. - * - * @param selectorName Name of the selector being registered. - * @param selectorFactory A SelectorFactory object to use for creating `selectorName` - * selectors. - * @param errorCode Input/output error code - * @return A reference to the builder. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - Builder& adoptSelector(const data_model::FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode); + Builder& setDefaultFormatterNameByType(const UnicodeString& type, + const data_model::FunctionName& functionName, + UErrorCode& errorCode); /** * Creates an immutable `MFFunctionRegistry` object with the selectors and formatters * that were previously registered. The builder cannot be used after this call. 
@@ -305,112 +209,247 @@ namespace message2 { MFFunctionRegistry& operator=(const MFFunctionRegistry&) = delete; MFFunctionRegistry(const MFFunctionRegistry&) = delete; - MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType); + MFFunctionRegistry(FunctionMap*, Hashtable*); MFFunctionRegistry() {} // Debugging; should only be called on a function registry with // all the standard functions registered - void checkFormatter(const char*) const; - void checkSelector(const char*) const; + void checkFunction(const char*) const; void checkStandard() const; - bool hasFormatter(const data_model::FunctionName& f) const; - bool hasSelector(const data_model::FunctionName& s) const; + bool hasFunction(const data_model::FunctionName& f) const; void cleanup() noexcept; // Must use raw pointers to avoid instantiating `LocalPointer` on an internal type - FormatterMap* formatters = nullptr; - SelectorMap* selectors = nullptr; + FunctionMap* functions = nullptr; // Mapping from strings (type tags) to FunctionNames Hashtable* formattersByType = nullptr; }; // class MFFunctionRegistry /** - * Interface that formatter classes must implement. + * Class implementing data from contextual options. + * See https://github.com/unicode-org/message-format-wg/pull/846 * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - class U_I18N_API Formatter : public UObject { - public: - /** - * Formats the input passed in `context` by setting an output using one of the - * `FormattingContext` methods or indicating an error. - * - * @param toFormat Placeholder, including a source formattable value and possibly - * the output of a previous formatter applied to it; see - * `message2::FormattedPlaceholder` for details. Passed by move. - * @param options The named function options. Passed by move - * @param status Input/output error code. 
Should not be set directly by the - * custom formatter, which should use `FormattingContext::setFormattingWarning()` - * to signal errors. The custom formatter may pass `status` to other ICU functions - * that can signal errors using this mechanism. - * - * @return The formatted value. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual FormattedPlaceholder format(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - UErrorCode& status) const = 0; - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual ~Formatter(); - }; // class Formatter + class U_I18N_API FunctionContext : public UObject { + public: + /** + * Returns the locale from this context. + * + * @return Locale the context was created with. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + const Locale& getLocale() const { return locale; } + /** + * Returns the text direction from this context. + * + * @return A UBiDiDirection indicating the text direction. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + UBiDiDirection getDirection() const { return dir; } + /** + * Returns the ID from this context. + * + * @return A string to be used in formatting to parts. + * (Formatting to parts is not yet implemented.) + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + const UnicodeString& getID() const { return id; } + private: + friend class MessageFormatter; + + Locale locale; + UBiDiDirection dir; + UnicodeString id; + + FunctionContext(const Locale& loc, UBiDiDirection d, UnicodeString i) + : locale(loc), dir(d), id(i) {} + }; // class FunctionContext + + class FunctionValue; /** - * Interface that selector classes must implement. 
+ * Interface that function handler classes must implement. * - * @internal ICU 75 technology preview + * @internal ICU 77 technology preview * @deprecated This API is for technology preview only. */ - class U_I18N_API Selector : public UObject { - public: - /** - * Compares the input to an array of keys, and returns an array of matching - * keys sorted by preference. - * - * @param toFormat The unnamed function argument; passed by move. - * @param options A reference to the named function options. - * @param keys An array of strings that are compared to the input - * (`context.getFormattableInput()`) in an implementation-specific way. - * @param keysLen The length of `keys`. - * @param prefs An array of strings with length `keysLen`. The contents of - * the array is undefined. `selectKey()` should set the contents - * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. - * @param prefsLen A reference that `selectKey()` should set to the length of `prefs`, - * which must be less than or equal to `keysLen`. - * @param status Input/output error code. Should not be set directly by the - * custom selector, which should use `FormattingContext::setSelectorError()` - * to signal errors. The custom selector may pass `status` to other ICU functions - * that can signal errors using this mechanism. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. - */ - virtual void selectKey(FormattedPlaceholder&& toFormat, - FunctionOptions&& options, - const UnicodeString* keys, - int32_t keysLen, - UnicodeString* prefs, - int32_t& prefsLen, - UErrorCode& status) const = 0; - // Note: This takes array arguments because the internal MessageFormat code has to - // call this method, and can't include any code that constructs std::vectors. - /** - * Destructor. - * - * @internal ICU 75 technology preview - * @deprecated This API is for technology preview only. 
- */ - virtual ~Selector(); - }; // class Selector + class U_I18N_API Function : public UObject { + public: + /** + * Calls this Function on a FunctionValue operand and its FunctionOptions options, + * returning a LocalPointer to a FunctionValue. + * + * @param context The context of this function, based on its contextual options + * @param operand The unnamed argument to the function. + * @param options Resolved options for this function. + * @param status Input/output error code + * @return The function value that is the result of calling this function on + * the arguments. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual LocalPointer call(const FunctionContext& context, + const FunctionValue& operand, + const FunctionOptions& options, + UErrorCode& status) = 0; + /** + * Destructor. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Function(); + }; // class Function + + /** + * Type representing argument and return values for custom functions. + * It encapsulates an operand and resolved options, and can be extended with + * additional state. + * Adding a new custom function requires adding a new class that + * implements this interface. + * + * FunctionValues are assumed to be immutable (the call() method on + * Function takes a const FunctionValue&, and the formatToString() + * and selectKeys() methods are const.) Feedback on whether classes + * implementing FunctionValue should be allowed internal mutable state + * is welcome during the Technology Preview period. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API FunctionValue : public UObject { + public: + /** + * Returns the string representation of this value. The default + * method signals an error. Must be overridden by classes + * implementing values that support formatting.
+ * + * @param status Input/output error code + * @return A string. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UnicodeString formatToString(UErrorCode& status) const { + if (U_SUCCESS(status)) { + status = U_MF_FORMATTING_ERROR; + } + return {}; + } + /** + * Returns the Formattable operand that was used to construct + * this value. The operand may be obtained from calling getOperand() + * on the input FunctionValue, or it may be constructed separately. + * + * @return A reference to a message2::Formattable object. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const Formattable& getOperand() const { return operand; } + /** + * Returns a reference to the resolved options for this value. + * + * @return A reference to the resolved options for this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const FunctionOptions& getResolvedOptions() const { return opts; } + /** + * Returns true if this value supports selection. The default method + * returns false. The method must be overridden for values that support + * selection. + * + * @return True iff this value supports selection. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool isSelectable() const { + // In the future, this function could return a capability + // indicating whether this function can format, select, or both. + return false; + } + /** + * Returns true if this value represents a null operand, that is, + * the absence of an argument. This method should not be overridden. + * It can be called in order to check whether the argument is present. + * Some functions may be nullary (they may work with no arguments). + * + * @return True iff this value represents an absent operand. 
+ * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool isNullOperand() const { return false; } + /** + * Compares this value to an array of keys, and returns an array of matching + * keys sorted by preference. The default implementation of this method + * signals an error. It should be overridden for value classes that support + * selection. + * + * @param keys An array of strings to compare to the input. + * @param keysLen The length of `keys`. + * @param prefs An array of indices into `keys`. + * The initial contents of + * the array are undefined. `selectKeys()` should set the contents + * of `prefs` to a subset of the indices in `keys`, + * with the best match placed at the lowest index in `prefs`. + * @param prefsLen A reference that `selectKeys()` should set to the length of `prefs`, + * which must be less than or equal to `keysLen`. + * @param status Input/output error code. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void selectKeys(const UnicodeString* keys, + int32_t keysLen, + int32_t* prefs, + int32_t& prefsLen, + UErrorCode& status) const { + (void) keys; + (void) keysLen; + (void) prefs; + (void) prefsLen; + if (U_SUCCESS(status)) { + status = U_MF_SELECTOR_ERROR; + } + } + /** + * Destructor. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~FunctionValue(); + protected: + /** + * Operand used to construct this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only. + */ + Formattable operand; + /** + * Resolved options attached to this value. + * + * @internal ICU 77 technology preview + * @deprecated This API is for technology preview only.
+ */ + FunctionOptions opts; + }; // class FunctionValue } // namespace message2 diff --git a/icu4c/source/test/depstest/depstest.py b/icu4c/source/test/depstest/depstest.py index fba45a079815..f993308fbd38 100755 --- a/icu4c/source/test/depstest/depstest.py +++ b/icu4c/source/test/depstest/depstest.py @@ -123,6 +123,9 @@ def _ReadLibrary(root_path, library_name): ("i18n/messageformat2_data_model.o", "typeinfo for std::exception"), ("i18n/messageformat2_data_model.o", "vtable for std::exception"), ("i18n/messageformat2_data_model.o", "std::exception::~exception()"), + ("i18n/messageformat2_evaluation.o", "typeinfo for std::exception"), + ("i18n/messageformat2_evaluation.o", "vtable for std::exception"), + ("i18n/messageformat2_evaluation.o", "std::exception::~exception()"), ("i18n/messageformat2_formattable.o", "typeinfo for std::exception"), ("i18n/messageformat2_formattable.o", "vtable for std::exception"), ("i18n/messageformat2_formattable.o", "std::exception::~exception()"), diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 353082ef5c91..0c42c83f0ecd 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -278,7 +278,7 @@ void TestMessageFormat2::testAPICustomFunctions() { // Set up custom function registry MFFunctionRegistry::Builder builder(errorCode); MFFunctionRegistry functionRegistry = - builder.adoptFormatter(data_model::FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + builder.adoptFunction(data_model::FunctionName("person"), new PersonNameFunction(), errorCode) .build(); Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); @@ -318,13 +318,13 @@ void TestMessageFormat2::testAPICustomFunctions() { // By type MFFunctionRegistry::Builder builderByType(errorCode); - FunctionName personFormatterName("person"); + FunctionName personFunctionName("person"); 
MFFunctionRegistry functionRegistryByType = - builderByType.adoptFormatter(personFormatterName, - new PersonNameFormatterFactory(), - errorCode) + builderByType.adoptFunction(personFunctionName, + new PersonNameFunction(), + errorCode) .setDefaultFormatterNameByType("person", - personFormatterName, + personFunctionName, errorCode) .build(); mfBuilder.setFunctionRegistry(functionRegistryByType); @@ -336,9 +336,12 @@ void TestMessageFormat2::testAPICustomFunctions() { // Expect "Hello John" because in the custom function we registered, // "informal" is the default formality and "length" is the default length assertEquals("testAPICustomFunctions", "Hello John", result); + delete person; } +PersonNameFunction::~PersonNameFunction() {} + // ICU-22890 lone surrogate cause infinity loop void TestMessageFormat2::testHighLoneSurrogate() { IcuTestErrorCode errorCode(*this, "testHighLoneSurrogate"); diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 71dfb3916c9b..d4f6b2dce2e9 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -62,6 +62,8 @@ class TestMessageFormat2: public IntlTest { void testGrammarCasesFormatter(IcuTestErrorCode&); void testListFormatter(IcuTestErrorCode&); void testMessageRefFormatter(IcuTestErrorCode&); + void testComplexOptions(IcuTestErrorCode&); + void testSingleEvaluation(IcuTestErrorCode&); // Feature tests void testEmptyMessage(message2::TestCase::Builder&, IcuTestErrorCode&); @@ -98,11 +100,6 @@ U_NAMESPACE_BEGIN namespace message2 { // Custom function classes -class PersonNameFormatterFactory : public FormatterFactory { - - public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; class Person : public FormattableObject { public: @@ -116,11 +113,28 @@ class Person : public FormattableObject { const UnicodeString tagName; }; -class PersonNameFormatter : public Formatter { +class PersonNameFunction 
: public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; + LocalPointer call(const FunctionContext&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) override; + virtual ~PersonNameFunction(); + PersonNameFunction() {} }; +class PersonNameValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + PersonNameValue(); + virtual ~PersonNameValue(); + private: + friend class PersonNameFunction; + + UnicodeString formattedString; + PersonNameValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); +}; // class PersonNameValue + class FormattableProperties : public FormattableObject { public: const UnicodeString& tag() const override { return tagName; } @@ -133,53 +147,139 @@ class FormattableProperties : public FormattableObject { const UnicodeString tagName; }; -class GrammarCasesFormatterFactory : public FormatterFactory { +class GrammarCasesFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); }; -class GrammarCasesFormatter : public Formatter { +class GrammarCasesValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + GrammarCasesValue(); + virtual ~GrammarCasesValue(); private: + friend class GrammarCasesFunction; + + UnicodeString formattedString; + GrammarCasesValue(const FunctionValue&, const FunctionOptions&, UErrorCode&); void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; -}; +}; // class GrammarCasesValue -class ListFormatterFactory : public 
FormatterFactory { +class ListFunction : public Function { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + static MFFunctionRegistry customRegistry(UErrorCode&); + ListFunction() {} + virtual ~ListFunction(); }; -class ListFormatter : public Formatter { +class ListValue : public FunctionValue { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, UErrorCode& errorCode) const override; - static MFFunctionRegistry customRegistry(UErrorCode&); + UnicodeString formatToString(UErrorCode&) const override; + virtual ~ListValue(); private: - friend class ListFormatterFactory; - const Locale& locale; - ListFormatter(const Locale& loc) : locale(loc) {} -}; + friend class ListFunction; + + UnicodeString formattedString; + ListValue(const Locale&, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class ListValue -class ResourceManagerFactory : public FormatterFactory { +class NounValue : public FunctionValue { public: - Formatter* createFormatter(const Locale&, UErrorCode&) override; -}; + UnicodeString formatToString(UErrorCode&) const override; + NounValue(); + virtual ~NounValue(); + private: + friend class NounFunction; + + UnicodeString formattedString; + NounValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class NounValue + +class AdjectiveValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + AdjectiveValue(); + virtual ~AdjectiveValue(); + private: + friend class AdjectiveFunction; + + UnicodeString formattedString; + AdjectiveValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class AdjectiveValue -class ResourceManager : public Formatter { + +class ResourceManager : public Function { public: - FormattedPlaceholder format(FormattedPlaceholder&&, FunctionOptions&& opts, 
UErrorCode& errorCode) const override; + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; static MFFunctionRegistry customRegistry(UErrorCode&); static Hashtable* properties(UErrorCode&); static UnicodeString propertiesAsString(const Hashtable&); static Hashtable* parseProperties(const UnicodeString&, UErrorCode&); + ResourceManager() {} + virtual ~ResourceManager(); +}; +class ResourceManagerValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + ResourceManagerValue(); + virtual ~ResourceManagerValue(); private: - friend class ResourceManagerFactory; - ResourceManager(const Locale& loc) : locale(loc) {} - const Locale& locale; + friend class ResourceManager; + + UnicodeString formattedString; + ResourceManagerValue(const FunctionValue&, + const FunctionOptions&, + UErrorCode&); +}; // class ResourceManagerValue + +class NounFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + NounFunction() { } + virtual ~NounFunction(); }; +class AdjectiveFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + AdjectiveFunction() { } + virtual ~AdjectiveFunction(); +}; + +class CounterFunction : public Function { + public: + LocalPointer call(const FunctionContext&, const FunctionValue&, const FunctionOptions&, UErrorCode&) override; + CounterFunction() { } + virtual ~CounterFunction(); + private: + int32_t count = 0; // Number of times the function was called +}; + +class CounterFunctionValue : public FunctionValue { + public: + UnicodeString formatToString(UErrorCode&) const override; + CounterFunctionValue(); + virtual ~CounterFunctionValue(); + private: + friend class CounterFunction; + int32_t& count; + + CounterFunctionValue(int32_t&, + const FunctionValue&, + const 
FunctionOptions&, + UErrorCode&); +}; // class ResourceManagerValue + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index b498be791ca9..1d031560e231 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -9,6 +9,7 @@ #include "plurrule_impl.h" #include "unicode/listformatter.h" +#include "unicode/numberformatter.h" #include "messageformat2test.h" #include "hash.h" #include "intltest.h" @@ -32,7 +33,9 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString name = "name"; LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); @@ -89,14 +92,18 @@ void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { .setExpected("Hello Mr. 
Doe") .setExpectSuccess() .build(); + TestUtils::runTestCase(*this, test, errorCode); + } void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .adoptFunction(FunctionName("person"), + new PersonNameFunction(), + errorCode) .build()); UnicodeString host = "host"; UnicodeString hostGender = "hostGender"; @@ -182,6 +189,51 @@ void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& err TestUtils::runTestCase(*this, test, errorCode); } +void TestMessageFormat2::testComplexOptions(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + .adoptFunction(FunctionName("noun"), + new NounFunction(), + errorCode) + .adoptFunction(FunctionName("adjective"), + new AdjectiveFunction(), + errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testComplexOptions"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + + // Test that options can be values with their own resolved + // options attached + TestCase test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + .local $colorMatchingGrammaticalNumberGenderCase = {$color :adjective accord=$item} \ + {{{$colorMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setExpected("red balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + + // Test that the same noun can be used multiple times + test = testBuilder.setPattern(".input {$item :noun case=accusative count=1} \ + .local $colorMatchingGrammaticalNumberGenderCase = {$color 
:adjective accord=$item} \ + .local $sizeMatchingGrammaticalNumberGenderCase = {$size :adjective accord=$item} \ + {{{$colorMatchingGrammaticalNumberGenderCase}, {$sizeMatchingGrammaticalNumberGenderCase}}}") + + .setArgument(UnicodeString("color"), UnicodeString("red")) + .setArgument(UnicodeString("item"), UnicodeString("balloon")) + .setArgument(UnicodeString("size"), UnicodeString("huge")) + .setExpected("red balloon (accusative, singular adjective), \ +huge balloon (accusative, singular adjective)") + .build(); + TestUtils::runTestCase(*this, test, errorCode); + +} + void TestMessageFormat2::testCustomFunctions() { IcuTestErrorCode errorCode(*this, "testCustomFunctions"); @@ -190,50 +242,92 @@ void TestMessageFormat2::testCustomFunctions() { testGrammarCasesFormatter(errorCode); testListFormatter(errorCode); testMessageRefFormatter(errorCode); + testComplexOptions(errorCode); + testSingleEvaluation(errorCode); } // -------------- Custom function implementations -Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; +static UnicodeString getStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k) { + if (opt.count(k) == 0) { + return {}; + } + UErrorCode localErrorCode = U_ZERO_ERROR; + const message2::FunctionValue* optVal = opt.at(k); + if (optVal == nullptr) { + return {}; + } + const UnicodeString& formatted = optVal->formatToString(localErrorCode); + if (U_SUCCESS(localErrorCode)) { + return formatted; } + const UnicodeString& original = optVal->getOperand().getString(localErrorCode); + if (U_SUCCESS(localErrorCode)) { + return original; + } + return {}; +} + +static bool hasStringOption(const FunctionOptionsMap& opt, + const UnicodeString& k, const UnicodeString& v) { + return getStringOption(opt, k) == v; +} - // Locale not used - (void) locale; +LocalPointer PersonNameFunction::call(const FunctionContext& context, + const FunctionValue& arg, + 
const FunctionOptions& opts, + UErrorCode& errorCode) { + (void) context; - Formatter* result = new PersonNameFormatter(); - if (result == nullptr) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + LocalPointer v(new PersonNameValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; } -message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +UnicodeString PersonNameValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} + +PersonNameValue::PersonNameValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } + operand = arg.getOperand(); + opts = options; - message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("not a person"); - - if (!arg.canFormat() || arg.asFormattable().getType() != UFMT_OBJECT) { - return errorVal; + const Formattable* toFormat = &operand; + if (U_FAILURE(errorCode)) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - const Formattable& toFormat = arg.asFormattable(); - FunctionOptionsMap opt = options.getOptions(); - bool hasFormality = opt.count("formality") > 0 && opt["formality"].getType() == UFMT_STRING; - bool hasLength = opt.count("length") > 0 && opt["length"].getType() == UFMT_STRING; + FunctionOptionsMap opt = opts.getOptions(); - bool useFormal = hasFormality && opt["formality"].getString(errorCode) == "formal"; - UnicodeString length = hasLength ? 
opt["length"].getString(errorCode) : "short"; + bool useFormal = hasStringOption(opt, "formality", "formal"); + UnicodeString length = getStringOption(opt, "length"); + if (length.length() == 0) { + length = "short"; + } - const FormattableObject* fp = toFormat.getObject(errorCode); - U_ASSERT(U_SUCCESS(errorCode)); + const FormattableObject* fp = toFormat->getObject(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } if (fp == nullptr || fp->tag() != u"person") { - return errorVal; + errorCode = U_MF_FORMATTING_ERROR; + return; } const Person* p = static_cast(fp); @@ -241,58 +335,41 @@ message2::FormattedPlaceholder PersonNameFormatter::format(FormattedPlaceholder& UnicodeString firstName = p->firstName; UnicodeString lastName = p->lastName; - UnicodeString result; if (length == "long") { - result += title; - result += " "; - result += firstName; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else if (length == "medium") { if (useFormal) { - result += firstName; - result += " "; - result += lastName; + formattedString += firstName; + formattedString += " "; + formattedString += lastName; } else { - result += title; - result += " "; - result += firstName; + formattedString += title; + formattedString += " "; + formattedString += firstName; } } else if (useFormal) { // Default to "short" length - result += title; - result += " "; - result += lastName; + formattedString += title; + formattedString += " "; + formattedString += lastName; } else { - result += firstName; + formattedString += firstName; } - - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); } FormattableProperties::~FormattableProperties() {} Person::~Person() {} +PersonNameValue::~PersonNameValue() {} /* See ICU4J: CustomFormatterGrammarCaseTest.java */ -Formatter* 
GrammarCasesFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - - // Locale not used - (void) locale; - Formatter* result = new GrammarCasesFormatter(); - if (result == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return result; -} - - -/* static */ void GrammarCasesFormatter::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { +/* static */ void GrammarCasesValue::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { UnicodeString postfix; if (value.endsWith("ana")) { value.extract(0, value.length() - 3, postfix); @@ -316,49 +393,78 @@ Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, U result += postfix; } -message2::FormattedPlaceholder GrammarCasesFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +LocalPointer +GrammarCasesFunction::call(const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + (void) context; + if (U_FAILURE(errorCode)) { - return {}; + return LocalPointer(); } - // Argument must be present - if (!arg.canFormat()) { - errorCode = U_MF_FORMATTING_ERROR; - return message2::FormattedPlaceholder("grammarBB"); + LocalPointer v(new GrammarCasesValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; } + return v; +} - // Assumes the argument is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); - UnicodeString result; +UnicodeString GrammarCasesValue::formatToString(UErrorCode& status) const { + (void) status; + return formattedString; +} - FunctionOptionsMap opt = options.getOptions(); - switch (toFormat.getType()) { +GrammarCasesValue::GrammarCasesValue(const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + operand = 
val.getOperand(); + // Tests don't cover composition, so no need to merge options + const Formattable* toFormat = &operand; + + UnicodeString result; + const FunctionOptionsMap opt = opts.getOptions(); + switch (toFormat->getType()) { case UFMT_STRING: { - const UnicodeString& in = toFormat.getString(errorCode); + const UnicodeString& in = toFormat->getString(errorCode); bool hasCase = opt.count("case") > 0; - bool caseIsString = opt["case"].getType() == UFMT_STRING; - if (hasCase && caseIsString && (opt["case"].getString(errorCode) == "dative" || opt["case"].getString(errorCode) == "genitive")) { - getDativeAndGenitive(in, result); - } else { - result += in; + const Formattable& caseAsFormattable = opt.at("case")->getOperand(); + if (U_FAILURE(errorCode)) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } + bool caseIsString = caseAsFormattable.getType() == UFMT_STRING; + if (hasCase && caseIsString) { + const UnicodeString& caseOpt = caseAsFormattable.getString(errorCode); + if (caseOpt == "dative" || caseOpt == "genitive") { + getDativeAndGenitive(in, result); + } + else { + result += in; + } } U_ASSERT(U_SUCCESS(errorCode)); break; } default: { - result += toFormat.getString(errorCode); + result += toFormat->getString(errorCode); break; } } - return message2::FormattedPlaceholder(arg, FormattedValue(std::move(result))); + formattedString = result; } void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); MFFunctionRegistry customRegistry = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("grammarBB"), new GrammarCasesFormatterFactory(), errorCode) + .adoptFunction(FunctionName("grammarBB"), new GrammarCasesFunction(), errorCode) .build(); TestCase::Builder testBuilder; @@ -410,94 +516,103 @@ void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) TestUtils::runTestCase(*this, test, errorCode); } +GrammarCasesValue::~GrammarCasesValue() {} + /* See ICU4J: 
CustomFormatterListTest.java */ -Formatter* ListFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + +LocalPointer +ListFunction::call(const FunctionContext& context, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return nullptr; + return LocalPointer(); } - Formatter* result = new ListFormatter(locale); - if (result == nullptr) { + LocalPointer + v(new ListValue(context.getLocale(), arg, std::move(opts), errorCode)); + if (!v.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } - return result; + return v; } -message2::FormattedPlaceholder message2::ListFormatter::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +UnicodeString ListValue::formatToString(UErrorCode& errorCode) const { + (void) errorCode; + + return formattedString; +} + +message2::ListValue::ListValue(const Locale& locale, + const FunctionValue& val, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - message2::FormattedPlaceholder errorVal = FormattedPlaceholder("listformat"); + operand = val.getOperand(); + // Tests don't cover composition, so no need to merge options - // Argument must be present - if (!arg.canFormat()) { - errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + const Formattable* toFormat = &operand; + if (U_FAILURE(errorCode)) { + // Must have an argument + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return; } - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); - FunctionOptionsMap opt = options.getOptions(); - bool hasType = opt.count("type") > 0 && opt["type"].getType() == UFMT_STRING; + FunctionOptionsMap opt = opts.getOptions(); UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND; - if (hasType) { - if (opt["type"].getString(errorCode) == "OR") { - type = UListFormatterType::ULISTFMT_TYPE_OR; - } else if 
(opt["type"].getString(errorCode) == "UNITS") { - type = UListFormatterType::ULISTFMT_TYPE_UNITS; - } + if (hasStringOption(opt, "type", "OR")) { + type = UListFormatterType::ULISTFMT_TYPE_OR; + } else if (hasStringOption(opt, "type", "UNITS")) { + type = UListFormatterType::ULISTFMT_TYPE_UNITS; } - bool hasWidth = opt.count("width") > 0 && opt["width"].getType() == UFMT_STRING; UListFormatterWidth width = UListFormatterWidth::ULISTFMT_WIDTH_WIDE; - if (hasWidth) { - if (opt["width"].getString(errorCode) == "SHORT") { - width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; - } else if (opt["width"].getString(errorCode) == "NARROW") { - width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; - } + if (hasStringOption(opt, "width", "SHORT")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; + } else if (hasStringOption(opt, "width", "NARROW")) { + width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; } - U_ASSERT(U_SUCCESS(errorCode)); LocalPointer lf(icu::ListFormatter::createInstance(locale, type, width, errorCode)); if (U_FAILURE(errorCode)) { - return {}; + return; } - UnicodeString result; - - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_ARRAY: { int32_t n_items; - const Formattable* objs = toFormat.getArray(n_items, errorCode); + const Formattable* objs = toFormat->getArray(n_items, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } UnicodeString* parts = new UnicodeString[n_items]; if (parts == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return {}; + return; } for (int32_t i = 0; i < n_items; i++) { parts[i] = objs[i].getString(errorCode); } U_ASSERT(U_SUCCESS(errorCode)); - lf->format(parts, n_items, result, errorCode); + lf->format(parts, n_items, formattedString, errorCode); delete[] parts; break; } default: { - result += toFormat.getString(errorCode); + formattedString += toFormat->getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); break; } } - - return 
FormattedPlaceholder(arg, FormattedValue(std::move(result))); } +ListValue::~ListValue() {} +ListFunction::~ListFunction() {} + void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; @@ -511,10 +626,11 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { TestCase::Builder testBuilder; MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("listformat"), new ListFormatterFactory(), errorCode) + .adoptFunction(FunctionName("listformat"), new ListFunction(), errorCode) .build(); CHECK_ERROR(errorCode); + testBuilder.setLocale(Locale("en")); testBuilder.setFunctionRegistry(®); testBuilder.setArgument("languages", progLanguages, 3); @@ -568,66 +684,84 @@ void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { return nullptr; } -Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { +using Arguments = MessageArguments; + +static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { + if (U_FAILURE(status)) { + return {}; + } + std::map result; + for (auto iter = opts.cbegin(); iter != opts.cend(); ++iter) { + result[iter->first] = iter->second->getOperand(); + } + return MessageArguments(result, status); +} + +LocalPointer +ResourceManager::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return nullptr; + return LocalPointer(); } - Formatter* result = new ResourceManager(locale); - if (result == nullptr) { + LocalPointer + result(new ResourceManagerValue(arg, std::move(options), errorCode)); + + if (!result.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -using Arguments = MessageArguments; - -static Arguments localToGlobal(const FunctionOptionsMap& opts, UErrorCode& status) { - if (U_FAILURE(status)) { - return {}; - } - return 
MessageArguments(opts, status); +UnicodeString message2::ResourceManagerValue::formatToString(UErrorCode&) const { + return formattedString; } -message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& arg, FunctionOptions&& options, UErrorCode& errorCode) const { +message2::ResourceManagerValue::ResourceManagerValue(const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return {}; + return; } - message2::FormattedPlaceholder errorVal = message2::FormattedPlaceholder("msgref"); + operand = arg.getOperand(); + // Tests don't cover composition, so no need to merge options - // Argument must be present - if (!arg.canFormat()) { + const Formattable* toFormat = &operand; + // Check for null or fallback + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } - - // Assumes arg is not-yet-formatted - const Formattable& toFormat = arg.asFormattable(); UnicodeString in; - switch (toFormat.getType()) { + switch (toFormat->getType()) { case UFMT_STRING: { - in = toFormat.getString(errorCode); + in = toFormat->getString(errorCode); break; } default: { // Ignore non-strings - return errorVal; + return; } } - FunctionOptionsMap opt = options.getOptions(); - bool hasProperties = opt.count("resbundle") > 0 && opt["resbundle"].getType() == UFMT_OBJECT && opt["resbundle"].getObject(errorCode)->tag() == u"properties"; + FunctionOptionsMap opt = opts.getOptions(); + bool hasProperties = opt.count("resbundle") > 0 + && opt["resbundle"]->getOperand().getType() == UFMT_OBJECT + && opt["resbundle"]->getOperand().getObject(errorCode)->tag() == u"properties"; // If properties were provided, look up the given string in the properties, // yielding a message if (hasProperties) { - const FormattableProperties* properties = reinterpret_cast(opt["resbundle"].getObject(errorCode)); + const FormattableProperties* properties = reinterpret_cast + 
(opt["resbundle"]->getOperand().getObject(errorCode)); U_ASSERT(U_SUCCESS(errorCode)); UnicodeString* msg = static_cast(properties->properties->get(in)); if (msg == nullptr) { // No message given for this key -- error out errorCode = U_MF_FORMATTING_ERROR; - return errorVal; + return; } MessageFormatter::Builder mfBuilder(errorCode); UParseError parseErr; @@ -635,7 +769,7 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar MessageFormatter mf = mfBuilder.setPattern(*msg, parseErr, errorCode).build(errorCode); Arguments arguments = localToGlobal(opt, errorCode); if (U_FAILURE(errorCode)) { - return errorVal; + return; } UErrorCode savedStatus = errorCode; @@ -646,14 +780,16 @@ message2::FormattedPlaceholder ResourceManager::format(FormattedPlaceholder&& ar if (U_FAILURE(errorCode)) { errorCode = savedStatus; } - return FormattedPlaceholder(arg, FormattedValue(std::move(result))); + formattedString = result; } else { // Properties must be provided errorCode = U_MF_FORMATTING_ERROR; } - return errorVal; + return; } +ResourceManager::~ResourceManager() {} +ResourceManagerValue::~ResourceManagerValue() {} void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { CHECK_ERROR(errorCode); @@ -666,7 +802,7 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { return; } MFFunctionRegistry reg = MFFunctionRegistry::Builder(errorCode) - .adoptFormatter(FunctionName("msgRef"), new ResourceManagerFactory(), errorCode) + .adoptFunction(FunctionName("msgRef"), new ResourceManager(), errorCode) .build(); CHECK_ERROR(errorCode); @@ -727,6 +863,200 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { TestUtils::runTestCase(*this, test, errorCode); } +LocalPointer +NounFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new 
NounValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return v; +} + +UnicodeString NounValue::formatToString(UErrorCode& status) const { + (void) status; + + return formattedString; +} + +NounValue::NounValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + operand = arg.getOperand(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + + const Formattable* toFormat = &operand; + FunctionOptionsMap opt = opts.getOptions(); + + // very simplified example + bool useAccusative = hasStringOption(opt, "case", "accusative"); + bool useSingular = hasStringOption(opt, "count", "1"); + const UnicodeString& noun = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } + + if (useAccusative) { + if (useSingular) { + formattedString = noun + " accusative, singular noun"; + } else { + formattedString = noun + " accusative, plural noun"; + } + } else { + if (useSingular) { + formattedString = noun + " dative, singular noun"; + } else { + formattedString = noun + " dative, plural noun"; + } + } +} + +LocalPointer +AdjectiveFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new AdjectiveValue(arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return v; +} + +UnicodeString AdjectiveValue::formatToString(UErrorCode& status) const { + (void) status; + + return formattedString; +} + +AdjectiveValue::AdjectiveValue(const FunctionValue& arg, + const FunctionOptions& options, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + operand = arg.getOperand(); + opts = options.mergeOptions(arg.getResolvedOptions(), errorCode); + + 
const Formattable* toFormat = &operand; + + const FunctionOptionsMap opt = opts.getOptions(); + // Return empty string if no accord is provided + if (opt.count("accord") <= 0) { + return; + } + + const FunctionValue& accordOpt = *opt.at("accord"); + const Formattable& accordSrc = accordOpt.getOperand(); + UnicodeString accord = accordSrc.getString(errorCode); + const UnicodeString& adjective = toFormat->getString(errorCode); + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_FORMATTING_ERROR; + return; + } + + formattedString = adjective + " " + accord; + // very simplified example + FunctionOptionsMap accordOptionsMap = accordOpt.getResolvedOptions().getOptions(); + bool accordIsAccusative = hasStringOption(accordOptionsMap, "case", "accusative"); + bool accordIsSingular = hasStringOption(accordOptionsMap, "count", "1"); + if (accordIsAccusative) { + if (accordIsSingular) { + formattedString += " (accusative, singular adjective)"; + } else { + formattedString += " (accusative, plural adjective)"; + } + } else { + if (accordIsSingular) { + formattedString += " (dative, singular adjective)"; + } else { + formattedString += " (dative, plural adjective)"; + } + } +} + +NounFunction::~NounFunction() {} +AdjectiveFunction::~AdjectiveFunction() {} +NounValue::~NounValue() {} +AdjectiveValue::~AdjectiveValue() {} + +void TestMessageFormat2::testSingleEvaluation(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + MFFunctionRegistry customRegistry(MFFunctionRegistry::Builder(errorCode) + .adoptFunction(FunctionName("counter"), + new CounterFunction(), + errorCode) + .build()); + UnicodeString name = "name"; + TestCase::Builder testBuilder; + testBuilder.setName("testSingleEvaluation"); + testBuilder.setLocale(Locale("en")); + testBuilder.setFunctionRegistry(&customRegistry); + + // Test that the RHS of each declaration is evaluated at most once + TestCase test = testBuilder.setPattern(".local $x = {:counter}\ + {{{$x} {$x}}}") + .setExpected("1 1") + 
.build(); + TestUtils::runTestCase(*this, test, errorCode); +} + +LocalPointer +CounterFunction::call(const FunctionContext&, + const FunctionValue& arg, + const FunctionOptions& opts, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return LocalPointer(); + } + + LocalPointer + v(new CounterFunctionValue(count, arg, std::move(opts), errorCode)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + count++; + return v; +} + +CounterFunctionValue::CounterFunctionValue(int32_t& c, + const FunctionValue&, + const FunctionOptions&, + UErrorCode&) : count(c) { + // No operand, no options +} + +UnicodeString CounterFunctionValue::formatToString(UErrorCode& status) const { + if (U_FAILURE(status)) { + return {}; + } + number::UnlocalizedNumberFormatter nf = number::NumberFormatter::with(); + number::FormattedNumber formattedNumber = nf.locale("en-US").formatInt(count, status); + return formattedNumber.toString(status); +} + +CounterFunction::~CounterFunction() {} +CounterFunctionValue::~CounterFunctionValue() {} + #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index ddf93da632ce..5ef9be49c710 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -309,12 +309,14 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode); // Other tests (non-spec) + // TODO: move this into the spec tests when + // https://github.com/unicode-org/message-format-wg/pull/846 lands + runTestsFromJsonFile(*this, "u-options.json", errorCode); runTestsFromJsonFile(*this, "more-functions.json", errorCode); runTestsFromJsonFile(*this, "valid-tests.json", errorCode); runTestsFromJsonFile(*this, "resolution-errors.json", errorCode); 
runTestsFromJsonFile(*this, "matches-whitespace.json", errorCode); runTestsFromJsonFile(*this, "alias-selector-annotations.json", errorCode); - runTestsFromJsonFile(*this, "runtime-errors.json", errorCode); // Re: the expected output for the first test in this file: // Note: the more "correct" fallback output seems like it should be "1.000 3" (ignoring the diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index c4ad251c7f48..c2f40767b3c5 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -274,6 +274,9 @@ class TestUtils { if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) { failWrongOffset(tmsg, testCase, parseError.line, parseError.offset); } + if (testCase.expectSuccess() && !testCase.outputMatches(result)) { + failWrongOutput(tmsg, testCase, result); + } if (U_FAILURE(errorCode) && !testCase.expectSuccess() && testCase.expectedErrorCode() != U_MF_SYNTAX_ERROR) { // Re-run the formatter if there was an error, diff --git a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json index 6d78ffe4f04d..4a4d6eb1529d 100644 --- a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json +++ b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json @@ -129,7 +129,7 @@ ], "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", "params": {"exp": { "date": 1722746637000 }, "user": "John", "tsOver" : "long" }, - "ignoreTest": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + "ignoreTest": "timeStyle=long should print 'PDT', not 'Pacific Daylight Time'?" 
}, { "srcs": [ diff --git a/testdata/message2/duplicate-declarations.json b/testdata/message2/duplicate-declarations.json index cd3acc1576d3..b744365f51f1 100644 --- a/testdata/message2/duplicate-declarations.json +++ b/testdata/message2/duplicate-declarations.json @@ -12,32 +12,26 @@ "tests": [ { "src": ".local $foo = {$foo} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {42} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {:unknown} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { - "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}", - "exp": "13 42" + "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}" }, { "src": ".local $foo = {$foo} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar foo" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {$bar} .local $bar = {$baz} {{bar {$foo}}}", - "params": [{ "name": "baz", "value": "foo" }], - "exp": "bar {$bar}" + "params": [{ "name": "baz", "value": "foo" }] } ] } diff --git a/testdata/message2/icu-test-functions.json b/testdata/message2/icu-test-functions.json index a97446addf0e..4f4be286537f 100644 --- a/testdata/message2/icu-test-functions.json +++ b/testdata/message2/icu-test-functions.json @@ -119,6 +119,15 @@ "exp": "Expires at 7:23:45 PM GMT+03:30", "ignoreCpp": "ICU-22754 Time zones not working yet (bug)" }, + { + "comment": "Horibly long, but I really wanted to test multiple declarations with overrides, and you can't join strings in JSON", + "src": [ + ".input {$exp :datetime timeStyle=short}\n", + "{{Hello John, or even '{$exp :datetime 
dateStyle=full}'?}}" + ], + "exp": "Hello John, or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}] + }, { "comment": "Horibly long, but I really wanted to test multiple declarations with overrides, and you can't join strings in JSON", "src": [ @@ -131,8 +140,7 @@ "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", "params": [{"name": "exp", "value": { "date": 1722746637000 }}, {"name": "user", "value": "John"}, - {"name": "tsOver", "value": "full" }], - "ignoreCpp": "ICU-22754 ICU4C doesn't implement this kind of function composition yet. See https://github.com/unicode-org/message-format-wg/issues/515" + {"name": "tsOver", "value": "full" }] }, { "src": [ diff --git a/testdata/message2/more-functions.json b/testdata/message2/more-functions.json index b34803635ce9..7d21b69e93eb 100644 --- a/testdata/message2/more-functions.json +++ b/testdata/message2/more-functions.json @@ -112,6 +112,35 @@ "exp": "Default number: 1.234.567.890.123.456.789,987654!", "locale": "ro", "params": [{ "name": "val", "value": {"decimal": "1234567890123456789.987654321"} }] + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=2} .local $y = {$x :number minimumFractionDigits=5} {{{$x} {$y}}}", + "exp": "42.00 42.00000", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumFractionDigits=2} {{{$x} {$y}}}", + "exp": "42.00000 42.00", + "locale": "en" + }, + { + "src": ".local $x = {42 :number minimumFractionDigits=5} .local $y = {$x :number minimumIntegerDigits=3} {{{$x} {$y}}}", + "exp": "42.00000 042.00000", + "locale": "en" + }, + { + "comment": "Modified from icu4j test", + "src": [ + ".input {$exp :datetime timeStyle=short}\n", + ".input {$user :string}\n", + ".local $longExp = {$exp :datetime dateStyle=long}\n", + ".local $zooExp = {$exp 
:datetime dateStyle=short timeStyle=$tsOver}\n", + "{{Hello John, you want '{$exp}', '{$longExp}', or '{$zooExp}' or even '{$exp :datetime dateStyle=full}'?}}" + ], + "exp": "Hello John, you want '9:43 PM', 'August 3, 2024 at 9:43 PM', or '8/3/24, 9:43:57 PM Pacific Daylight Time' or even 'Saturday, August 3, 2024 at 9:43 PM'?", + "params": [{"name": "exp", "value": { "date": 1722746637000 }}, + {"name": "user", "value": "John"}, + {"name": "tsOver", "value" : "full" }] } ] } diff --git a/testdata/message2/runtime-errors.json b/testdata/message2/runtime-errors.json deleted file mode 100644 index b1bb0cd491a0..000000000000 --- a/testdata/message2/runtime-errors.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "scenario": "Runtime errors", - "description": "Tests for bad-selector and bad-operand errors", - "defaultTestProperties": { - "locale": "en-US" - }, - "tests": [ - { - "src": ".match {|horse| :date}\n 1 {{The value is one.}}\n * {{Formatter used as selector.}}", - "exp": "Formatter used as selector.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - }, - { - "src": ".match {|horse| :number}\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" - }, - { - "src": ".local $sel = {|horse| :number}\n .match {$sel}\n 1 {{The value is one.}}\n * {{horse is not a number.}}", - "exp": "horse is not a number.", - "expErrors": [{"type": "bad-selector"}], - "ignoreJava": "ICU4J doesn't signal runtime errors?" 
- } - ] -} diff --git a/testdata/message2/spec/functions/integer.json b/testdata/message2/spec/functions/integer.json index c8e75077a221..8f386338f3ec 100644 --- a/testdata/message2/spec/functions/integer.json +++ b/testdata/message2/spec/functions/integer.json @@ -27,6 +27,10 @@ } ], "exp": "one" + }, + { + "src": ".local $x = {1.25 :integer} .local $y = {$x :number} {{{$x}}}", + "exp": "1" } ] } diff --git a/testdata/message2/tricky-declarations.json b/testdata/message2/tricky-declarations.json index 3fded666e633..eb7b5ac90add 100644 --- a/testdata/message2/tricky-declarations.json +++ b/testdata/message2/tricky-declarations.json @@ -6,12 +6,10 @@ }, "tests": [ { "src": ".input {$var :number minimumFractionDigits=$var2} .input {$var2 :number minimumFractionDigits=5} {{{$var} {$var2}}}", - "exp": "1.000 3.00000", "params": [{ "name": "var", "value": 1}, {"name": "var2", "value": 3 }], "expErrors": [{ "type": "duplicate-declaration" }] }, { "src": ".local $var = {$var2} .local $var2 = {1} {{{$var} {$var2}}}", - "exp": "5 1", "params": [{ "name": "var2", "value": 5 }], "expErrors": [{ "type": "duplicate-declaration" }] } diff --git a/testdata/message2/u-options.json b/testdata/message2/u-options.json new file mode 100644 index 000000000000..3e13b30a2479 --- /dev/null +++ b/testdata/message2/u-options.json @@ -0,0 +1,126 @@ +{ + "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "scenario": "u: Options", + "description": "Common options affecting the function context", + "defaultTestProperties": { + "locale": "en-US" + }, + "tests": [ + { + "src": "{#tag u:id=x}content{/ns:tag u:id=x}", + "exp": "content", + "expParts": [ + { + "type": "markup", + "kind": "open", + "id": "x", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "id": "x", + "name": "tag" + } + ] + }, + { + "src": "{#tag u:dir=rtl u:locale=ar}content{/ns:tag}", + "exp": 
"content", + "expErrors": [{ "type": "bad-option" }, { "type": "bad-option" }], + "expParts": [ + { + "type": "markup", + "kind": "open", + "name": "tag" + }, + { + "type": "literal", + "value": "content" + }, + { + "type": "markup", + "kind": "close", + "name": "tag" + } + ] + }, + { + "src": "hello {4.2 :number u:locale=fr}", + "exp": "hello 4,2" + }, + { + "src": "hello {world :string u:dir=ltr u:id=foo}", + "exp": "hello world", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "ltr", + "id": "foo", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=rtl}", + "exp": "hello \u2067world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "rtl", + "value": "world" + } + ] + }, + { + "src": "hello {world :string u:dir=auto}", + "exp": "hello \u2068world\u2069", + "expParts": [ + { + "type": "literal", + "value": "hello " + }, + { + "type": "string", + "source": "|world|", + "dir": "auto", + "value": "world" + } + ] + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=rtl}", + "exp": "أهلاً \u2067بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string u:dir=auto}", + "exp": "أهلاً \u2068بالعالم\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {world :string u:dir=ltr}", + "exp": "أهلاً \u2066world\u2069" + }, + { + "locale": "ar", + "src": "أهلاً {بالعالم :string}", + "exp": "أهلاً \u2067بالعالم\u2069" + } + ] +}