ICU 77.1  77.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
messageformat2.h
Go to the documentation of this file.
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef MESSAGEFORMAT2_H
7 #define MESSAGEFORMAT2_H
8 
9 #if U_SHOW_CPLUSPLUS_API
10 
11 #if !UCONFIG_NO_NORMALIZATION
12 
13 #if !UCONFIG_NO_FORMATTING
14 
15 #if !UCONFIG_NO_MF2
16 
23 #include "unicode/messageformat2_data_model.h"
24 #include "unicode/messageformat2_function_registry.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/unistr.h"
27 
28 #ifndef U_HIDE_DEPRECATED_API
29 
30 U_NAMESPACE_BEGIN
31 
32 namespace message2 {
33 
34  class Environment;
35  class MessageContext;
36  class StaticErrors;
37  class InternalValue;
38 
55  // Note: This class does not currently inherit from the existing
56  // `Format` class.
57  public:
65  MessageFormatter& operator=(MessageFormatter&&) noexcept;
72  virtual ~MessageFormatter();
73 
88  UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
89 
105  FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const { // Stub: always reports "unsupported" rather than formatting.
106  (void) arguments; // Marks the parameter as intentionally unused.
107  if (U_SUCCESS(status)) { // A failure code passed in by the caller is left untouched.
108  status = U_UNSUPPORTED_ERROR; // Otherwise report that the operation is not supported.
109  }
110  return FormattedMessage(status); // The result is constructed from the status code only.
111  }
112 
121  const Locale& getLocale() const { return locale; }
122 
132  UnicodeString getPattern() const;
133 
143  const MFDataModel& getDataModel() const;
144 
159  U_MF_BEST_EFFORT = 0,
167  U_MF_STRICT
168  } UMFErrorHandlingBehavior;
169 
176  class U_I18N_API Builder : public UObject {
177  private:
178  friend class MessageFormatter;
179 
180  // The pattern to be parsed to generate the formatted message
181  UnicodeString pattern;
182  bool hasPattern = false; // NOTE(review): presumably set once `setPattern()` has been called -- confirm in the implementation
183  bool hasDataModel = false; // NOTE(review): presumably set once a data model is available -- confirm in the implementation
184  // The data model to be used to generate the formatted message.
185  // Initialized either by `setDataModel()`, or by the parser
186  // through a call to `setPattern()`
187  MFDataModel dataModel;
188  // Normalized representation of the pattern;
189  // ignored if `setPattern()` wasn't called
190  UnicodeString normalizedInput;
191  // Errors (internal representation of parse errors).
192  // Ignored if `setPattern()` wasn't called
193  StaticErrors* errors; // NOTE(review): no in-class initializer; presumably set by the constructor -- confirm
194  Locale locale;
195  // Not owned
196  const MFFunctionRegistry* customMFFunctionRegistry; // NOTE(review): no in-class initializer; presumably set by the constructor -- confirm
197  // Error behavior; see the comment on `signalErrors` in the `MessageFormatter` class
198  bool signalErrors = false;
199 
200  void clearState();
201  public:
211  Builder& setLocale(const Locale& locale); // Returns a reference to a Builder, so setter calls can be chained
227  Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status); // `parseError` and `status` are output parameters
241  Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry); // NOTE(review): the registry is stored as a non-owned pointer, so it presumably must outlive the builder -- confirm
251  Builder& setDataModel(MFDataModel&& dataModel); // Takes the data model by rvalue reference
281  Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type); // `type` is U_MF_BEST_EFFORT or U_MF_STRICT
296  MessageFormatter build(UErrorCode& status) const; // const member: callable on a const Builder
308  Builder(UErrorCode& status);
315  virtual ~Builder();
316  }; // class MessageFormatter::Builder
317 
318  // TODO: Shouldn't be public; only used for testing
327  const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
328 
329  private:
330  friend class Builder;
331  friend class Checker;
332  friend class MessageArguments;
333  friend class MessageContext;
334 
335  MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
336 
337  MessageFormatter() = delete; // default constructor not implemented
338 
339  // Do not define default assignment operator
340  const MessageFormatter &operator=(const MessageFormatter &) = delete;
341 
342  // Selection methods
343 
344  // Takes a vector of FormattedPlaceholders
345  void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
346  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
347  void filterVariants(const UVector&, UVector&, UErrorCode&) const;
348  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
349  void sortVariants(const UVector&, UVector&, UErrorCode&) const;
350  // Takes a vector of strings (input) and a vector of strings (output)
351  void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const;
352  // Takes a vector of FormattedPlaceholders (input),
353  // and a vector of vectors of strings (output)
354  void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
355 
356  // Formatting methods
357 
358  // Used for normalizing variable names and keys for comparison
359  UnicodeString normalizeNFC(const UnicodeString&) const;
360  [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
361  void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
362  // Evaluates a function call
363  // Dispatches on argument type
364  [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument,
365  MessageContext& context,
366  UErrorCode& status) const;
367  // Dispatches on function name
368  [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName,
369  InternalValue* argument,
370  FunctionOptions&& options,
371  MessageContext& context,
372  UErrorCode& status) const;
373  // Formats an expression that appears in a pattern or as the definition of a local variable
374  [[nodiscard]] InternalValue* formatExpression(const Environment&,
375  const data_model::Expression&,
376  MessageContext&,
377  UErrorCode&) const;
378  [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
379  [[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
380  [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
381  void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
382 
383  // Function registry methods
384  bool hasCustomMFFunctionRegistry() const {
385  return (customMFFunctionRegistry != nullptr);
386  }
387 
388  // Precondition: custom function registry exists
389  // Note: this is a const method returning a const reference, but the values in the
390  // MFFunctionRegistry are still mutable (a FormatterFactory can have mutable state)
391  const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
392 
393  bool isCustomFormatter(const FunctionName&) const;
394  FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
395  bool isBuiltInSelector(const FunctionName&) const;
396  bool isBuiltInFormatter(const FunctionName&) const;
397  bool isCustomSelector(const FunctionName&) const;
398  const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
399  bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
400  bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
401  const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
402 
403  Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
404  Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
405  bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
406 
407  // Checking for resolution errors
408  void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
409  void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
410  void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
411  void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
412 
413  void initErrors(UErrorCode&);
414  void clearErrors() const;
415  void cleanup() noexcept;
416 
417  // The locale this MessageFormatter was created with
418  /* const */ Locale locale;
419 
420  // Registry for built-in functions
421  MFFunctionRegistry standardMFFunctionRegistry;
422  // Registry for custom functions; may be null if no custom registry supplied
423  // Note: this is *not* owned by the MessageFormatter object
424  // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
425  // while also not requiring the function registry to be deeply-copyable. Making the
426  // function registry copyable would impose a requirement on any implementations
427  // of the FormatterFactory and SelectorFactory interfaces to implement a custom
428  // clone() method, which is necessary to avoid sharing between copies of the
429  // function registry (and thus double-frees)
430  // Not deeply immutable (the values in the function registry are mutable,
431  // as a FormatterFactory can have mutable state)
432  const MFFunctionRegistry* customMFFunctionRegistry;
433 
434  // Data model, representing the parsed message
435  MFDataModel dataModel;
436 
437  // Normalized version of the input string (optional whitespace removed)
438  UnicodeString normalizedInput;
439 
440  // Errors -- only used while parsing and checking for data model errors; then
441  // the MessageContext keeps track of errors
442  // Must be a raw pointer to avoid including the internal header file
443  // defining StaticErrors
444  // Owned by `this`
445  StaticErrors* errors = nullptr;
446 
447  // Error handling behavior.
448  // If true, then formatting methods set their UErrorCode arguments
449  // to signal MessageFormat errors, and no useful output is returned.
450  // If false, then MessageFormat errors are not signaled and the
451  // formatting methods return best-effort output.
452  // The default is false.
453  bool signalErrors = false;
454 
455  // Used for implementing normalizeNFC()
456  const Normalizer2* nfcNormalizer = nullptr;
457 
458  }; // class MessageFormatter
459 
460 } // namespace message2
461 
462 U_NAMESPACE_END
463 
464 #endif // U_HIDE_DEPRECATED_API
465 
466 #endif /* #if !UCONFIG_NO_MF2 */
467 
468 #endif /* #if !UCONFIG_NO_FORMATTING */
469 
470 #endif /* #if !UCONFIG_NO_NORMALIZATION */
471 
472 #endif /* U_SHOW_CPLUSPLUS_API */
473 
474 #endif // MESSAGEFORMAT2_H
475 
476 // eof
The mutable Builder class allows each part of the MessageFormatter to be initialized separately; call...
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:743
const Locale & getLocale() const
Accesses the locale that this MessageFormatter object was created with.
C++ API: Unicode String.
The MessageArguments class represents the named arguments to a message.
const UnicodeString & getNormalizedPattern() const
Returns a string consisting of the input with optional spaces removed.
C++ API: Formats messages using the draft MessageFormat 2.0.
C++ API: New API for Unicode Normalization.
The Literal class corresponds to the literal nonterminal in the MessageFormat 2 grammar, https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf and the Literal interface defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions.
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:316
Requested operation not supported in current context.
Definition: utypes.h:482
The MFDataModel class describes a parsed representation of the text of a message. ...
UMFErrorHandlingBehavior
Used in conjunction with the MessageFormatter::Builder::setErrorHandlingBehavior() method...
The Operand class corresponds to the operand nonterminal in the MessageFormat 2 grammar, https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf .
The Expression class corresponds to the expression nonterminal in the MessageFormat 2 grammar and the...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
Not yet implemented: The result of a message formatting operation.
Structure encapsulating named options passed to a custom selector or formatter.
FormattedMessage format(const MessageArguments &arguments, UErrorCode &status) const
Not yet implemented; formats the message to a FormattedMessage object, using the data model that was ...
A FormattedPlaceholder encapsulates an input value (a message2::Formattable) together with an optio...
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
A Pattern is a sequence of formattable parts.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
Defines mappings from names of formatters and selectors to functions implementing them...
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195