From ed1d335d535d5a6b8600d156d5f1c65942c8339c Mon Sep 17 00:00:00 2001 From: Greg Lindhorst Date: Wed, 10 Jul 2024 12:46:11 -0700 Subject: [PATCH 1/4] Updates --- .../Public/Values/StringValue.cs | 5 - .../Functions/LibraryOperators.cs | 23 +- .../Functions/LibraryTable.cs | 28 ++- .../ExpressionTestCases/Culture_en-US.txt | 178 +++++++++++++++ .../ExpressionTestCases/Culture_tr-TR.txt | 207 ++++++++++++++++++ .../ExpressionTestHelpers/TestRunner.cs | 1 + .../TestRunnerTests/InternalSetup.cs | 20 ++ .../Helpers/AsyncVerify.cs | 5 + .../PowerFxEvaluationTests.cs | 5 + src/tools/Repl/Program.cs | 29 +++ 10 files changed, 476 insertions(+), 25 deletions(-) create mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt create mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt diff --git a/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs b/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs index 2dabf96f14..e4ff4a41d8 100644 --- a/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs +++ b/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs @@ -36,11 +36,6 @@ public override void Visit(IValueVisitor visitor) visitor.Visit(this); } - internal StringValue ToLower() - { - return new StringValue(IRContext.NotInSource(FormulaType.String), Value.ToLowerInvariant()); - } - public override void ToExpression(StringBuilder sb, FormulaValueSerializerSettings settings) { sb.Append($"\"{CharacterUtils.ExcelEscapeString(Value)}\""); diff --git a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs index 0abd961927..db7bcabb78 100644 --- a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs +++ b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. 
using System; +using System.Globalization; using System.Linq; using System.Text; using Microsoft.PowerFx.Core.IR; @@ -720,9 +721,9 @@ private static BooleanValue NotEqualPolymorphic(IRContext irContext, FormulaValu } // See in_SS in JScript membershipReplacementFunctions - public static Func StringInOperator(bool exact) + public static Func StringInOperator(bool exact) { - return (irContext, args) => + return (services, irContext, args) => { var left = args[0]; var right = args[1]; @@ -738,23 +739,25 @@ public static Func StringInOperator(boo var leftStr = (StringValue)left; var rightStr = (StringValue)right; - - return new BooleanValue(irContext, rightStr.Value.IndexOf(leftStr.Value, exact ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase) >= 0); + + return new BooleanValue(irContext, services.GetService().CompareInfo.IndexOf(rightStr.Value, leftStr.Value, exact ? CompareOptions.Ordinal : CompareOptions.IgnoreCase) >= 0); }; } // Left is a scalar. Right is a single-column table. 
// See in_ST() - public static Func InScalarTableOperator(bool exact) + public static Func InScalarTableOperator(bool exact) { - return (irContext, args) => + return (services, irContext, args) => { var left = args[0]; - var right = args[1]; - + var right = args[1]; + + var cultureInfo = services.GetService(); + if (!exact && left is StringValue strLhs) { - left = strLhs.ToLower(); + left = new StringValue(IRContext.NotInSource(FormulaType.String), cultureInfo.TextInfo.ToLower(strLhs.Value)); } var source = (TableValue)right; @@ -767,7 +770,7 @@ public static Func InScalarTableOperato if (!exact && rhs is StringValue strRhs) { - rhs = strRhs.ToLower(); + rhs = new StringValue(IRContext.NotInSource(FormulaType.String), cultureInfo.TextInfo.ToLower(strRhs.Value)); } if (RuntimeHelpers.AreEqual(left, rhs)) diff --git a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs index ad1f52d9e5..ee4c1e3d09 100644 --- a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs +++ b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs @@ -652,31 +652,31 @@ public static async ValueTask SortTable(EvalVisitor runner, EvalVi if (allNumbers) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDecimals) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allStrings) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allBooleans) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDatetimes) { - return SortValueType(pairs, irContext, 
compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDates) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allTimes) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allOptionSets) { @@ -1103,7 +1103,7 @@ private static FormulaValue DistinctValueType(List<(DValue row, For return new InMemoryTableValue(irContext, result); } - private static FormulaValue SortValueType(List<(DValue row, FormulaValue sortValue)> pairs, IRContext irContext, int compareToResultModifier) + private static FormulaValue SortValueType(List<(DValue row, FormulaValue sortValue)> pairs, EvalVisitor runner, IRContext irContext, int compareToResultModifier) where TPFxPrimitive : PrimitiveValue where TDotNetPrimitive : IComparable { @@ -1119,8 +1119,16 @@ private static FormulaValue SortValueType(List< } var n1 = a.sortValue as TPFxPrimitive; - var n2 = b.sortValue as TPFxPrimitive; - return n1.Value.CompareTo(n2.Value) * compareToResultModifier; + var n2 = b.sortValue as TPFxPrimitive; + CultureInfo culture; + if (n1.Value is string n1s && n2.Value is string n2s && (culture = runner.GetService()) != null) + { + return culture.CompareInfo.Compare(n1s, n2s) * compareToResultModifier; + } + else + { + return n1.Value.CompareTo(n2.Value) * compareToResultModifier; + } }); return new InMemoryTableValue(irContext, pairs.Select(pair => pair.row)); diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt new file mode 100644 index 0000000000..121dd01006 --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt @@ -0,0 +1,178 @@ +#SETUP: 
RegEx,CultureInfo("en-US"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult + +// Four types of letter I +// Dotted Dotless +// Upper İ U+0130 I U+0049 +// Lower i U+0069 ı U+0131 + +>> Language() +"en-US" + +>> "İ" = UniChar( Hex2Dec( "0130") ) +true + +>> "ı" = UniChar( Hex2Dec( "0131" ) ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "i" ) +"I" + +>> Lower( "I" ) +"i" + +>> Upper( "i" ) = "I" +true + +>> Lower( "I" ) = "i" +true + +>> Lower( "quit" ) = Lower( "QUIT" ) +true + +>> Lower( "quit" ) = Lower( "QUİT" ) +true + +>> Lower( "quıt" ) = Lower( "QUIT" ) +false + +>> Upper( "quit" ) = Upper( "QUIT" ) +true + +>> Proper( "Iabc" ) +"Iabc" + +>> Proper( "iabc" ) +"Iabc" + +// VALUE, DECIMAL, FLOAT + +>> Value( "123,456" ) +123456 + +>> Value( "123,456", "tr-TR" ) +123.456 + +>> Decimal( "123,456" ) +123456 + +>> Decimal( "123,456", "tr-TR" ) +123.456 + +>> Float( "123,456" ) +123456 + +>> Float( "123,456", "tr-TR" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"Jan Fri 2010 PM" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"January Wednesday 2020 AM" + +>> Text( 123456789, "#,###.00" ) +"123,456,789.00" + +>> Text( 123456789, "#.###,00" ) +"123456789.00000" + +// IN AND EXACTIN + +>> "i" in "SIGH" +true + +>> "I" in "sigh" +true + +>> "i" exactin "SIGH" +false + +>> "I" exactin "sigh" +false + +>> "I" exactin "SIGH" +true + +>> "i" exactin "sigh" +true + +>> "sIGh" in ["sigh","bcde"] +true + +>> "siGh" in ["SIGH","bcde"] +true + +>> "sIGH" in ["sigh","bcde"] +true + +>> "siGH" in ["bcde","sIgh"] +true + +>> "SIgh" in ["bcde","sigh"] +true + +// SORT +// Relative order of i, I, ı, İ are different between en-US and tr-TR + +>> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> SortByColumns( [ "Z", "İ", "z", "I", 
"J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +>> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +// REGULAR EXPRESSIONS +// Always uses invariant even though tr-TR is set, subject of https://github.com/microsoft/Power-Fx/issues/2538 + +// Results when using C# // Invariant tr-TR en-US + +>> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> Match( "hiIıİİıIhi", "\u0130+" ) 
+{FullMatch:"İİ",StartMatch:5,SubMatches:Table()} + +>> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt new file mode 100644 index 0000000000..db3c8a756d --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt @@ -0,0 +1,207 @@ +#SETUP: RegEx,CultureInfo("tr-TR"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult + +// Four types of letter I +// Dotted Dotless +// Upper İ U+0130 I U+0049 +// Lower i U+0069 ı U+0131 + +>> Language() +"tr-TR" + +>> "İ" = UniChar( Hex2Dec( "0130") ) +true + +>> "ı" = UniChar( Hex2Dec( "0131" ) ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "i" ) +"İ" + +>> Lower( "I" ) +"ı" + +>> Upper( "ı" ) +"I" + +>> Lower( "İ" ) +"i" + +>> Upper( "i" ) = UniChar( Hex2Dec( "0130") ) +true + +>> Lower( "I" ) = UniChar( Hex2Dec( "0131") ) +true + +>> Upper( "i" ) = "I" +false + +>> Lower( "I" ) = "i" +false + +>> Lower( "quit" ) = Lower( "QUIT" ) +false + +>> Lower( "quit" ) = Lower( "QUİT" ) +true + +>> Lower( "quıt" ) = Lower( "QUIT" ) +true + +>> Upper( "quit" ) = Upper( "QUIT" ) +false + +>> Upper( "quit" ) = Upper( "QUİT" ) +true + +>> Upper( "quıt" ) = Upper( "QUIT" ) +true + +>> Proper( "Iabc" ) +"Iabc" + +>> Proper( "iabc" ) +"İabc" + +>> Proper( "İabc" ) +"İabc" + +>> Proper( "ıabc" ) +"Iabc" + +// VALUE, DECIMAL, FLOAT + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "en-US" ) +123456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "en-US" ) +123456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", "en-US" ) +123456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"Oca Cum 2010 ÖS" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"Ocak Çarşamba 2020 ÖÖ" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 
123456789, "#.###,00" ) +"123.456.789,00" + +// IN AND EXACTIN + +>> "ı" in "SIGH" +true + +>> "İ" in "sigh" +true + +>> "ı" in "SİGH" +false + +>> "İ" in "sıgh" +false + +>> "ı" exactin "SIGH" +false + +>> "İ" exactin "sigh" +false + +>> "ı" exactin "SİGH" +false + +>> "İ" exactin "sıgh" +false + +>> "sİGh" in ["sigh","bcde"] +true + +>> "siGh" in ["SİGH","bcde"] +true + +>> "sIGH" in ["sigh","bcde"] +false + +>> "sıGH" in ["bcde","sIgh"] +true + +>> "SIgh" in ["bcde","sıgh"] +true + +// SORT + +>> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{Value:"i"},{Value:"İ"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{Value:"i"},{Value:"İ"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +>> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +// REGULAR EXPRESSIONS +// Always uses invariant even though tr-TR is set, subject of https://github.com/microsoft/Power-Fx/issues/2538 + +// Results when using C# // Invariant tr-TR en-US + +>> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false 
+false + +>> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> Match( "hiIıİİıIhi", "\u0130+" ) +{FullMatch:"İİ",StartMatch:5,SubMatches:Table()} + +>> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs index ef33b4c3be..4a3a3f9e50 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs @@ -102,6 +102,7 @@ public static Dictionary ParseSetupString(string setup) possible.Add("RegEx"); possible.Add("TimeZoneInfo"); possible.Add("TraceSetup"); + possible.Add("CultureInfo"); foreach (Match match in Regex.Matches(setup, @"(disable:)?(([\w]+|//)(\([^\)]*\))?)")) { diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs index 353934ff6f..b39f3d8a1e 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using 
System.Globalization; using System.Linq; using System.Reflection; using System.Text.RegularExpressions; @@ -20,6 +21,8 @@ internal class InternalSetup internal TimeZoneInfo TimeZoneInfo { get; set; } + internal CultureInfo CultureInfo { get; set; } + /// /// By default, we run expressions with a memory governor to enforce a limited amount of memory. /// When true, disable memory checks and allow expression to use as much memory as it needs. @@ -111,6 +114,23 @@ internal static InternalSetup Parse(string setupHandlerName, Features features, throw new ArgumentException("Invalid TimeZoneInfo setup!"); } } + else if (part.StartsWith("CultureInfo", StringComparison.OrdinalIgnoreCase)) + { + var m = new Regex(@"CultureInfo\(""(?<culture>[^)]+)""\)", RegexOptions.IgnoreCase).Match(part); + + if (m.Success) + { + var culture = m.Groups["culture"].Value; + + // This call will throw if the Language tag is invalid + iSetup.CultureInfo = new CultureInfo(culture); + parts.Remove(part); + } + else + { + throw new ArgumentException("Invalid CultureInfo setup!"); + } + } } iSetup.HandlerNames = parts; diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs index 0cc4d0bfc2..2c91f9112d 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs @@ -83,6 +83,11 @@ public async Task EvalAsync(RecalcEngine engine, string expr, Inte rtConfig.AddService(setup.TimeZoneInfo); } + if (setup.CultureInfo != null) + { + rtConfig.AddService(setup.CultureInfo); + } + var task = engine.EvalAsync(expr, CancellationToken.None, options: setup.Flags.ToParserOptions(new CultureInfo("en-US")), runtimeConfig: rtConfig); var i = 0; diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs
b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs index afa31bcb1f..a7994bc2ca 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs @@ -958,6 +958,11 @@ protected override async Task RunAsyncInternal(string expr, string se runtimeConfig.AddService(iSetup.TimeZoneInfo); } + if (iSetup.CultureInfo != null) + { + runtimeConfig.AddService(iSetup.CultureInfo); + } + if (engine.TryGetByName("traceRecord", out _)) { var traceRecord = engine.GetValue("traceRecord"); diff --git a/src/tools/Repl/Program.cs b/src/tools/Repl/Program.cs index 37cabab2b6..63ee94d22a 100644 --- a/src/tools/Repl/Program.cs +++ b/src/tools/Repl/Program.cs @@ -44,6 +44,8 @@ public static class ConsoleRepl private static StandardFormatter _standardFormatter; + private static CultureInfo _cultureInfo = CultureInfo.CurrentCulture; + private static bool _reset; private static RecalcEngine ReplRecalcEngine() @@ -90,6 +92,7 @@ private static RecalcEngine ReplRecalcEngine() config.AddFunction(new Option2Function()); config.AddFunction(new Run1Function()); config.AddFunction(new Run2Function()); + config.AddFunction(new Language1Function()); var optionsSet = new OptionSet("Options", DisplayNameUtility.MakeUnique(options)); @@ -132,6 +135,10 @@ public MyRepl() this.ValueFormatter = _standardFormatter; this.HelpProvider = new MyHelpProvider(); + var bsp = new BasicServiceProvider(); + bsp.AddService(_cultureInfo); + this.InnerServices = bsp; + this.AllowSetDefinitions = true; this.EnableSampleUserObject(); this.AddPseudoFunction(new IRPseudoFunction()); @@ -407,6 +414,26 @@ public FormulaValue Execute(StringValue option, BooleanValue value) } } + // set the language + private class Language1Function : ReflectionFunction + { + public Language1Function() + : base("Language", FormulaType.Void, new[] { FormulaType.String }) + { + } + + public FormulaValue 
Execute(StringValue lang) + { + var cultureInfo = new CultureInfo(lang.Value); + + _cultureInfo = cultureInfo; + + _reset = true; + + return FormulaValue.NewVoid(); + } + } + private class MyHelpProvider : HelpProvider { #pragma warning disable CS0618 // Type or member is obsolete @@ -478,6 +505,8 @@ Use Option( Options.FormatTable, false ) to disable table formatting. Use Option() to see the list of all options with their current value. Use Help( ""Options"" ) for more information. +Use Language( ""en-US"" ) to set culture info. + Once a formula is defined or a variable's type is defined, it cannot be changed. Use Reset() to clear all formulas and variables. "; From 79d0cb24a063b9f11da4ff0e16bbce954b584024 Mon Sep 17 00:00:00 2001 From: Greg Lindhorst Date: Sat, 8 Mar 2025 16:56:48 -0800 Subject: [PATCH 2/4] Updates --- .../ExpressionTestCases/Culture_en-US.txt | 33 +++- .../ExpressionTestCases/Culture_is-IS.txt | 173 ++++++++++++++++++ .../ExpressionTestCases/Culture_tr-TR.txt | 8 +- .../FileExpressionEvaluationTests.cs | 2 +- 4 files changed, 202 insertions(+), 14 deletions(-) create mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt index 121dd01006..bd6b7a5f82 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt @@ -1,9 +1,10 @@ -#SETUP: RegEx,CultureInfo("en-US"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult +#SETUP: RegEx,CultureInfo("en-US"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers -// Four types of letter I -// Dotted Dotless -// Upper İ U+0130 I U+0049 -// Lower i U+0069 ı U+0131 +// Compared against tr-TR here, as that is one of the 
harder languages to get right. This should NOT be the case in en-US: +// Four types of letter I +// Dotted Dotless +// Upper İ U+0130 I U+0049 +// Lower i U+0069 ı U+0131 >> Language() "en-US" @@ -124,12 +125,26 @@ Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{V >> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) ->> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +>> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) "a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" ->> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +>> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z a A b B c C", " " ), "Value" ), Value, " " ) "a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// is-IS alphabet, should not match the results with is-IS set +>> Concat( Sort( Split( "K L M N A B D E Ý F G Ö Ú Ó H I J O P R S T U V X Y Z Þ Æ Í É Ð Á s ý æ a á b í ó d ð e é f g h j k l m n o p r t u ú v x i y z þ ö", " " ), Value ), Value, " " ) +"A Á B D Ð E É F G H I Í J K L M N O Ó P R S T U Ú V X Y Ý Z Þ Æ Ö" + +>> Concat( SortByColumns( Split( "K L M N A B D E Ý F G Ö Ú Ó H I J O P R S T U V X Y Z Þ Æ Í É Ð Á s ý æ a á b í ó d ð e é f g h j k l m n o p r t u ú v x i y z þ ö", " " ), "Value" ), Value, " " ) +"A Á B D Ð E É F G H I Í J K L M N 
O Ó P R S T U Ú V X Y Ý Z Þ Æ Ö" + +// tr-TR alphabet, should not match the results with tr-TR set +>> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +>> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + // REGULAR EXPRESSIONS // Always uses invariant even though tr-TR is set, subject of https://github.com/microsoft/Power-Fx/issues/2538 @@ -171,8 +186,8 @@ false >> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false false ->> Match( "hiIıİİıIhi", "\u0130+" ) -{FullMatch:"İİ",StartMatch:5,SubMatches:Table()} +>> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) +{FullMatch:"İİ",StartMatch:5} >> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt new file mode 100644 index 0000000000..f94d52b95f --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt @@ -0,0 +1,173 @@ +#SETUP: RegEx,CultureInfo("is-IS"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +>> Language() +"is-IS" + +>> "İ" = UniChar( Hex2Dec( "0130") ) +true + +>> "ı" = UniChar( Hex2Dec( "0131" ) ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "i" ) +"I" + +>> Lower( "I" ) +"i" + +>> Upper( "i" ) = "I" +true + +>> Lower( "I" ) = "i" +true + +>> Lower( "quit" ) = Lower( "QUIT" ) +true + +>> Lower( "quit" ) = 
Lower( "QUİT" ) +true + +>> Lower( "quıt" ) = Lower( "QUIT" ) +false + +>> Upper( "quit" ) = Upper( "QUIT" ) +true + +>> Proper( "Iabc" ) +"Iabc" + +>> Proper( "iabc" ) +"Iabc" + +// VALUE, DECIMAL, FLOAT + +>> Value( "123,456" ) +123456 + +>> Value( "123,456", "tr-TR" ) +123.456 + +>> Decimal( "123,456" ) +123456 + +>> Decimal( "123,456", "tr-TR" ) +123.456 + +>> Float( "123,456" ) +123456 + +>> Float( "123,456", "tr-TR" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"Jan Fri 2010 PM" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"January Wednesday 2020 AM" + +>> Text( 123456789, "#,###.00" ) +"123,456,789.00" + +>> Text( 123456789, "#.###,00" ) +"123456789.00000" + +// IN AND EXACTIN + +>> "i" in "SIGH" +true + +>> "I" in "sigh" +true + +>> "i" exactin "SIGH" +false + +>> "I" exactin "sigh" +false + +>> "I" exactin "SIGH" +true + +>> "i" exactin "sigh" +true + +>> "sIGh" in ["sigh","bcde"] +true + +>> "siGh" in ["SIGH","bcde"] +true + +>> "sIGH" in ["sigh","bcde"] +true + +>> "siGH" in ["bcde","sIgh"] +true + +>> "SIgh" in ["bcde","sigh"] +true + +// SORT +// Relative order of i, I, ı, İ are different between en-US and tr-TR + +>> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +>> Concat( 
SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +// Results when using C# // Invariant tr-TR en-US + +>> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) +{FullMatch:"İİ",StartMatch:5} + +>> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt index db3c8a756d..fb7cc98c49 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt @@ -1,4 +1,4 @@ -#SETUP: 
RegEx,CultureInfo("tr-TR"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult +#SETUP: RegEx,CultureInfo("tr-TR"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers // Four types of letter I // Dotted Dotless @@ -160,7 +160,7 @@ Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{ "a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" // REGULAR EXPRESSIONS -// Always uses invariant even though tr-TR is set, subject of https://github.com/microsoft/Power-Fx/issues/2538 +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) // Results when using C# // Invariant tr-TR en-US @@ -200,8 +200,8 @@ false >> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false false ->> Match( "hiIıİİıIhi", "\u0130+" ) -{FullMatch:"İİ",StartMatch:5,SubMatches:Table()} +>> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) +{FullMatch:"İİ",StartMatch:5} >> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) true diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs index ed3f5348ca..2b6d8d1413 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs @@ -160,7 +160,7 @@ private static bool ShouldSkipDotNetVersion(ExpressionTestCase testCase, string [Fact] public void RunOne() { - var path = @"D:\repos\osp1\src\tests\Microsoft.PowerFx.Core.Tests\ExpressionTestCases\StronglyTypedEnum_TestEnums_PreV1.txt"; + var path = @"D:\repos\culture-tr\src\tests\Microsoft.PowerFx.Core.Tests.Shared\ExpressionTestCases\Culture_en-US.txt"; var line = 0; var runner = new InterpreterRunner(); From 0d5db8847a7cb90a3f1c6efb9fd5a9f730341ec7 Mon Sep 17 
00:00:00 2001 From: Greg Lindhorst Date: Sat, 8 Mar 2025 22:25:20 -0800 Subject: [PATCH 3/4] Updates --- .../ExpressionTestCases/Culture_da-DK.txt | 144 +++++++++++++++ .../ExpressionTestCases/Culture_en-US.txt | 49 +++-- .../ExpressionTestCases/Culture_is-IS.txt | 173 ------------------ .../ExpressionTestCases/Culture_sv-SE.txt | 138 ++++++++++++++ .../ExpressionTestCases/Culture_tr-TR.txt | 5 +- 5 files changed, 314 insertions(+), 195 deletions(-) create mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt delete mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt create mode 100644 src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt new file mode 100644 index 0000000000..a1b203bd9a --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt @@ -0,0 +1,144 @@ +#SETUP: RegEx,CultureInfo("da-DK"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +>> Language() +"da-DK" + +>> "Ø" = UniChar( Hex2Dec( "00d8") ) +true + +>> "ø" = UniChar( Hex2Dec( "00f8") ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "ø" ) +"Ø" + +>> Lower( "Ø" ) +"ø" + +>> Upper( "ø" ) = "Ø" +true + +>> Lower( "Ø" ) = "ø" +true + +>> Lower( "ørkesløse" ) = Lower( "ØRKESLØSE" ) +true + +>> Upper( "ørkesløse" ) = Upper( "ØRKESLØSE" ) +true + +>> Proper( "ørkesløse" ) +"Ørkesløse" + +>> Proper( "ØRKESLØSE" ) +"Ørkesløse" + +// VALUE, DECIMAL, FLOAT +// Comma decimal separator + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "da-DK" ) +123.456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "da-DK" ) +123.456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", "da-DK" ) +123.456 + +// TEXT + 
+>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"jan. fre. 2010 PM" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"januar onsdag 2020 AM" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 123456789, "#.###,00" ) +"123.456.789,00" + +// IN AND EXACTIN + +>> "å" in "ømtåligt" +true + +>> "å" in "ØMTÅLIGT" +true + +>> "Å" in "ømtåligt" +true + +>> "Å" in "ØMTÅLIGT" +true + +>> "å" exactin "ømtåligt" +true + +>> "å" exactin "ØMTÅLIGT" +false + +>> "Å" exactin "ØMTÅLIGT" +true + +>> "ØMtålIGT" in ["ømtåligt","bcde"] +true + +>> "ømtålIgt" in ["bcde", "ØMTÅLiGT"] +true + +>> "ømtålIgt" in ["bcde", "MTÅLiGT"] +false + +// SORT + +>> Concat( Sort( Split( "n F X W o i j x B m I R G S h Ø L p K t A k l y J æ u v s T a ø N D z Æ e O U E H r Z å g b q Å P d f C M c Y w V Q", " " ), Value ), Value, " " ) +"A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å" + +>> Concat( SortByColumns( Split( "U c q s X Å P L i I u d J å M E l k W v j Æ n a B K C D e ø æ f O y m Ø r Q R A x h T H N Z F V w o S g t p G Y b z", " " ), "Value" ), Value, " " ) +"A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +>> IsMatch( "å", "Å", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Å", "å", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Å", "A", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Å", "a", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "ø", "Ø", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ø", "ø", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "æ", "Æ", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Æ", "æ", MatchOptions.IgnoreCase ) +true + +>> ShowColumns( Match( "ØMTÅÅLIGT", "\u00c5+" ), FullMatch, StartMatch ) +{FullMatch:"ÅÅ",StartMatch:4} + +>> IsMatch( "ØMTÅÅLIGT", "Ø", 
MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt index bd6b7a5f82..ec19d5a950 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt @@ -117,36 +117,45 @@ true true // SORT -// Relative order of i, I, ı, İ are different between en-US and tr-TR ->> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) -Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) +>> Concat( Sort( Split( "U m F M Z A j K P N k v C D T R V B u p a t c l G o f O J y I d i S b H e s z g Y h E r L n", " " ), Value ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z" ->> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) -Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) +>> Concat( SortByColumns( Split( "M v E H h t Z n V l Y O N r C z D K R G U j o I b y p k a e S d A F g s B L m c T P f u J i", " " ), "Value" ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z" ->> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) -"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// tr-TR alphabet, should not necessarily match ->> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z a A b B c C", " " ), "Value" ), Value, " " ) -"a A 
b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +>> " " & Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +" a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// tr-TR: a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z -// is-IS alphabet, should not match the results with is-IS set ->> Concat( Sort( Split( "K L M N A B D E Ý F G Ö Ú Ó H I J O P R S T U V X Y Z Þ Æ Í É Ð Á s ý æ a á b í ó d ð e é f g h j k l m n o p r t u ú v x i y z þ ö", " " ), Value ), Value, " " ) -"A Á B D Ð E É F G H I Í J K L M N O Ó P R S T U Ú V X Y Ý Z Þ Æ Ö" +>> " " & Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +" a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// tr-TR: a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z ->> Concat( SortByColumns( Split( "K L M N A B D E Ý F G Ö Ú Ó H I J O P R S T U V X Y Z Þ Æ Í É Ð Á s ý æ a á b í ó d ð e é f g h j k l m n o p r t u ú v x i y z þ ö", " " ), "Value" ), Value, " " ) -"A Á B D Ð E É F G H I Í J K L M N O Ó P R S T U Ú V X Y Ý Z Þ Æ Ö" +// da-DK alphabet, should not necessarily match -// tr-TR alphabet, should not match the results with tr-TR set ->> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) -"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +>> " " & Concat( Sort( 
Split( "n F X W o i j x B m I R G S h Ø L p K t A k l y J æ u v s T a ø N D z Æ e O U E H r Z å g b q Å P d f C M c Y w V Q", " " ), Value ), Value, " " ) +" a A å Å æ Æ b B c C d D e E f F g G h H i I j J k K l L m M n N o O ø Ø p P q Q r R s S t T u U v V w W x X y Y z Z" +// da-DK: A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å ->> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) -"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +>> " " & Concat( SortByColumns( Split( "U c q s X Å P L i I u d J å M E l k W v j Æ n a B K C D e ø æ f O y m Ø r Q R A x h T H N Z F V w o S g t p G Y b z", " " ), "Value" ), Value, " " ) +" a A å Å æ Æ b B c C d D e E f F g G h H i I j J k K l L m M n N o O ø Ø p P q Q r R s S t T u U v V w W x X y Y z Z" +// da-DK: A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å + +// sv-SE alphabet, should not necessarily match + +>> " " & Concat( Sort( Split( "H C å n N P X c j F Q O r A D s L ä K t b ö g k Ö z m l x U e y w S Å f p I Y W Z J B o u d G v E T i R Ä V a q M h", " " ), Value ), Value, " " ) +" a A å Å ä Ä b B c C d D e E f F g G h H i I j J k K l L m M n N o O ö Ö p P q Q r R s S t T u U v V w W x X y Y z Z" +// sv-SE: a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö + +>> " " & Concat( SortByColumns( Split( "B I Ö p M Y h a A X V w z J Z t l k x G Ä e f v y T W E i Å R N Q K n C u F b H L q c r d m g o S j O P å D U ö s ä", " " ), "Value" ), Value, " " ) +" a A å Å ä Ä b B c C d D e E f F g G h H i I j J k K l L m M n N o O ö Ö p P q Q r R s S t T u U v V w W x X y Y z Z" +// sv-SE: a A b B c C d D e E f F g G h H i I j J k K l L m M n N 
o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö // REGULAR EXPRESSIONS -// Always uses invariant even though tr-TR is set, subject of https://github.com/microsoft/Power-Fx/issues/2538 +// C# invariant culture is always used, even though tr-TR is set. This is an industry standard for regular expressions, somewhat to prevent differences matching system strings (such as file name parsing). // Results when using C# // Invariant tr-TR en-US diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt deleted file mode 100644 index f94d52b95f..0000000000 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_is-IS.txt +++ /dev/null @@ -1,173 +0,0 @@ -#SETUP: RegEx,CultureInfo("is-IS"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers - ->> Language() -"is-IS" - ->> "İ" = UniChar( Hex2Dec( "0130") ) -true - ->> "ı" = UniChar( Hex2Dec( "0131" ) ) -true - -// UPPER, LOWER, PROPER - ->> Upper( "i" ) -"I" - ->> Lower( "I" ) -"i" - ->> Upper( "i" ) = "I" -true - ->> Lower( "I" ) = "i" -true - ->> Lower( "quit" ) = Lower( "QUIT" ) -true - ->> Lower( "quit" ) = Lower( "QUİT" ) -true - ->> Lower( "quıt" ) = Lower( "QUIT" ) -false - ->> Upper( "quit" ) = Upper( "QUIT" ) -true - ->> Proper( "Iabc" ) -"Iabc" - ->> Proper( "iabc" ) -"Iabc" - -// VALUE, DECIMAL, FLOAT - ->> Value( "123,456" ) -123456 - ->> Value( "123,456", "tr-TR" ) -123.456 - ->> Decimal( "123,456" ) -123456 - ->> Decimal( "123,456", "tr-TR" ) -123.456 - ->> Float( "123,456" ) -123456 - ->> Float( "123,456", "tr-TR" ) -123.456 - -// TEXT - ->> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) -"Jan Fri 2010 PM" - ->> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) -"January Wednesday 2020 AM" - ->> Text( 123456789, "#,###.00" ) -"123,456,789.00" - ->> Text( 123456789, "#.###,00" ) -"123456789.00000" 
- -// IN AND EXACTIN - ->> "i" in "SIGH" -true - ->> "I" in "sigh" -true - ->> "i" exactin "SIGH" -false - ->> "I" exactin "sigh" -false - ->> "I" exactin "SIGH" -true - ->> "i" exactin "sigh" -true - ->> "sIGh" in ["sigh","bcde"] -true - ->> "siGh" in ["SIGH","bcde"] -true - ->> "sIGH" in ["sigh","bcde"] -true - ->> "siGH" in ["bcde","sIgh"] -true - ->> "SIgh" in ["bcde","sigh"] -true - -// SORT -// Relative order of i, I, ı, İ are different between en-US and tr-TR - ->> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) -Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) - ->> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) -Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"i"},{Value:"I"},{Value:"İ"},{Value:"ı"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) - ->> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) -"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" - ->> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) -"a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" - -// REGULAR EXPRESSIONS -// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) - -// Results when using C# // Invariant tr-TR en-US - ->> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // false TRUE TRUE -false - ->> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE -false - ->> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false -false - ->> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) 
// false TRUE false -false - ->> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE -false - ->> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE -false - ->> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false -false - ->> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false -false - ->> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE -true - ->> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE -true - ->> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false -false - ->> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false -false - ->> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) -{FullMatch:"İİ",StartMatch:5} - ->> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) -true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt new file mode 100644 index 0000000000..46614df1a4 --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt @@ -0,0 +1,138 @@ +#SETUP: RegEx,CultureInfo("sv-SE"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +>> Language() +"sv-SE" + +>> "Ö" = UniChar( Hex2Dec( "00d6") ) +true + +>> "ö" = UniChar( Hex2Dec( "00f6") ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "ö" ) +"Ö" + +>> Lower( "Ö" ) +"ö" + +>> Upper( "ö" ) = "Ö" +true + +>> Lower( "Ö" ) = "ö" +true + +>> Lower( "något" ) = Lower( "NÅGOT" ) +true + +>> Upper( "NÅGOT" ) = Upper( "något" ) +true + +>> Proper( "något" ) +"Något" + +>> Proper( "NÅGOT" ) +"Något" + +// VALUE, DECIMAL, FLOAT +// Comma decimal separator + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "sv-SE" ) +123.456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "sv-SE" ) +123.456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", 
"sv-SE" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"jan. fre 2010 em" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"januari onsdag 2020 fm" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 123456789, "#.###,00" ) +"123456789,00000" + +// IN AND EXACTIN + +>> "ö" in "varför" +true + +>> "Ö" in "varför" +true + +>> "Ä" in "poäng" +true + +>> "ä" in "POÄNG" +true + +>> "ö" exactin "varför" +true + +>> "Ö" exactin "varför" +false + +>> "Ä" exactin "poäng" +false + +>> "varför" in ["vARFÖr","bcde"] +true + +>> "poÄng" in ["bcde", "poäng"] +true + +>> "poäng" in ["bcde", "varför"] +false + +// SORT + +>> Concat( Sort( Split( "H C å n N P X c j F Q O r A D s L ä K t b ö g k Ö z m l x U e y w S Å f p I Y W Z J B o u d G v E T i R Ä V a q M h", " " ), Value ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö" + +>> Concat( SortByColumns( Split( "B I Ö p M Y h a A X V w z J Z t l k x G Ä e f v y T W E i Å R N Q K n C u F b H L q c r d m g o S j O P å D U ö s ä", " " ), "Value" ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +>> IsMatch( "ä", "Ä", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ä", "ä", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ä", "A", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Ä", "a", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Ö", "ö", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "ö", "Ö", MatchOptions.IgnoreCase ) +true + +>> ShowColumns( Match( "poÄÄng", "\u00c4+" ), FullMatch, StartMatch ) +{FullMatch:"ÄÄ",StartMatch:3} + +>> IsMatch( "poäng", "[Ää]", MatchOptions.Contains ) +true diff --git 
a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt index fb7cc98c49..ca5b7efe9b 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt @@ -71,6 +71,7 @@ true "Iabc" // VALUE, DECIMAL, FLOAT +// Comma decimal separator >> Value( "123,456" ) 123.456 @@ -153,10 +154,10 @@ Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{ >> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{Value:"i"},{Value:"İ"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) ->> Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +>> Concat( Sort( Split( "t R Ç Ü n P U h f J M N u ı o v y g ö V L Ö d O Z r ğ E T c D ç k ü Ğ s F S j G i I C A K İ e a l H Y Ş ş B p b z m", " " ), Value ), Value, " " ) "a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" ->> Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +>> Concat( SortByColumns( Split( "P Y h K i I y Ş L ı a J H s D U Z v C T n E ş ö ğ N d u m O r l z c Ö S g f p e M R F İ b V ü Ç A B t Ğ ç G j o k Ü", " " ), "Value" ), Value, " " ) "a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" // REGULAR EXPRESSIONS From e4d232e1dec7438ba6c56b4c04f9825149d7d143 Mon Sep 17 00:00:00 2001 From: Greg Lindhorst Date: Sat, 8 Mar 2025 23:55:20 -0800 Subject: [PATCH 4/4] Updates --- 
.../Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs index a151ca6e2c..f707c60fcb 100644 --- a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs +++ b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs @@ -1298,10 +1298,9 @@ private static FormulaValue SortValueType(List< var n1 = a.sortValue as TPFxPrimitive; var n2 = b.sortValue as TPFxPrimitive; - CultureInfo culture; - if (n1.Value is string n1s && n2.Value is string n2s && (culture = runner.GetService()) != null) + if (n1.Value is string n1s && n2.Value is string n2s && runner.CultureInfo != null) { - return culture.CompareInfo.Compare(n1s, n2s) * compareToResultModifier; + return runner.CultureInfo.CompareInfo.Compare(n1s, n2s) * compareToResultModifier; } else {