diff --git a/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs b/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs index 2dabf96f14..e4ff4a41d8 100644 --- a/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs +++ b/src/libraries/Microsoft.PowerFx.Core/Public/Values/StringValue.cs @@ -36,11 +36,6 @@ public override void Visit(IValueVisitor visitor) visitor.Visit(this); } - internal StringValue ToLower() - { - return new StringValue(IRContext.NotInSource(FormulaType.String), Value.ToLowerInvariant()); - } - public override void ToExpression(StringBuilder sb, FormulaValueSerializerSettings settings) { sb.Append($"\"{CharacterUtils.ExcelEscapeString(Value)}\""); diff --git a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs index 79f1110fe5..ab477d860e 100644 --- a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs +++ b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryOperators.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Globalization; using System.Linq; using Microsoft.PowerFx.Core.IR; using Microsoft.PowerFx.Core.Utils; @@ -719,9 +720,9 @@ private static BooleanValue NotEqualPolymorphic(IRContext irContext, FormulaValu } // See in_SS in JScript membershipReplacementFunctions - public static Func StringInOperator(bool exact) + public static Func StringInOperator(bool exact) { - return (irContext, args) => + return (services, irContext, args) => { var left = args[0]; var right = args[1]; @@ -737,23 +738,25 @@ public static Func StringInOperator(boo var leftStr = (StringValue)left; var rightStr = (StringValue)right; - - return new BooleanValue(irContext, rightStr.Value.IndexOf(leftStr.Value, exact ? 
StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase) >= 0); + + return new BooleanValue(irContext, services.GetService().CompareInfo.IndexOf(rightStr.Value, leftStr.Value, exact ? CompareOptions.Ordinal : CompareOptions.IgnoreCase) >= 0); }; } // Left is a scalar. Right is a single-column table. // See in_ST() - public static Func InScalarTableOperator(bool exact) + public static Func InScalarTableOperator(bool exact) { - return (irContext, args) => + return (services, irContext, args) => { var left = args[0]; - var right = args[1]; - + var right = args[1]; + + var cultureInfo = services.GetService(); + if (!exact && left is StringValue strLhs) { - left = strLhs.ToLower(); + left = new StringValue(IRContext.NotInSource(FormulaType.String), cultureInfo.TextInfo.ToLower(strLhs.Value)); } var source = (TableValue)right; @@ -766,7 +769,7 @@ public static Func InScalarTableOperato if (!exact && rhs is StringValue strRhs) { - rhs = strRhs.ToLower(); + rhs = new StringValue(IRContext.NotInSource(FormulaType.String), cultureInfo.TextInfo.ToLower(strRhs.Value)); } if (RuntimeHelpers.AreEqual(left, rhs)) diff --git a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs index d53d204078..f707c60fcb 100644 --- a/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs +++ b/src/libraries/Microsoft.PowerFx.Interpreter/Functions/LibraryTable.cs @@ -830,31 +830,31 @@ public static async ValueTask SortTable(EvalVisitor runner, EvalVi if (allNumbers) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDecimals) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allStrings) { - return SortValueType(pairs, irContext, compareToResultModifier); + return 
SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allBooleans) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDatetimes) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allDates) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allTimes) { - return SortValueType(pairs, irContext, compareToResultModifier); + return SortValueType(pairs, runner, irContext, compareToResultModifier); } else if (allOptionSets) { @@ -1281,7 +1281,7 @@ private static FormulaValue DistinctValueType(List<(DValue row, For return new InMemoryTableValue(irContext, result); } - private static FormulaValue SortValueType(List<(DValue row, FormulaValue sortValue)> pairs, IRContext irContext, int compareToResultModifier) + private static FormulaValue SortValueType(List<(DValue row, FormulaValue sortValue)> pairs, EvalVisitor runner, IRContext irContext, int compareToResultModifier) where TPFxPrimitive : PrimitiveValue where TDotNetPrimitive : IComparable { @@ -1297,8 +1297,15 @@ private static FormulaValue SortValueType(List< } var n1 = a.sortValue as TPFxPrimitive; - var n2 = b.sortValue as TPFxPrimitive; - return n1.Value.CompareTo(n2.Value) * compareToResultModifier; + var n2 = b.sortValue as TPFxPrimitive; + if (n1.Value is string n1s && n2.Value is string n2s && runner.CultureInfo != null) + { + return runner.CultureInfo.CompareInfo.Compare(n1s, n2s) * compareToResultModifier; + } + else + { + return n1.Value.CompareTo(n2.Value) * compareToResultModifier; + } }); return new InMemoryTableValue(irContext, pairs.Select(pair => pair.row)); diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt 
b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt new file mode 100644 index 0000000000..a1b203bd9a --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_da-DK.txt @@ -0,0 +1,144 @@ +#SETUP: RegEx,CultureInfo("da-DK"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +>> Language() +"da-DK" + +>> "Ø" = UniChar( Hex2Dec( "00d8") ) +true + +>> "ø" = UniChar( Hex2Dec( "00f8") ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "ø" ) +"Ø" + +>> Lower( "Ø" ) +"ø" + +>> Upper( "ø" ) = "Ø" +true + +>> Lower( "Ø" ) = "ø" +true + +>> Lower( "ørkesløse" ) = Lower( "ØRKESLØSE" ) +true + +>> Upper( "ørkesløse" ) = Upper( "ØRKESLØSE" ) +true + +>> Proper( "ørkesløse" ) +"Ørkesløse" + +>> Proper( "ØRKESLØSE" ) +"Ørkesløse" + +// VALUE, DECIMAL, FLOAT +// Comma decimal separator + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "da-DK" ) +123.456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "da-DK" ) +123.456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", "da-DK" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"jan. fre. 
2010 PM" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"januar onsdag 2020 AM" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 123456789, "#.###,00" ) +"123.456.789,00" + +// IN AND EXACTIN + +>> "å" in "ømtåligt" +true + +>> "å" in "ØMTÅLIGT" +true + +>> "Å" in "ømtåligt" +true + +>> "Å" in "ØMTÅLIGT" +true + +>> "å" exactin "ømtåligt" +true + +>> "å" exactin "ØMTÅLIGT" +false + +>> "Å" exactin "ØMTÅLIGT" +true + +>> "ØMtålIGT" in ["ømtåligt","bcde"] +true + +>> "ømtålIgt" in ["bcde", "ØMTÅLiGT"] +true + +>> "ømtålIgt" in ["bcde", "MTÅLiGT"] +false + +// SORT + +>> Concat( Sort( Split( "n F X W o i j x B m I R G S h Ø L p K t A k l y J æ u v s T a ø N D z Æ e O U E H r Z å g b q Å P d f C M c Y w V Q", " " ), Value ), Value, " " ) +"A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å" + +>> Concat( SortByColumns( Split( "U c q s X Å P L i I u d J å M E l k W v j Æ n a B K C D e ø æ f O y m Ø r Q R A x h T H N Z F V w o S g t p G Y b z", " " ), "Value" ), Value, " " ) +"A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +>> IsMatch( "å", "Å", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Å", "å", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Å", "A", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Å", "a", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "ø", "Ø", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ø", "ø", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "æ", "Æ", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Æ", "æ", MatchOptions.IgnoreCase ) +true + +>> ShowColumns( Match( "ØMTÅÅLIGT", "\u00c5+" ), FullMatch, StartMatch ) +{FullMatch:"ÅÅ",StartMatch:4} + +>> IsMatch( "ØMTÅÅLIGT", "Ø", MatchOptions.Contains ) +true diff --git 
a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt new file mode 100644 index 0000000000..ec19d5a950 --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_en-US.txt @@ -0,0 +1,202 @@ +#SETUP: RegEx,CultureInfo("en-US"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +// Compared against tr-TR here, as that is one of the harder languages to get right. This should NOT be the case in en-US: +// Four types of letter I +// Dotted Dotless +// Upper İ U+0130 I U+0049 +// Lower i U+0069 ı U+0131 + +>> Language() +"en-US" + +>> "İ" = UniChar( Hex2Dec( "0130") ) +true + +>> "ı" = UniChar( Hex2Dec( "0131" ) ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "i" ) +"I" + +>> Lower( "I" ) +"i" + +>> Upper( "i" ) = "I" +true + +>> Lower( "I" ) = "i" +true + +>> Lower( "quit" ) = Lower( "QUIT" ) +true + +>> Lower( "quit" ) = Lower( "QUİT" ) +true + +>> Lower( "quıt" ) = Lower( "QUIT" ) +false + +>> Upper( "quit" ) = Upper( "QUIT" ) +true + +>> Proper( "Iabc" ) +"Iabc" + +>> Proper( "iabc" ) +"Iabc" + +// VALUE, DECIMAL, FLOAT + +>> Value( "123,456" ) +123456 + +>> Value( "123,456", "tr-TR" ) +123.456 + +>> Decimal( "123,456" ) +123456 + +>> Decimal( "123,456", "tr-TR" ) +123.456 + +>> Float( "123,456" ) +123456 + +>> Float( "123,456", "tr-TR" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"Jan Fri 2010 PM" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"January Wednesday 2020 AM" + +>> Text( 123456789, "#,###.00" ) +"123,456,789.00" + +>> Text( 123456789, "#.###,00" ) +"123456789.00000" + +// IN AND EXACTIN + +>> "i" in "SIGH" +true + +>> "I" in "sigh" +true + +>> "i" exactin "SIGH" +false + +>> "I" exactin "sigh" +false + +>> "I" exactin "SIGH" +true + +>> "i" exactin "sigh" +true + +>> "sIGh" in ["sigh","bcde"] +true + 
+>> "siGh" in ["SIGH","bcde"] +true + +>> "sIGH" in ["sigh","bcde"] +true + +>> "siGH" in ["bcde","sIgh"] +true + +>> "SIgh" in ["bcde","sigh"] +true + +// SORT + +>> Concat( Sort( Split( "U m F M Z A j K P N k v C D T R V B u p a t c l G o f O J y I d i S b H e s z g Y h E r L n", " " ), Value ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z" + +>> Concat( SortByColumns( Split( "M v E H h t Z n V l Y O N r C z D K R G U j o I b y p k a e S d A F g s B L m c T P f u J i", " " ), "Value" ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z" + +// tr-TR alphabet, should not necessarily match + +>> " " & Concat( Sort( Split( "j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C d D e E f F g G h H i I", " " ), Value ), Value, " " ) +" a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// tr-TR: a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z + +>> " " & Concat( SortByColumns( Split( "d D e E f F g G h H i I j J k K l L m M n N o O p P r R s S t T u U v V y Y z Z Ç ç Ş ş Ü ü Ö ö İ ı Ğ ğ a A b B c C", " " ), "Value" ), Value, " " ) +" a A b B c C ç Ç d D e E f F g G ğ Ğ h H i I İ ı j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" +// tr-TR: a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z + +// da-DK alphabet, should not necessarily match + +>> " " & Concat( Sort( Split( "n F X W o i j x B m I R G S h Ø L p K t A k l y J æ u v s T a ø N D z Æ e O U E H r Z å g b q Å P d f C M c Y w V Q", " " ), Value ), Value, " " ) +" a A å Å æ Æ b B c C d D e E f F g G h H i I j J k K l L m M n N o O ø Ø p P q Q r R s S t T u U v V w W x X y Y z Z" +// da-DK: A a B b C c D d E e F f G g H h I i J j K k L l M m N 
n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å + +>> " " & Concat( SortByColumns( Split( "U c q s X Å P L i I u d J å M E l k W v j Æ n a B K C D e ø æ f O y m Ø r Q R A x h T H N Z F V w o S g t p G Y b z", " " ), "Value" ), Value, " " ) +" a A å Å æ Æ b B c C d D e E f F g G h H i I j J k K l L m M n N o O ø Ø p P q Q r R s S t T u U v V w W x X y Y z Z" +// da-DK: A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z Æ æ Ø ø Å å + +// sv-SE alphabet, should not necessarily match + +>> " " & Concat( Sort( Split( "H C å n N P X c j F Q O r A D s L ä K t b ö g k Ö z m l x U e y w S Å f p I Y W Z J B o u d G v E T i R Ä V a q M h", " " ), Value ), Value, " " ) +" a A å Å ä Ä b B c C d D e E f F g G h H i I j J k K l L m M n N o O ö Ö p P q Q r R s S t T u U v V w W x X y Y z Z" +// sv-SE: a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö + +>> " " & Concat( SortByColumns( Split( "B I Ö p M Y h a A X V w z J Z t l k x G Ä e f v y T W E i Å R N Q K n C u F b H L q c r d m g o S j O P å D U ö s ä", " " ), "Value" ), Value, " " ) +" a A å Å ä Ä b B c C d D e E f F g G h H i I j J k K l L m M n N o O ö Ö p P q Q r R s S t T u U v V w W x X y Y z Z" +// sv-SE: a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö + +// REGULAR EXPRESSIONS +// C# invariant culture is always used, even when a culture such as tr-TR is set. This is an industry standard for regular expressions, partly to prevent differences when matching system strings (such as file name parsing). 
+ +// Results when using C# // Invariant tr-TR en-US + +>> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) +{FullMatch:"İİ",StartMatch:5} + +>> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt new file mode 100644 index 0000000000..46614df1a4 --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_sv-SE.txt @@ -0,0 +1,138 @@ +#SETUP: RegEx,CultureInfo("sv-SE"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +>> Language() +"sv-SE" + +>> "Ö" = UniChar( Hex2Dec( "00d6") ) +true + +>> "ö" = UniChar( Hex2Dec( "00f6") ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "ö" ) +"Ö" + +>> Lower( "Ö" ) +"ö" + +>> Upper( "ö" ) = "Ö" +true + +>> Lower( "Ö" ) = "ö" +true + +>> Lower( "något" ) = Lower( "NÅGOT" ) +true + +>> Upper( "NÅGOT" ) = Upper( "något" ) +true + +>> 
Proper( "något" ) +"Något" + +>> Proper( "NÅGOT" ) +"Något" + +// VALUE, DECIMAL, FLOAT +// Comma decimal separator + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "sv-SE" ) +123.456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "sv-SE" ) +123.456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", "sv-SE" ) +123.456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), "mmm ddd yyyy AM/PM" ) +"jan. fre 2010 em" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"januari onsdag 2020 fm" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 123456789, "#.###,00" ) +"123456789,00000" + +// IN AND EXACTIN + +>> "ö" in "varför" +true + +>> "Ö" in "varför" +true + +>> "Ä" in "poäng" +true + +>> "ä" in "POÄNG" +true + +>> "ö" exactin "varför" +true + +>> "Ö" exactin "varför" +false + +>> "Ä" exactin "poäng" +false + +>> "varför" in ["vARFÖr","bcde"] +true + +>> "poÄng" in ["bcde", "poäng"] +true + +>> "poäng" in ["bcde", "varför"] +false + +// SORT + +>> Concat( Sort( Split( "H C å n N P X c j F Q O r A D s L ä K t b ö g k Ö z m l x U e y w S Å f p I Y W Z J B o u d G v E T i R Ä V a q M h", " " ), Value ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö" + +>> Concat( SortByColumns( Split( "B I Ö p M Y h a A X V w z J Z t l k x G Ä e f v y T W E i Å R N Q K n C u F b H L q c r d m g o S j O P å D U ö s ä", " " ), "Value" ), Value, " " ) +"a A b B c C d D e E f F g G h H i I j J k K l L m M n N o O p P q Q r R s S t T u U v V w W x X y Y z Z å Å ä Ä ö Ö" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +>> IsMatch( "ä", "Ä", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ä", "ä", MatchOptions.IgnoreCase ) +true + +>> IsMatch( "Ä", "A", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Ä", "a", MatchOptions.IgnoreCase ) +false + +>> IsMatch( "Ö", "ö", 
MatchOptions.IgnoreCase ) +true + +>> IsMatch( "ö", "Ö", MatchOptions.IgnoreCase ) +true + +>> ShowColumns( Match( "poÄÄng", "\u00c4+" ), FullMatch, StartMatch ) +{FullMatch:"ÄÄ",StartMatch:3} + +>> IsMatch( "poäng", "[Ää]", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt new file mode 100644 index 0000000000..ca5b7efe9b --- /dev/null +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestCases/Culture_tr-TR.txt @@ -0,0 +1,208 @@ +#SETUP: RegEx,CultureInfo("tr-TR"),PowerFxV1CompatibilityRules,ConsistentOneColumnTableResult,SupportColumnNamesAsIdentifiers + +// Four types of letter I +// Dotted Dotless +// Upper İ U+0130 I U+0049 +// Lower i U+0069 ı U+0131 + +>> Language() +"tr-TR" + +>> "İ" = UniChar( Hex2Dec( "0130") ) +true + +>> "ı" = UniChar( Hex2Dec( "0131" ) ) +true + +// UPPER, LOWER, PROPER + +>> Upper( "i" ) +"İ" + +>> Lower( "I" ) +"ı" + +>> Upper( "ı" ) +"I" + +>> Lower( "İ" ) +"i" + +>> Upper( "i" ) = UniChar( Hex2Dec( "0130") ) +true + +>> Lower( "I" ) = UniChar( Hex2Dec( "0131") ) +true + +>> Upper( "i" ) = "I" +false + +>> Lower( "I" ) = "i" +false + +>> Lower( "quit" ) = Lower( "QUIT" ) +false + +>> Lower( "quit" ) = Lower( "QUİT" ) +true + +>> Lower( "quıt" ) = Lower( "QUIT" ) +true + +>> Upper( "quit" ) = Upper( "QUIT" ) +false + +>> Upper( "quit" ) = Upper( "QUİT" ) +true + +>> Upper( "quıt" ) = Upper( "QUIT" ) +true + +>> Proper( "Iabc" ) +"Iabc" + +>> Proper( "iabc" ) +"İabc" + +>> Proper( "İabc" ) +"İabc" + +>> Proper( "ıabc" ) +"Iabc" + +// VALUE, DECIMAL, FLOAT +// Comma decimal separator + +>> Value( "123,456" ) +123.456 + +>> Value( "123,456", "en-US" ) +123456 + +>> Decimal( "123,456" ) +123.456 + +>> Decimal( "123,456", "en-US" ) +123456 + +>> Float( "123,456" ) +123.456 + +>> Float( "123,456", "en-US" ) +123456 + +// TEXT + +>> Text( DateTime(2010,1,1,14,0,0,0), 
"mmm ddd yyyy AM/PM" ) +"Oca Cum 2010 ÖS" + +>> Text( DateTime(2020,1,1,2,0,0,0), "mmmm dddd yyyy AM/PM" ) +"Ocak Çarşamba 2020 ÖÖ" + +>> Text( 123456789, "#,###.00" ) +"123456789,00000" + +>> Text( 123456789, "#.###,00" ) +"123.456.789,00" + +// IN AND EXACTIN + +>> "ı" in "SIGH" +true + +>> "İ" in "sigh" +true + +>> "ı" in "SİGH" +false + +>> "İ" in "sıgh" +false + +>> "ı" exactin "SIGH" +false + +>> "İ" exactin "sigh" +false + +>> "ı" exactin "SİGH" +false + +>> "İ" exactin "sıgh" +false + +>> "sİGh" in ["sigh","bcde"] +true + +>> "siGh" in ["SİGH","bcde"] +true + +>> "sIGH" in ["sigh","bcde"] +false + +>> "sıGH" in ["bcde","sIgh"] +true + +>> "SIgh" in ["bcde","sıgh"] +true + +// SORT + +>> Sort( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], Value ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{Value:"i"},{Value:"İ"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> SortByColumns( [ "Z", "İ", "z", "I", "J", "j", "ı", "a", "h", "i", "A", "H"], "Value" ) +Table({Value:"a"},{Value:"A"},{Value:"h"},{Value:"H"},{Value:"ı"},{Value:"I"},{Value:"i"},{Value:"İ"},{Value:"j"},{Value:"J"},{Value:"z"},{Value:"Z"}) + +>> Concat( Sort( Split( "t R Ç Ü n P U h f J M N u ı o v y g ö V L Ö d O Z r ğ E T c D ç k ü Ğ s F S j G i I C A K İ e a l H Y Ş ş B p b z m", " " ), Value ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +>> Concat( SortByColumns( Split( "P Y h K i I y Ş L ı a J H s D U Z v C T n E ş ö ğ N d u m O r l z c Ö S g f p e M R F İ b V ü Ç A B t Ğ ç G j o k Ü", " " ), "Value" ), Value, " " ) +"a A b B c C ç Ç d D e E f F g G ğ Ğ h H ı I i İ j J k K l L m M n N o O ö Ö p P r R s S ş Ş t T u U ü Ü v V y Y z Z" + +// REGULAR EXPRESSIONS +// Always uses invariant in all locales, even in en-US and tr-TR (industry standard) + +// Results when using C# // Invariant tr-TR en-US + +>> IsMatch( "İ", "i", MatchOptions.IgnoreCase ) // 
false TRUE TRUE +false + +>> IsMatch( "i", "İ", MatchOptions.IgnoreCase ) // false TRUE TRUE +false + +>> IsMatch( "ı", "I", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "I", "ı", MatchOptions.IgnoreCase ) // false TRUE false +false + +>> IsMatch( "İ", "I", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "I", "İ", MatchOptions.IgnoreCase ) // false false TRUE +false + +>> IsMatch( "ı", "i", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "i", "I", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "I", "i", MatchOptions.IgnoreCase ) // TRUE false TRUE +true + +>> IsMatch( "ı", "İ", MatchOptions.IgnoreCase ) // false false false +false + +>> IsMatch( "İ", "ı", MatchOptions.IgnoreCase ) // false false false +false + +>> ShowColumns( Match( "hiIıİİıIhi", "\u0130+" ), FullMatch, StartMatch ) +{FullMatch:"İİ",StartMatch:5} + +>> IsMatch( "Sıgh", "\u0131", MatchOptions.Contains ) +true diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs index ef33b4c3be..4a3a3f9e50 100644 --- a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/ExpressionTestHelpers/TestRunner.cs @@ -102,6 +102,7 @@ public static Dictionary ParseSetupString(string setup) possible.Add("RegEx"); possible.Add("TimeZoneInfo"); possible.Add("TraceSetup"); + possible.Add("CultureInfo"); foreach (Match match in Regex.Matches(setup, @"(disable:)?(([\w]+|//)(\([^\)]*\))?)")) { diff --git a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs index 7301c0178f..05fd375c29 100644 --- 
a/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs +++ b/src/tests/Microsoft.PowerFx.Core.Tests.Shared/TestRunnerTests/InternalSetup.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; using Microsoft.PowerFx.Core.Parser; @@ -19,6 +20,8 @@ internal class InternalSetup internal TimeZoneInfo TimeZoneInfo { get; set; } + internal CultureInfo CultureInfo { get; set; } + /// /// By default, we run expressions with a memory governor to enforce a limited amount of memory. /// When true, disable memory checks and allow expression to use as much memory as it needs. @@ -144,6 +147,23 @@ internal static InternalSetup Parse(string setupHandlerName, Features features, throw new ArgumentException("Invalid TimeZoneInfo setup!"); } } + else if (part.StartsWith("CultureInfo", StringComparison.OrdinalIgnoreCase)) + { + var m = new Regex(@"CultureInfo\(""(?[^)]+)""\)", RegexOptions.IgnoreCase).Match(part); + + if (m.Success) + { + var culture = m.Groups["culture"].Value; + + // This call will throw if the Language tag is invalid + iSetup.CultureInfo = new CultureInfo(culture); + parts.Remove(part); + } + else + { + throw new ArgumentException("Invalid TimeZoneInfo setup!"); + } + } } iSetup.HandlerNames = parts; diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs index ed3f5348ca..2b6d8d1413 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/FileExpressionEvaluationTests.cs @@ -160,7 +160,7 @@ private static bool ShouldSkipDotNetVersion(ExpressionTestCase testCase, string [Fact] public void RunOne() { - var path = 
@"D:\repos\osp1\src\tests\Microsoft.PowerFx.Core.Tests\ExpressionTestCases\StronglyTypedEnum_TestEnums_PreV1.txt"; + var path = @"D:\repos\culture-tr\src\tests\Microsoft.PowerFx.Core.Tests.Shared\ExpressionTestCases\Culture_en-US.txt"; var line = 0; var runner = new InterpreterRunner(); diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs index 0cc4d0bfc2..2c91f9112d 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/Helpers/AsyncVerify.cs @@ -83,6 +83,11 @@ public async Task EvalAsync(RecalcEngine engine, string expr, Inte rtConfig.AddService(setup.TimeZoneInfo); } + if (setup.CultureInfo != null) + { + rtConfig.AddService(setup.CultureInfo); + } + var task = engine.EvalAsync(expr, CancellationToken.None, options: setup.Flags.ToParserOptions(new CultureInfo("en-US")), runtimeConfig: rtConfig); var i = 0; diff --git a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs index 6c1251a9f3..31f6c6dd65 100644 --- a/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs +++ b/src/tests/Microsoft.PowerFx.Interpreter.Tests.Shared/PowerFxEvaluationTests.cs @@ -963,6 +963,11 @@ protected override async Task RunAsyncInternal(string expr, string se runtimeConfig.AddService(iSetup.TimeZoneInfo); } + if (iSetup.CultureInfo != null) + { + runtimeConfig.AddService(iSetup.CultureInfo); + } + if (engine.TryGetByName("traceRecord", out _)) { var traceRecord = engine.GetValue("traceRecord"); diff --git a/src/tools/Repl/Program.cs b/src/tools/Repl/Program.cs index b4f4606a26..cf4487c70c 100644 --- a/src/tools/Repl/Program.cs +++ b/src/tools/Repl/Program.cs @@ -49,6 +49,8 @@ public static class ConsoleRepl private static StandardFormatter 
_standardFormatter; + private static CultureInfo _cultureInfo = CultureInfo.CurrentCulture; + private static bool _reset; private static RecalcEngine ReplRecalcEngine() @@ -97,6 +99,7 @@ private static RecalcEngine ReplRecalcEngine() config.AddFunction(new Option2Function()); config.AddFunction(new Run1Function()); config.AddFunction(new Run2Function()); + config.AddFunction(new Language1Function()); var optionsSet = new OptionSet("Options", DisplayNameUtility.MakeUnique(options)); @@ -135,6 +138,10 @@ public MyRepl() this.ValueFormatter = _standardFormatter; this.HelpProvider = new MyHelpProvider(); + var bsp = new BasicServiceProvider(); + bsp.AddService(_cultureInfo); + this.InnerServices = bsp; + this.AllowSetDefinitions = true; this.AllowUserDefinedFunctions = _enableUDFs; this.AllowImport = true; @@ -427,6 +434,26 @@ public FormulaValue Execute(StringValue option, BooleanValue value) } } + // set the language + private class Language1Function : ReflectionFunction + { + public Language1Function() + : base("Language", FormulaType.Void, new[] { FormulaType.String }) + { + } + + public FormulaValue Execute(StringValue lang) + { + var cultureInfo = new CultureInfo(lang.Value); + + _cultureInfo = cultureInfo; + + _reset = true; + + return FormulaValue.NewVoid(); + } + } + private class MyHelpProvider : HelpProvider { public override async Task Execute(PowerFxREPL repl, CancellationToken cancel, string context = null) @@ -499,6 +526,8 @@ Use Option( Options.FormatTable, false ) to disable table formatting. Use Option() to see the list of all options with their current value. Use Help( ""Options"" ) for more information. +Use Language( ""en-US"" ) to set culture info. + Once a formula is defined or a variable's type is defined, it cannot be changed. Use Reset() to clear all formulas and variables. ";