Skip to content

Commit e41b79f

Browse files
Copilot, stephentoub, jkotas, tarekgh, MihaZupan
authored
Add IdnMapping Span-based APIs (TryGetAscii/TryGetUnicode) (#123593)
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: stephentoub <[email protected]>
Co-authored-by: jkotas <[email protected]>
Co-authored-by: tarekgh <[email protected]>
Co-authored-by: MihaZupan <[email protected]>
Co-authored-by: Miha Zupan <[email protected]>
Co-authored-by: Stephen Toub <[email protected]>
1 parent 062177e commit e41b79f

File tree

10 files changed

+698
-118
lines changed

10 files changed

+698
-118
lines changed

src/libraries/Common/src/Interop/Interop.Idna.cs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System;
45
using System.Runtime.InteropServices;
56

67
internal static partial class Interop
@@ -11,9 +12,9 @@ internal static partial class Globalization
1112
internal const int UseStd3AsciiRules = 0x2;
1213

1314
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)]
14-
internal static unsafe partial int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
15+
internal static partial int ToAscii(uint flags, ReadOnlySpan<char> src, int srcLen, Span<char> dstBuffer, int dstBufferCapacity);
1516

1617
[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)]
17-
internal static unsafe partial int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
18+
internal static partial int ToUnicode(uint flags, ReadOnlySpan<char> src, int srcLen, Span<char> dstBuffer, int dstBufferCapacity);
1819
}
1920
}

src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System;
45
using System.Runtime.InteropServices;
56

67
internal static partial class Interop
@@ -12,19 +13,19 @@ internal static partial class Normaliz
1213
//
1314

1415
[LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
15-
internal static unsafe partial int IdnToAscii(
16+
internal static partial int IdnToAscii(
1617
uint dwFlags,
17-
char* lpUnicodeCharStr,
18+
ReadOnlySpan<char> lpUnicodeCharStr,
1819
int cchUnicodeChar,
19-
char* lpASCIICharStr,
20+
Span<char> lpASCIICharStr,
2021
int cchASCIIChar);
2122

2223
[LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
23-
internal static unsafe partial int IdnToUnicode(
24+
internal static partial int IdnToUnicode(
2425
uint dwFlags,
25-
char* lpASCIICharStr,
26+
ReadOnlySpan<char> lpASCIICharStr,
2627
int cchASCIIChar,
27-
char* lpUnicodeCharStr,
28+
Span<char> lpUnicodeCharStr,
2829
int cchUnicodeChar);
2930

3031
internal const int IDN_ALLOW_UNASSIGNED = 0x1;

src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ internal static StrongBidiCategory GetBidiCategory(string s, int index)
4848
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
4949
}
5050

51-
return GetBidiCategoryNoBoundsChecks((uint)GetCodePointFromString(s, index));
51+
return GetBidiCategory((ReadOnlySpan<char>)s, index);
5252
}
5353

5454
internal static StrongBidiCategory GetBidiCategory(StringBuilder s, int index)
@@ -91,6 +91,33 @@ private static StrongBidiCategory GetBidiCategoryNoBoundsChecks(uint codePoint)
9191
return bidiCategory;
9292
}
9393

94+
/// <summary>
/// Returns the result of <c>GetBidiCategoryNoBoundsChecks</c> for the code point that starts
/// at <paramref name="index"/> in <paramref name="s"/>. When the char at
/// <paramref name="index"/> and the char after it both pass the surrogate range checks below,
/// the two are combined into a single supplementary code point; otherwise the value of the
/// single char at <paramref name="index"/> is used unchanged (so an unpaired surrogate is
/// passed through as-is).
/// </summary>
/// <param name="s">The UTF-16 text to read from.</param>
/// <param name="index">
/// Position of the first char of the code point. It is only validated by a
/// <c>Debug.Assert</c>; callers are expected to pass an index inside <paramref name="s"/>.
/// </param>
internal static StrongBidiCategory GetBidiCategory(ReadOnlySpan<char> s, int index)
95+
{
96+
Debug.Assert(index >= 0 && index < s.Length, "index < s.Length");
97+
98+
// The logic below follows Table 3-5 in the Unicode Standard, Sec. 3.9.
99+
// First char (high surrogate) = 110110wwwwxxxxxx
100+
// Second char (low surrogate) = 110111xxxxxxxxxx
101+
102+
// Start from the single UTF-16 code unit; it is replaced below only if a full pair is found.
int c = (int)s[index];
103+
// A pair can only be decoded when there is a following char to read.
if (index < s.Length - 1)
104+
{
105+
int temp1 = c - HIGH_SURROGATE_START; // temp1 = 000000wwwwxxxxxx
106+
// The unsigned compare rejects negative offsets (c below HIGH_SURROGATE_START) and
// offsets above HIGH_SURROGATE_RANGE in a single test.
if ((uint)temp1 <= HIGH_SURROGATE_RANGE)
107+
{
108+
int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; // temp2 = 000000xxxxxxxxxx
109+
// NOTE(review): the low-surrogate offset is compared against HIGH_SURROGATE_RANGE;
// presumably both surrogate ranges have the same width - confirm against the constant definitions.
if ((uint)temp2 <= HIGH_SURROGATE_RANGE)
110+
{
111+
// |--------temp1--||-temp2--|
112+
// 00000uuuuuuxxxxxxxxxxxxxxxx (where uuuuu = wwww + 1)
113+
c = (temp1 << 10) + temp2 + UNICODE_PLANE01_START;
114+
}
115+
}
116+
}
117+
118+
return GetBidiCategoryNoBoundsChecks((uint)c);
119+
}
120+
94121
/*
95122
* GetDecimalDigitValue
96123
* ====================
@@ -115,7 +142,7 @@ public static int GetDecimalDigitValue(string s, int index)
115142
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
116143
}
117144

118-
return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index));
145+
return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index));
119146
}
120147

121148
private static int GetDecimalDigitValueInternalNoBoundsCheck(uint codePoint)
@@ -149,7 +176,7 @@ public static int GetDigitValue(string s, int index)
149176
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
150177
}
151178

152-
return GetDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index));
179+
return GetDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index));
153180
}
154181

155182
private static int GetDigitValueInternalNoBoundsCheck(uint codePoint)
@@ -228,7 +255,7 @@ public static double GetNumericValue(string s, int index)
228255
}
229256

230257
[MethodImpl(MethodImplOptions.AggressiveInlining)]
231-
internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePointFromString(s, index));
258+
internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePoint(s, index));
232259

233260
private static double GetNumericValueNoBoundsCheck(uint codePoint)
234261
{
@@ -365,7 +392,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string value, int ind
365392
Debug.Assert(value != null, "value can not be null");
366393
Debug.Assert(index < value.Length);
367394

368-
return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePointFromString(value, index));
395+
return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePoint(value, index));
369396
}
370397

371398
/// <summary>
@@ -378,7 +405,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string str, int index
378405
Debug.Assert(str.Length > 0);
379406
Debug.Assert(index >= 0 && index < str.Length);
380407

381-
uint codePoint = (uint)GetCodePointFromString(str, index);
408+
uint codePoint = (uint)GetCodePoint(str, index);
382409
UnicodeDebug.AssertIsValidCodePoint(codePoint);
383410

384411
charLength = (codePoint >= UNICODE_PLANE01_START) ? 2 /* surrogate pair */ : 1 /* BMP char */;
@@ -406,9 +433,8 @@ private static UnicodeCategory GetUnicodeCategoryNoBoundsChecks(uint codePoint)
406433
/// WARNING: since it doesn't throw an exception it CAN return a value
407434
/// in the surrogate range D800-DFFF, which is not a legal scalar value.
408435
/// </summary>
409-
private static int GetCodePointFromString(string s, int index)
436+
private static int GetCodePoint(ReadOnlySpan<char> s, int index)
410437
{
411-
Debug.Assert(s != null);
412438
Debug.Assert((uint)index < (uint)s.Length, "index < s.Length");
413439

414440
int codePoint = 0;

src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs

Lines changed: 70 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,14 @@ namespace System.Globalization
77
{
88
public sealed partial class IdnMapping
99
{
10-
private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int count)
10+
private string IcuGetAsciiCore(string unicodeString, int index, int count)
1111
{
1212
Debug.Assert(!GlobalizationMode.Invariant);
1313
Debug.Assert(!GlobalizationMode.UseNls);
14-
Debug.Assert(unicodeString != null && unicodeString.Length >= count);
1514

15+
ReadOnlySpan<char> unicode = unicodeString.AsSpan(index, count);
1616
uint flags = IcuFlags;
17-
CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode));
17+
CheckInvalidIdnCharacters(unicode, flags, nameof(unicode));
1818

1919
const int StackallocThreshold = 512;
2020
// Each unicode character is represented by up to 3 ASCII chars
@@ -23,85 +23,126 @@ private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int c
2323
int actualLength;
2424
if ((uint)estimatedLength < StackallocThreshold)
2525
{
26-
char* outputStack = stackalloc char[estimatedLength];
26+
Span<char> outputStack = stackalloc char[estimatedLength];
2727
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputStack, estimatedLength);
2828
if (actualLength > 0 && actualLength <= estimatedLength)
2929
{
30-
return GetStringForOutput(unicodeString, unicode, count, outputStack, actualLength);
30+
return GetStringForOutput(unicodeString, unicode, outputStack.Slice(0, actualLength));
3131
}
3232
}
3333
else
3434
{
35-
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, null, 0);
35+
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, Span<char>.Empty, 0);
3636
}
3737
if (actualLength == 0)
3838
{
3939
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
4040
}
4141

4242
char[] outputHeap = new char[actualLength];
43-
fixed (char* pOutputHeap = &outputHeap[0])
43+
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputHeap, actualLength);
44+
if (actualLength == 0 || actualLength > outputHeap.Length)
4445
{
45-
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, pOutputHeap, actualLength);
46-
if (actualLength == 0 || actualLength > outputHeap.Length)
46+
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
47+
}
48+
49+
return GetStringForOutput(unicodeString, unicode, outputHeap.AsSpan(0, actualLength));
50+
}
51+
52+
private bool IcuTryGetAsciiCore(ReadOnlySpan<char> unicode, Span<char> destination, out int charsWritten)
53+
{
54+
Debug.Assert(!GlobalizationMode.Invariant);
55+
Debug.Assert(!GlobalizationMode.UseNls);
56+
57+
uint flags = IcuFlags;
58+
CheckInvalidIdnCharacters(unicode, flags, nameof(unicode));
59+
60+
int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length);
61+
62+
if (actualLength <= destination.Length)
63+
{
64+
if (actualLength == 0)
4765
{
4866
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
4967
}
50-
return GetStringForOutput(unicodeString, unicode, count, pOutputHeap, actualLength);
68+
69+
charsWritten = actualLength;
70+
return true;
5171
}
72+
73+
charsWritten = 0;
74+
return false;
5275
}
5376

54-
private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count)
77+
private string IcuGetUnicodeCore(string asciiString, int index, int count)
5578
{
5679
Debug.Assert(!GlobalizationMode.Invariant);
5780
Debug.Assert(!GlobalizationMode.UseNls);
58-
Debug.Assert(asciiString != null && asciiString.Length >= count);
5981

82+
ReadOnlySpan<char> ascii = asciiString.AsSpan(index, count);
6083
uint flags = IcuFlags;
61-
CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii));
84+
CheckInvalidIdnCharacters(ascii, flags, nameof(ascii));
6285

6386
const int StackAllocThreshold = 512;
6487
if ((uint)count < StackAllocThreshold)
6588
{
66-
char* output = stackalloc char[count];
67-
return IcuGetUnicodeCore(asciiString, ascii, count, flags, output, count, reattempt: true);
89+
Span<char> output = stackalloc char[count];
90+
return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true);
6891
}
6992
else
7093
{
7194
char[] output = new char[count];
72-
fixed (char* pOutput = &output[0])
73-
{
74-
return IcuGetUnicodeCore(asciiString, ascii, count, flags, pOutput, count, reattempt: true);
75-
}
95+
return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true);
7696
}
7797
}
7898

79-
private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt)
99+
private static string IcuGetUnicodeCore(string asciiString, ReadOnlySpan<char> ascii, uint flags, Span<char> output, bool reattempt)
80100
{
81101
Debug.Assert(!GlobalizationMode.Invariant);
82102
Debug.Assert(!GlobalizationMode.UseNls);
83-
Debug.Assert(asciiString != null && asciiString.Length >= count);
84103

85-
int realLen = Interop.Globalization.ToUnicode(flags, ascii, count, output, outputLength);
104+
int realLen = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, output, output.Length);
86105

87106
if (realLen == 0)
88107
{
89108
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
90109
}
91-
else if (realLen <= outputLength)
110+
else if (realLen <= output.Length)
92111
{
93-
return GetStringForOutput(asciiString, ascii, count, output, realLen);
112+
return GetStringForOutput(asciiString, ascii, output.Slice(0, realLen));
94113
}
95114
else if (reattempt)
96115
{
97116
char[] newOutput = new char[realLen];
98-
fixed (char* pNewOutput = newOutput)
117+
return IcuGetUnicodeCore(asciiString, ascii, flags, newOutput, reattempt: false);
118+
}
119+
120+
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
121+
}
122+
123+
private bool IcuTryGetUnicodeCore(ReadOnlySpan<char> ascii, Span<char> destination, out int charsWritten)
124+
{
125+
Debug.Assert(!GlobalizationMode.Invariant);
126+
Debug.Assert(!GlobalizationMode.UseNls);
127+
128+
uint flags = IcuFlags;
129+
CheckInvalidIdnCharacters(ascii, flags, nameof(ascii));
130+
131+
int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length);
132+
133+
if (actualLength <= destination.Length)
134+
{
135+
if (actualLength == 0)
99136
{
100-
return IcuGetUnicodeCore(asciiString, ascii, count, flags, pNewOutput, realLen, reattempt: false);
137+
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
101138
}
139+
140+
charsWritten = actualLength;
141+
return true;
102142
}
103143

104-
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
144+
charsWritten = 0;
145+
return false;
105146
}
106147

107148
private uint IcuFlags
@@ -122,11 +163,11 @@ private uint IcuFlags
122163
/// To match Windows behavior, we walk the string ourselves looking for these
123164
/// bad characters so we can continue to throw ArgumentException in these cases.
124165
/// </summary>
125-
private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint flags, string paramName)
166+
private static void CheckInvalidIdnCharacters(ReadOnlySpan<char> s, uint flags, string paramName)
126167
{
127168
if ((flags & Interop.Globalization.UseStd3AsciiRules) == 0)
128169
{
129-
for (int i = 0; i < count; i++)
170+
for (int i = 0; i < s.Length; i++)
130171
{
131172
char c = s[i];
132173

0 commit comments

Comments
 (0)