More precise error message for unencodable \u escapes

The surrogate code points U+D800 to U+DFFF are valid code points but are not Unicode scalar values. This commit makes the error message more accurately reflect what is actually allowed in `\u` escape sequences.

From https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf:

> D71 High-surrogate code point: A Unicode code point in the range U+D800 to U+DBFF.
> D73 Low-surrogate code point: A Unicode code point in the range U+DC00 to U+DFFF.
>
> 3.9 Unicode Encoding Forms
> D76 Unicode scalar value: Any Unicode code point except high-surrogate and low-surrogate code points.

Related: #20270
2024-11-14 16:13:24 +00:00 · 2024-06-11 22:13:22 -07:00 · 2024-06-11 22:13:22 -07:00 · 0cef727e59
commit 0cef727e59
parent 44f4abf380
3 changed files with 4 additions and 4 deletions
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -728,12 +728,12 @@
        </tr>
        <tr>
            <th scope="row"><code>\u{NNNNNN}</code></th>
-          <td>hexadecimal Unicode code point UTF-8 encoded (1 or more digits)</td>
+          <td>hexadecimal Unicode scalar value UTF-8 encoded (1 or more digits)</td>
        </tr>
        </tbody>
      </table>
      </div>
-      <p>Note that the maximum valid Unicode point is {#syntax#}0x10ffff{#endsyntax#}.</p>
+      <p>Note that the maximum valid Unicode scalar value is {#syntax#}0x10ffff{#endsyntax#}.</p>
      {#header_close#}
      {#header_open|Multiline String Literals#}
      <p>
--- a/lib/std/zig/AstGen.zig
+++ b/lib/std/zig/AstGen.zig
@@ -11306,7 +11306,7 @@ fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token
            return astgen.failOff(
                token,
                offset + @as(u32, @intCast(bad_index)),
-                "unicode escape does not correspond to a valid codepoint",
+                "unicode escape does not correspond to a valid unicode scalar value",
                .{},
            );
        },
--- a/src/Package/Manifest.zig
+++ b/src/Package/Manifest.zig
@@ -522,7 +522,7 @@ const Parse = struct {
                try p.appendErrorOff(
                    token,
                    offset + @as(u32, @intCast(bad_index)),
-                    "unicode escape does not correspond to a valid codepoint",
+                    "unicode escape does not correspond to a valid unicode scalar value",
                    .{},
                );
            },