【SQL Server】通过位运算将 Unicode 码点转换为正确的 UTF-8 字节序列

通过位运算将 Unicode 码点转换为正确的 UTF-8 字节序列。例如:您好→ %E4%BD%A0%E5%A5%BD

 1 CREATE FUNCTION [dbo].[UrlEncode] (@input NVARCHAR(MAX))
 2 RETURNS VARCHAR(MAX)
 3 AS
 4 BEGIN
 5     DECLARE @result VARCHAR(MAX) = '';
 6     DECLARE @i INT = 1;
 7     DECLARE @char NCHAR(1);
 8     DECLARE @unicode INT;
 9     
10     WHILE @i <= LEN(@input)
11     BEGIN
12         SET @char = SUBSTRING(@input, @i, 1);
13         SET @unicode = UNICODE(@char);
14         
15         -- 不需要编码的字符
16         IF (@unicode BETWEEN 48 AND 57) OR  -- 0-9
17            (@unicode BETWEEN 65 AND 90) OR  -- A-Z
18            (@unicode BETWEEN 97 AND 122) OR -- a-z
19            @unicode IN (45, 46, 95, 126)   -- - . _ ~
20         BEGIN
21             SET @result = @result + @char;
22         END
23         ELSE
24         BEGIN
25             -- 手动生成UTF-8字节
26             DECLARE @utf8Bytes VARBINARY(4);
27             
28             IF @unicode <= 0x7F
29                 -- 单字节 (0xxxxxxx)
30                 SET @utf8Bytes = CAST(@unicode AS VARBINARY(1));
31             ELSE IF @unicode <= 0x7FF
32                 -- 双字节 (110xxxxx 10xxxxxx)
33                 SET @utf8Bytes = CAST(
34                     ((@unicode / 64) + 192) * 256 + 
35                     (@unicode % 64 + 128) 
36                 AS VARBINARY(2));
37             ELSE IF @unicode <= 0xFFFF
38                 -- 三字节 (1110xxxx 10xxxxxx 10xxxxxx)
39                 SET @utf8Bytes = CAST(
40                     ((@unicode / 4096) + 224) * 65536 + 
41                     ((@unicode / 64) % 64 + 128) * 256 + 
42                     (@unicode % 64 + 128)
43                 AS VARBINARY(3));
44             ELSE
45                 -- 四字节 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
46                 SET @utf8Bytes = CAST(
47                     ((@unicode / 262144) + 240) * 16777216 + 
48                     ((@unicode / 4096) % 64 + 128) * 65536 + 
49                     ((@unicode / 64) % 64 + 128) * 256 + 
50                     (@unicode % 64 + 128)
51                 AS VARBINARY(4));
52             
53             -- 编码每个UTF-8字节
54             DECLARE @j INT = 1;
55             WHILE @j <= DATALENGTH(@utf8Bytes)
56             BEGIN
57                 SET @result = @result + '%' + UPPER(SUBSTRING(master.dbo.fn_varbintohexstr(SUBSTRING(@utf8Bytes, @j, 1)), 3, 2));
58                 SET @j = @j + 1;
59             END
60         END
61         
62         SET @i = @i + 1;
63     END
64     
65     RETURN @result;
66 END

 输出结果验证L

1 SELECT dbo.UrlEncode('你好') AS Encoded;
2 -- 输出: %E4%BD%A0%E5%A5%BD
3 
4 SELECT dbo.UrlEncode('A-Z 0-9 !@#$%^&*()_+-=') AS Encoded;
5 -- 输出: A-Z%200-9%20%21%40%23%24%25%5E%26%2A%28%29_%2B-%3D
6 
7 SELECT dbo.UrlEncode('👋😊') AS Encoded;
8 -- 输出: %F0%9F%91%8B%F0%9F%98%8A

 

posted @ 2025-05-18 18:22  陆陆无为而治者  阅读(33)  评论(0)    收藏  举报