|
参考文档:http://www.linuxforum.net/books/UTF-8-Unicode.html
代码如下: =========================================== 复制代码 代码如下:
<script language="VBScript">
' Reference: http://www.linuxforum.net/books/UTF-8-Unicode.html

' Percent-encodes one Unicode code point (0 .. &H10FFFF) as UTF-8.
' The byte count is chosen from the code point's RANGE, not from how many
' 6-bit groups happen to be non-zero; otherwise U+0800..U+0FFF would be
' emitted with too few bytes.
Private Function UTF8EncodeCodePoint(cp)
    Dim byteCount, prefix, rest, tail, n
    If cp < &H80& Then
        byteCount = 1
        prefix = 0
    ElseIf cp < &H800& Then
        byteCount = 2
        prefix = &HC0
    ElseIf cp < &H10000& Then
        byteCount = 3
        prefix = &HE0
    Else
        byteCount = 4
        prefix = &HF0
    End If
    rest = cp
    tail = ""
    ' Continuation bytes carry 6 bits each, lowest bits last in the output.
    For n = 1 To byteCount - 1
        tail = "%" & Hex(&H80 Or (rest And &H3F)) & tail
        rest = rest \ &H40
    Next
    UTF8EncodeCodePoint = "%" & Right("0" & Hex(prefix Or rest), 2) & tail
End Function

' Encodes the first character of z.
' ASCII characters (U+0001..U+007F) are returned unchanged (z itself);
' every other character is returned as percent-encoded UTF-8 bytes.
Public Function UTF8EncodeChar(z)
    Dim code
    code = AscW(z)
    ' AscW returns a signed 16-bit Integer, so U+8000..U+FFFF come back negative.
    If code < 0 Then code = code + &H10000&
    If code > 0 And code < &H80 Then
        UTF8EncodeChar = z
    Else
        UTF8EncodeChar = UTF8EncodeCodePoint(CLng(code))
    End If
End Function

' Encodes every character of s with UTF8EncodeChar.
' A UTF-16 surrogate pair is combined into one code point and emitted as a
' single 4-byte UTF-8 sequence.
Public Function UTF8EncodeString(s)
    Dim i, total, hi, lo, result
    total = Len(s)
    result = ""
    i = 1
    Do While i <= total
        hi = AscW(Mid(s, i, 1))
        If hi < 0 Then hi = hi + &H10000&
        lo = 0
        If hi >= &HD800& And hi <= &HDBFF& And i < total Then
            lo = AscW(Mid(s, i + 1, 1))
            If lo < 0 Then lo = lo + &H10000&
        End If
        If lo >= &HDC00& And lo <= &HDFFF& Then
            result = result & UTF8EncodeCodePoint((hi - &HD800&) * &H400& + (lo - &HDC00&) + &H10000&)
            i = i + 2
        Else
            result = result & UTF8EncodeChar(Mid(s, i, 1))
            i = i + 1
        End If
    Loop
    UTF8EncodeString = result
End Function

MsgBox UTF8EncodeString("圪圪 eglic ")
</script>
// Test method: http://www.google.com/search?hl=zh-CN&newwindow=1&rls=GGLG%2CGGLG%3A2006-15%2CGGLG%3Azh-CN&q=<your encoded string>

/**
 * Decodes percent-encoded UTF-8 ("%E5%9C%AA") back into a JavaScript string.
 * Characters that are not part of a %XX escape are copied through unchanged.
 * Handles 1- to 4-byte sequences; code points above U+FFFF are returned as a
 * UTF-16 surrogate pair.
 *
 * Decoding stops (returning what was decoded so far) at the first escape
 * whose lead byte is zero or not hexadecimal.
 *
 * @param {string} szInput percent-encoded text
 * @return {string} decoded text
 */
function revertUTF8(szInput) {
    // Value of the two hex digits starting at index `at` (NaN when not hex).
    function byteAt(at) {
        return parseInt(szInput.charAt(at) + szInput.charAt(at + 1), 16);
    }

    var szRet = "";
    var x, lead, extra, code, k;
    for (x = 0; x < szInput.length; x++) {
        if (szInput.charAt(x) != "%") {
            szRet += szInput.charAt(x);
            continue;
        }
        lead = byteAt(x + 1);
        x += 2; // x now sits on the second hex digit of the lead byte
        if (!lead) { break; }

        // Sequence length and payload bits come from the lead byte.
        if (!(lead & 0x80)) {
            extra = 0; code = lead;
        } else if (!(lead & 0x20)) {
            extra = 1; code = lead & 0x1F;
        } else if (lead < 0xF0) {
            extra = 2; code = lead & 0x0F;
        } else {
            extra = 3; code = lead & 0x07;
        }

        // Each continuation is "%XX": skip the '%', read two digits.
        for (k = 0; k < extra; k++) {
            code = (code << 6) | (byteAt(x + 2) & 0x3F);
            x += 3;
        }

        if (code <= 0xFFFF) {
            szRet += String.fromCharCode(code);
        } else if (code <= 0x10FFFF) {
            code -= 0x10000;
            szRet += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else {
            szRet += "\uFFFD"; // beyond the Unicode range
        }
    }
    return szRet;
}
/**
 * Encodes one Unicode code point as a UTF-8 byte string.
 *
 * @param int $c code point
 * @return string the UTF-8 bytes, or "" when $c is negative or >= 0x200000
 */
function u2utf8($c)
{
    if ($c < 0) {
        return "";
    }
    if ($c < 0x80) {
        // Single byte: emit the character itself, not the decimal digits of $c.
        return chr($c);
    }
    if ($c < 0x800) {
        return chr(0xC0 | ($c >> 6))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x10000) {
        return chr(0xE0 | ($c >> 12))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x200000) {
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    return "";
}
' Code listing:
' UTF8 URLEncode
' Percent-encodes every non-ASCII character of s as UTF-8 bytes.
' ASCII characters (code < 128) are copied through unchanged; they are NOT
' percent-encoded by this function.
' Only UTF-16 code units are handled (1- to 3-byte UTF-8 sequences).
Public Function URLEncodeUTF8(ByVal s)
    Dim i, ch, uni, result
    result = ""
    For i = 1 To Len(s)
        ch = Mid(s, i, 1)
        uni = AscW(ch)
        ' AscW returns a signed Integer; fold negatives back to 0..65535.
        If uni < 0 Then uni = uni + 65536
        If uni < 128 Then
            ' ch is already the character; calling Chr() on it was a type error.
            result = result & ch
        ElseIf uni < 2048 Then
            result = result & "%" & Hex(&HC0 Or (uni \ &H40)) _
                            & "%" & Hex(&H80 Or (uni And &H3F))
        Else
            result = result & "%" & Hex(&HE0 Or (uni \ &H1000)) _
                            & "%" & Hex(&H80 Or ((uni \ &H40) And &H3F)) _
                            & "%" & Hex(&H80 Or (uni And &H3F))
        End If
    Next
    URLEncodeUTF8 = result
End Function
' Returns True when the character code c is a hexadecimal digit:
' "0".."9" (48-57), "A".."F" (65-70) or "a".."f" (97-102).
Public Function isxdigit(c)
    Dim isDecimal, isUpperHex, isLowerHex
    isDecimal = (c >= 48 And c <= 57)
    isUpperHex = (c >= 65 And c <= 70)
    isLowerHex = (c >= 97 And c <= 102)
    isxdigit = CBool(isDecimal Or isUpperHex Or isLowerHex)
End Function
' Returns True when c lies in 1..127. Zero is deliberately excluded.
Public Function isascii(c)
    isascii = CBool(Not (c <= 0 Or c >= 128))
End Function
' Tests whether byte u is a UTF-8 continuation ("body") byte, i.e. &H80..&HBF.
Public Function IsUTF8Body(ByVal u)
    IsUTF8Body = CBool(Not (u < &H80 Or u > &HBF))
End Function
' Returns how many bytes the UTF-8 sequence introduced by lead byte u has
' (1 to 6), or 0 when u is zero, a continuation byte, or above &HFD.
Private Function UTF8Byte(ByVal u)
    Select Case True
        Case (u > &H00 And u <= &H7F)
            UTF8Byte = 1
        Case (u >= &HC0 And u <= &HDF)
            UTF8Byte = 2
        Case (u >= &HE0 And u <= &HEF)
            UTF8Byte = 3
        Case (u >= &HF0 And u <= &HF7)
            UTF8Byte = 4
        Case (u >= &HF8 And u <= &HFB)
            UTF8Byte = 5
        Case (u >= &HFC And u <= &HFD)
            UTF8Byte = 6
        Case Else
            UTF8Byte = 0
    End Select
End Function
' Tests whether three consecutive bytes look like a UTF-8 character:
' u1 must be in &HC0..&HFD and u2, u3 must both be continuation bytes.
Private Function UTF8Test(ByVal u1, ByVal u2, ByVal u3)
    UTF8Test = False
    If u1 < &HC0 Or u1 > &HFD Then Exit Function
    UTF8Test = CBool(IsUTF8Body(u2) And IsUTF8Body(u3))
End Function
' Returns True when s has at least two characters and the first two are
' hexadecimal digits.
Private Function ishex(s)
    Dim pos
    ishex = False
    If Len(s) >= 2 Then
        For pos = 1 To 2
            If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
        Next
        ishex = True
    End If
End Function
' Returns True when s starts with "u" or "U" followed by four hexadecimal
' digits (the tail of a %uXXXX escape).
Private Function isescape(s)
    Dim pos
    isescape = False
    If Len(s) >= 5 Then
        If UCase(Left(s, 1)) = "U" Then
            For pos = 2 To 5
                If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
            Next
            isescape = True
        End If
    End If
End Function
' Converts the first two characters of s, read as hexadecimal digits, to a number.
Private Function AscX(s)
    Dim hexPair
    hexPair = Left(s, 2)
    AscX = CInt("&H" & hexPair)
End Function
' URLDecode, full version.
' Decodes strings produced by Server.URLEncode, UTF-8 URL encoding, or Escape().
'
' "+" becomes a space. Each "%" escape is then interpreted as one of:
'   %uXXXX            -> ChrW of the four hex digits
'   %XX (1..127)      -> Chr of that byte
'   %XX%XX%XX         -> one 3-byte UTF-8 character, when the bytes pass UTF8Test
'   %XX%XX            -> a double-byte character built with Chr(high*256 Or low)
' A "%" that does not start a decodable escape is kept in the output as a
' literal "%" followed by the original text.
Public Function URLDecode(s)
    Dim tp, tl, i, pp
    Dim a, b, c, h
    Dim isThreeByte
    URLDecode = ""
    tp = Split(Replace(s, "+", " "), "%")
    tl = UBound(tp)
    If tl = -1 Then Exit Function
    pp = tp(0)
    i = 1
    Do While i <= tl
        If isescape(tp(i)) Then
            pp = pp & ChrW("&H" & Mid(tp(i), 2, 4)) & Mid(tp(i), 6)
        ElseIf ishex(tp(i)) = False Then
            ' Not an escape at all: keep the "%" that Split removed.
            pp = pp & "%" & tp(i)
        Else
            a = AscX(tp(i))
            If isascii(a) Then
                pp = pp & Chr(a) & Mid(tp(i), 3)
            ElseIf Len(tp(i)) <> 2 Then
                ' High byte followed directly by plain text: not a multi-byte escape.
                pp = pp & "%" & tp(i)
            ElseIf i + 1 > tl Then
                ' Lone high byte at the very end of the input.
                pp = pp & "%" & tp(i)
            ElseIf ishex(tp(i + 1)) = False Then
                ' Next segment is not a hex byte; AscX on it would raise an error.
                pp = pp & "%" & tp(i)
            Else
                b = AscX(tp(i + 1))
                isThreeByte = False
                ' A 3-byte sequence needs the middle segment to be exactly "XX"
                ' (otherwise its trailing text would be lost) and a hex third segment.
                If i + 2 <= tl And Len(tp(i + 1)) = 2 Then
                    If ishex(tp(i + 2)) Then
                        c = AscX(tp(i + 2))
                        isThreeByte = (UTF8Byte(a) = 3 And UTF8Test(a, b, c))
                    End If
                End If
                If isThreeByte Then
                    h = (a And &H0F) * 2 ^ 12 Or (b And &H3F) * 2 ^ 6 Or (c And &H3F)
                    pp = pp & ChrW(h) & Mid(tp(i + 2), 3)
                    i = i + 2
                Else
                    pp = pp & Chr(a * 2 ^ 8 Or b) & Mid(tp(i + 1), 3)
                    i = i + 1
                End If
            End If
        End If
        i = i + 1
    Loop
    URLDecode = pp
End Function
URLEncodeUTF8只写了多字节的加密,没有写ascii字节的,其实也没必要写这个,有个Server.URLEncode可以用,改成js版本的倒还有用武之地 js的版本,里面有个现成的函数
<script language=javascript>
alert(encodeURI("圪圪 eglic "))
</script>
看上边有人说看不懂,我来解释下吧。梅子的第一个函数是把多字节unicode编码转成我们通常正常的编码,在JS中如果偷懒,decodeURI也可实现这个功能。不过,看到这个具体是怎么实现的,感觉好爽,回家研究去啦。复制代码 代码如下:
<script language=javascript>
/**
 * Decodes percent-encoded UTF-8 ("%E5%9C%AA") back into a JavaScript string.
 * Characters that are not part of a %XX escape are copied through unchanged.
 * Handles 1- to 4-byte sequences; code points above U+FFFF are returned as a
 * UTF-16 surrogate pair.
 *
 * Decoding stops (returning what was decoded so far) at the first escape
 * whose lead byte is zero or not hexadecimal.
 */
function revertUTF8(szInput) {
    // Value of the two hex digits starting at index `at` (NaN when not hex).
    function byteAt(at) {
        return parseInt(szInput.charAt(at) + szInput.charAt(at + 1), 16);
    }

    var szRet = "";
    var x, lead, extra, code, k;
    for (x = 0; x < szInput.length; x++) {
        if (szInput.charAt(x) != "%") {
            szRet += szInput.charAt(x);
            continue;
        }
        lead = byteAt(x + 1);
        x += 2; // x now sits on the second hex digit of the lead byte
        if (!lead) { break; }

        // Sequence length and payload bits come from the lead byte.
        if (!(lead & 0x80)) {
            extra = 0; code = lead;
        } else if (!(lead & 0x20)) {
            extra = 1; code = lead & 0x1F;
        } else if (lead < 0xF0) {
            extra = 2; code = lead & 0x0F;
        } else {
            extra = 3; code = lead & 0x07;
        }

        // Each continuation is "%XX": skip the '%', read two digits.
        for (k = 0; k < extra; k++) {
            code = (code << 6) | (byteAt(x + 2) & 0x3F);
            x += 3;
        }

        if (code <= 0xFFFF) {
            szRet += String.fromCharCode(code);
        } else if (code <= 0x10FFFF) {
            code -= 0x10000;
            szRet += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else {
            szRet += "\uFFFD"; // beyond the Unicode range
        }
    }
    return szRet;
}
alert(revertUTF8("%E5%9C%AA%E5%9C%AA%20eglic%20 "))
alert(decodeURI("%E5%9C%AA%E5%9C%AA%20eglic%20 "))
</script>
参考文档:http://www.linuxforum.net/books/UTF-8-Unicode.html
代码如下: =========================================== 复制代码 代码如下:
<script language="VBScript">
' Reference: http://www.linuxforum.net/books/UTF-8-Unicode.html

' Percent-encodes one Unicode code point (0 .. &H10FFFF) as UTF-8.
' The byte count is chosen from the code point's RANGE, not from how many
' 6-bit groups happen to be non-zero; otherwise U+0800..U+0FFF would be
' emitted with too few bytes.
Private Function UTF8EncodeCodePoint(cp)
    Dim byteCount, prefix, rest, tail, n
    If cp < &H80& Then
        byteCount = 1
        prefix = 0
    ElseIf cp < &H800& Then
        byteCount = 2
        prefix = &HC0
    ElseIf cp < &H10000& Then
        byteCount = 3
        prefix = &HE0
    Else
        byteCount = 4
        prefix = &HF0
    End If
    rest = cp
    tail = ""
    ' Continuation bytes carry 6 bits each, lowest bits last in the output.
    For n = 1 To byteCount - 1
        tail = "%" & Hex(&H80 Or (rest And &H3F)) & tail
        rest = rest \ &H40
    Next
    UTF8EncodeCodePoint = "%" & Right("0" & Hex(prefix Or rest), 2) & tail
End Function

' Encodes the first character of z.
' ASCII characters (U+0001..U+007F) are returned unchanged (z itself);
' every other character is returned as percent-encoded UTF-8 bytes.
Public Function UTF8EncodeChar(z)
    Dim code
    code = AscW(z)
    ' AscW returns a signed 16-bit Integer, so U+8000..U+FFFF come back negative.
    If code < 0 Then code = code + &H10000&
    If code > 0 And code < &H80 Then
        UTF8EncodeChar = z
    Else
        UTF8EncodeChar = UTF8EncodeCodePoint(CLng(code))
    End If
End Function

' Encodes every character of s with UTF8EncodeChar.
' A UTF-16 surrogate pair is combined into one code point and emitted as a
' single 4-byte UTF-8 sequence.
Public Function UTF8EncodeString(s)
    Dim i, total, hi, lo, result
    total = Len(s)
    result = ""
    i = 1
    Do While i <= total
        hi = AscW(Mid(s, i, 1))
        If hi < 0 Then hi = hi + &H10000&
        lo = 0
        If hi >= &HD800& And hi <= &HDBFF& And i < total Then
            lo = AscW(Mid(s, i + 1, 1))
            If lo < 0 Then lo = lo + &H10000&
        End If
        If lo >= &HDC00& And lo <= &HDFFF& Then
            result = result & UTF8EncodeCodePoint((hi - &HD800&) * &H400& + (lo - &HDC00&) + &H10000&)
            i = i + 2
        Else
            result = result & UTF8EncodeChar(Mid(s, i, 1))
            i = i + 1
        End If
    Loop
    UTF8EncodeString = result
End Function

MsgBox UTF8EncodeString("圪圪 eglic ")
</script>
// Test method: http://www.google.com/search?hl=zh-CN&newwindow=1&rls=GGLG%2CGGLG%3A2006-15%2CGGLG%3Azh-CN&q=<your encoded string>

/**
 * Decodes percent-encoded UTF-8 ("%E5%9C%AA") back into a JavaScript string.
 * Characters that are not part of a %XX escape are copied through unchanged.
 * Handles 1- to 4-byte sequences; code points above U+FFFF are returned as a
 * UTF-16 surrogate pair.
 *
 * Decoding stops (returning what was decoded so far) at the first escape
 * whose lead byte is zero or not hexadecimal.
 *
 * @param {string} szInput percent-encoded text
 * @return {string} decoded text
 */
function revertUTF8(szInput) {
    // Value of the two hex digits starting at index `at` (NaN when not hex).
    function byteAt(at) {
        return parseInt(szInput.charAt(at) + szInput.charAt(at + 1), 16);
    }

    var szRet = "";
    var x, lead, extra, code, k;
    for (x = 0; x < szInput.length; x++) {
        if (szInput.charAt(x) != "%") {
            szRet += szInput.charAt(x);
            continue;
        }
        lead = byteAt(x + 1);
        x += 2; // x now sits on the second hex digit of the lead byte
        if (!lead) { break; }

        // Sequence length and payload bits come from the lead byte.
        if (!(lead & 0x80)) {
            extra = 0; code = lead;
        } else if (!(lead & 0x20)) {
            extra = 1; code = lead & 0x1F;
        } else if (lead < 0xF0) {
            extra = 2; code = lead & 0x0F;
        } else {
            extra = 3; code = lead & 0x07;
        }

        // Each continuation is "%XX": skip the '%', read two digits.
        for (k = 0; k < extra; k++) {
            code = (code << 6) | (byteAt(x + 2) & 0x3F);
            x += 3;
        }

        if (code <= 0xFFFF) {
            szRet += String.fromCharCode(code);
        } else if (code <= 0x10FFFF) {
            code -= 0x10000;
            szRet += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else {
            szRet += "\uFFFD"; // beyond the Unicode range
        }
    }
    return szRet;
}
/**
 * Encodes one Unicode code point as a UTF-8 byte string.
 *
 * @param int $c code point
 * @return string the UTF-8 bytes, or "" when $c is negative or >= 0x200000
 */
function u2utf8($c)
{
    if ($c < 0) {
        return "";
    }
    if ($c < 0x80) {
        // Single byte: emit the character itself, not the decimal digits of $c.
        return chr($c);
    }
    if ($c < 0x800) {
        return chr(0xC0 | ($c >> 6))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x10000) {
        return chr(0xE0 | ($c >> 12))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x200000) {
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    return "";
}
' Code listing:
' UTF8 URLEncode
' Percent-encodes every non-ASCII character of s as UTF-8 bytes.
' ASCII characters (code < 128) are copied through unchanged; they are NOT
' percent-encoded by this function.
' Only UTF-16 code units are handled (1- to 3-byte UTF-8 sequences).
Public Function URLEncodeUTF8(ByVal s)
    Dim i, ch, uni, result
    result = ""
    For i = 1 To Len(s)
        ch = Mid(s, i, 1)
        uni = AscW(ch)
        ' AscW returns a signed Integer; fold negatives back to 0..65535.
        If uni < 0 Then uni = uni + 65536
        If uni < 128 Then
            ' ch is already the character; calling Chr() on it was a type error.
            result = result & ch
        ElseIf uni < 2048 Then
            result = result & "%" & Hex(&HC0 Or (uni \ &H40)) _
                            & "%" & Hex(&H80 Or (uni And &H3F))
        Else
            result = result & "%" & Hex(&HE0 Or (uni \ &H1000)) _
                            & "%" & Hex(&H80 Or ((uni \ &H40) And &H3F)) _
                            & "%" & Hex(&H80 Or (uni And &H3F))
        End If
    Next
    URLEncodeUTF8 = result
End Function
' Returns True when the character code c is a hexadecimal digit:
' "0".."9" (48-57), "A".."F" (65-70) or "a".."f" (97-102).
Public Function isxdigit(c)
    Dim isDecimal, isUpperHex, isLowerHex
    isDecimal = (c >= 48 And c <= 57)
    isUpperHex = (c >= 65 And c <= 70)
    isLowerHex = (c >= 97 And c <= 102)
    isxdigit = CBool(isDecimal Or isUpperHex Or isLowerHex)
End Function
' Returns True when c lies in 1..127. Zero is deliberately excluded.
Public Function isascii(c)
    isascii = CBool(Not (c <= 0 Or c >= 128))
End Function
' Tests whether byte u is a UTF-8 continuation ("body") byte, i.e. &H80..&HBF.
Public Function IsUTF8Body(ByVal u)
    IsUTF8Body = CBool(Not (u < &H80 Or u > &HBF))
End Function
' Returns how many bytes the UTF-8 sequence introduced by lead byte u has
' (1 to 6), or 0 when u is zero, a continuation byte, or above &HFD.
Private Function UTF8Byte(ByVal u)
    Select Case True
        Case (u > &H00 And u <= &H7F)
            UTF8Byte = 1
        Case (u >= &HC0 And u <= &HDF)
            UTF8Byte = 2
        Case (u >= &HE0 And u <= &HEF)
            UTF8Byte = 3
        Case (u >= &HF0 And u <= &HF7)
            UTF8Byte = 4
        Case (u >= &HF8 And u <= &HFB)
            UTF8Byte = 5
        Case (u >= &HFC And u <= &HFD)
            UTF8Byte = 6
        Case Else
            UTF8Byte = 0
    End Select
End Function
' Tests whether three consecutive bytes look like a UTF-8 character:
' u1 must be in &HC0..&HFD and u2, u3 must both be continuation bytes.
Private Function UTF8Test(ByVal u1, ByVal u2, ByVal u3)
    UTF8Test = False
    If u1 < &HC0 Or u1 > &HFD Then Exit Function
    UTF8Test = CBool(IsUTF8Body(u2) And IsUTF8Body(u3))
End Function
' Returns True when s has at least two characters and the first two are
' hexadecimal digits.
Private Function ishex(s)
    Dim pos
    ishex = False
    If Len(s) >= 2 Then
        For pos = 1 To 2
            If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
        Next
        ishex = True
    End If
End Function
' Returns True when s starts with "u" or "U" followed by four hexadecimal
' digits (the tail of a %uXXXX escape).
Private Function isescape(s)
    Dim pos
    isescape = False
    If Len(s) >= 5 Then
        If UCase(Left(s, 1)) = "U" Then
            For pos = 2 To 5
                If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
            Next
            isescape = True
        End If
    End If
End Function
' Converts the first two characters of s, read as hexadecimal digits, to a number.
Private Function AscX(s)
    Dim hexPair
    hexPair = Left(s, 2)
    AscX = CInt("&H" & hexPair)
End Function
' URLDecode, full version.
' Decodes strings produced by Server.URLEncode, UTF-8 URL encoding, or Escape().
'
' "+" becomes a space. Each "%" escape is then interpreted as one of:
'   %uXXXX            -> ChrW of the four hex digits
'   %XX (1..127)      -> Chr of that byte
'   %XX%XX%XX         -> one 3-byte UTF-8 character, when the bytes pass UTF8Test
'   %XX%XX            -> a double-byte character built with Chr(high*256 Or low)
' A "%" that does not start a decodable escape is kept in the output as a
' literal "%" followed by the original text.
Public Function URLDecode(s)
    Dim tp, tl, i, pp
    Dim a, b, c, h
    Dim isThreeByte
    URLDecode = ""
    tp = Split(Replace(s, "+", " "), "%")
    tl = UBound(tp)
    If tl = -1 Then Exit Function
    pp = tp(0)
    i = 1
    Do While i <= tl
        If isescape(tp(i)) Then
            pp = pp & ChrW("&H" & Mid(tp(i), 2, 4)) & Mid(tp(i), 6)
        ElseIf ishex(tp(i)) = False Then
            ' Not an escape at all: keep the "%" that Split removed.
            pp = pp & "%" & tp(i)
        Else
            a = AscX(tp(i))
            If isascii(a) Then
                pp = pp & Chr(a) & Mid(tp(i), 3)
            ElseIf Len(tp(i)) <> 2 Then
                ' High byte followed directly by plain text: not a multi-byte escape.
                pp = pp & "%" & tp(i)
            ElseIf i + 1 > tl Then
                ' Lone high byte at the very end of the input.
                pp = pp & "%" & tp(i)
            ElseIf ishex(tp(i + 1)) = False Then
                ' Next segment is not a hex byte; AscX on it would raise an error.
                pp = pp & "%" & tp(i)
            Else
                b = AscX(tp(i + 1))
                isThreeByte = False
                ' A 3-byte sequence needs the middle segment to be exactly "XX"
                ' (otherwise its trailing text would be lost) and a hex third segment.
                If i + 2 <= tl And Len(tp(i + 1)) = 2 Then
                    If ishex(tp(i + 2)) Then
                        c = AscX(tp(i + 2))
                        isThreeByte = (UTF8Byte(a) = 3 And UTF8Test(a, b, c))
                    End If
                End If
                If isThreeByte Then
                    h = (a And &H0F) * 2 ^ 12 Or (b And &H3F) * 2 ^ 6 Or (c And &H3F)
                    pp = pp & ChrW(h) & Mid(tp(i + 2), 3)
                    i = i + 2
                Else
                    pp = pp & Chr(a * 2 ^ 8 Or b) & Mid(tp(i + 1), 3)
                    i = i + 1
                End If
            End If
        End If
        i = i + 1
    Loop
    URLDecode = pp
End Function
URLEncodeUTF8只写了多字节的加密,没有写ascii字节的,其实也没必要写这个,有个Server.URLEncode可以用,改成js版本的倒还有用武之地 js的版本,里面有个现成的函数
<script language=javascript>
alert(encodeURI("圪圪 eglic "))
</script>
看上边有人说看不懂,我来解释下吧。梅子的第一个函数是把多字节unicode编码转成我们通常正常的编码,在JS中如果偷懒,decodeURI也可实现这个功能。不过,看到这个具体是怎么实现的,感觉好爽,回家研究去啦。复制代码 代码如下:
<script language=javascript>
/**
 * Decodes percent-encoded UTF-8 ("%E5%9C%AA") back into a JavaScript string.
 * Characters that are not part of a %XX escape are copied through unchanged.
 * Handles 1- to 4-byte sequences; code points above U+FFFF are returned as a
 * UTF-16 surrogate pair.
 *
 * Decoding stops (returning what was decoded so far) at the first escape
 * whose lead byte is zero or not hexadecimal.
 */
function revertUTF8(szInput) {
    // Value of the two hex digits starting at index `at` (NaN when not hex).
    function byteAt(at) {
        return parseInt(szInput.charAt(at) + szInput.charAt(at + 1), 16);
    }

    var szRet = "";
    var x, lead, extra, code, k;
    for (x = 0; x < szInput.length; x++) {
        if (szInput.charAt(x) != "%") {
            szRet += szInput.charAt(x);
            continue;
        }
        lead = byteAt(x + 1);
        x += 2; // x now sits on the second hex digit of the lead byte
        if (!lead) { break; }

        // Sequence length and payload bits come from the lead byte.
        if (!(lead & 0x80)) {
            extra = 0; code = lead;
        } else if (!(lead & 0x20)) {
            extra = 1; code = lead & 0x1F;
        } else if (lead < 0xF0) {
            extra = 2; code = lead & 0x0F;
        } else {
            extra = 3; code = lead & 0x07;
        }

        // Each continuation is "%XX": skip the '%', read two digits.
        for (k = 0; k < extra; k++) {
            code = (code << 6) | (byteAt(x + 2) & 0x3F);
            x += 3;
        }

        if (code <= 0xFFFF) {
            szRet += String.fromCharCode(code);
        } else if (code <= 0x10FFFF) {
            code -= 0x10000;
            szRet += String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        } else {
            szRet += "\uFFFD"; // beyond the Unicode range
        }
    }
    return szRet;
}
alert(revertUTF8("%E5%9C%AA%E5%9C%AA%20eglic%20 "))
alert(decodeURI("%E5%9C%AA%E5%9C%AA%20eglic%20 "))
</script>
|
|