[.NET]取得字串中的Unicode的字

  • 11550
  • 0
  • .NET
  • 2012-05-18

[.NET]取得字串中的Unicode的字

以下是利用 Big5 編碼與 UTF-8 編碼的 GetByteCount 結果來做比較,取出字串中無法以 Big5 表示的 Unicode 字,如「堃」這個字。

C#

/// <summary>
/// Extracts the characters of <paramref name="vstrWord"/> whose code page 950 (Big5)
/// encoding is a single byte while their UTF-8 encoding needs two or three bytes.
/// </summary>
/// <remarks>
/// Every UTF-16 code unit is measured on its own. A non-ASCII unit that Big5 encodes
/// as one byte is presumably one the code page has no mapping for (it is substituted
/// by a single-byte fallback), which is how characters such as "堃" are detected.
/// </remarks>
/// <param name="vstrWord">Text to scan. May be <c>null</c> or empty.</param>
/// <returns>
/// The matching characters in their original order, or an empty string when there
/// are none or when <paramref name="vstrWord"/> is <c>null</c> or empty.
/// </returns>
public static string CheckHasUnicodeWord(string vstrWord)
{
    // A null string used to fail with NullReferenceException on .Length; the VB.NET
    // version of this routine yields an empty result instead, so do the same here.
    if (string.IsNullOrEmpty(vstrWord))
    {
        return string.Empty;
    }

    Encoding big5 = Encoding.GetEncoding(950);
    Encoding utf8 = Encoding.UTF8;

    // Measuring slices of one char[] avoids allocating a one-character string
    // several times per iteration.
    char[] units = vstrWord.ToCharArray();
    StringBuilder result = new StringBuilder();

    for (int i = 0; i < units.Length; i++)
    {
        // Anything Big5 encodes in two bytes is a regular Big5 character.
        if (big5.GetByteCount(units, i, 1) != 1)
        {
            continue;
        }

        // One UTF-8 byte means plain ASCII; two or three bytes means a non-ASCII
        // character that Big5 nevertheless squeezed into a single byte.
        int utf8Bytes = utf8.GetByteCount(units, i, 1);
        if (utf8Bytes == 2 || utf8Bytes == 3)
        {
            result.Append(units[i]);
        }
    }

    return result.ToString();
}

		
// Usage example: run the extraction, then display what it found.
string extracted = CheckHasUnicodeWord("這是Unicode:堃哦! 烾!");
MessageBox.Show(extracted); // dialog shows 堃烾

VB.NET

''' <summary>
''' Extracts the characters of <paramref name="vstrWord"/> whose code page 950 (Big5)
''' encoding is a single byte while their UTF-8 encoding needs two or three bytes.
''' </summary>
''' <param name="vstrWord">Text to scan, examined one character at a time.</param>
''' <returns>The matching characters in their original order; empty when none match.</returns>
Public Shared Function CheckHasUnicodeWord(ByVal vstrWord As String) As String
    Dim big5 As System.Text.Encoding = System.Text.Encoding.GetEncoding(950)
    Dim utf8 As System.Text.Encoding = System.Text.Encoding.UTF8
    Dim kept As New System.Text.StringBuilder()

    ' Len() is 0 for Nothing, so a missing string simply skips the loop.
    For pos As Integer = 1 To Len(vstrWord)
        ' Take the one-character slice once and reuse it for both measurements.
        Dim piece As String = Mid(vstrWord, pos, 1)
        Dim big5Bytes As Integer = big5.GetByteCount(piece)
        Dim utf8Bytes As Integer = utf8.GetByteCount(piece)

        ' One Big5 byte plus a multi-byte UTF-8 form marks a non-ASCII character
        ' that Big5 encoded as a single byte.
        If big5Bytes = 1 AndAlso (utf8Bytes = 2 OrElse utf8Bytes = 3) Then
            kept.Append(piece)
        End If
    Next

    Return kept.ToString()
End Function

		
' Usage example: run the extraction, then display what it found.
Dim extracted As String = CheckHasUnicodeWord("這是Unicode:堃哦! 烾!")
MessageBox.Show(extracted) ' dialog shows 堃烾

 

Hi, 

亂馬客Blog已移到了「亂馬客 : Re:從零開始的軟體開發生活」

請大家繼續支持 ^_^