Export (0) Print
Expand All
1 out of 7 rated this helpful - Rate this topic

Encoding.UTF8 Property

Gets an encoding for the UTF-8 format.

Namespace: System.Text
Assembly: mscorlib (in mscorlib.dll)

public static Encoding UTF8 { get; }
/** @property */
public static Encoding get_UTF8 ()

public static function get UTF8 () : Encoding

Property Value

An Encoding for the UTF-8 format.

The Unicode Standard assigns a code point (a number) to each character in every supported script. A Unicode Transformation Format (UTF) is a way to encode that code point. The Unicode Standard version 3.2 uses the following UTFs:

  • UTF-8, which represents each code point as a sequence of one to four bytes.

  • UTF-16, which represents each code point as a sequence of one to two 16-bit integers.

  • UTF-32, which represents each code point as a 32-bit integer.

For more information on UTFs, see The Unicode Standard at www.unicode.org.

The following code example determines the number of bytes required to encode a character array, encodes the characters, and displays the resulting bytes.

using System;
using System.Text;

public class SamplesEncoding  {

   public static void Main()  {

      // The characters to encode:
      //    Latin Small Letter Z (U+007A)
      //    Latin Small Letter A (U+0061)
      //    Combining Breve (U+0306)
      //    Latin Small Letter AE With Acute (U+01FD)
      //    Greek Small Letter Beta (U+03B2)
      //    a high-surrogate value (U+D8FF)
      //    a low-surrogate value (U+DCFF)
      char[] myChars = new char[] { 'z', 'a', '\u0306', '\u01FD', '\u03B2', '\uD8FF', '\uDCFF' };

      // Get different encodings.
      Encoding  u7    = Encoding.UTF7;
      Encoding  u8    = Encoding.UTF8;
      Encoding  u16LE = Encoding.Unicode;
      Encoding  u16BE = Encoding.BigEndianUnicode;
      Encoding  u32   = Encoding.UTF32;

      // Encode the entire array, and print out the counts and the resulting bytes.
      PrintCountsAndBytes( myChars, u7 );
      PrintCountsAndBytes( myChars, u8 );
      PrintCountsAndBytes( myChars, u16LE );
      PrintCountsAndBytes( myChars, u16BE );
      PrintCountsAndBytes( myChars, u32 );

   }


   public static void PrintCountsAndBytes( char[] chars, Encoding enc )  {

      // Display the name of the encoding used.
      Console.Write( "{0,-30} :", enc.ToString() );

      // Display the exact byte count.
      int iBC  = enc.GetByteCount( chars );
      Console.Write( " {0,-3}", iBC );

      // Display the maximum byte count.
      int iMBC = enc.GetMaxByteCount( chars.Length );
      Console.Write( " {0,-3} :", iMBC );

      // Encode the array of chars.
      byte[] bytes = enc.GetBytes( chars );

      // Display all the encoded bytes.
      PrintHexBytes( bytes );

   }


   public static void PrintHexBytes( byte[] bytes )  {

      if (( bytes == null ) || ( bytes.Length == 0 ))
         Console.WriteLine( "<none>" );
      else  {
         for ( int i = 0; i < bytes.Length; i++ )
            Console.Write( "{0:X2} ", bytes[i] );
         Console.WriteLine();
      }

   }

}


/* 
This code produces the following output.

System.Text.UTF7Encoding       : 18  23  :7A 61 2B 41 77 59 42 2F 51 4F 79 32 50 2F 63 2F 77 2D
System.Text.UTF8Encoding       : 12  24  :7A 61 CC 86 C7 BD CE B2 F1 8F B3 BF
System.Text.UnicodeEncoding    : 14  16  :7A 00 61 00 06 03 FD 01 B2 03 FF D8 FF DC
System.Text.UnicodeEncoding    : 14  16  :00 7A 00 61 03 06 01 FD 03 B2 D8 FF DC FF
System.Text.UTF32Encoding      : 24  32  :7A 00 00 00 61 00 00 00 06 03 00 00 FD 01 00 00 B2 03 00 00 FF FC 04 00

*/


import System.*;
import System.Text.*;
import System.Byte;

public class SamplesEncoding
{
    public static void main(String[] args)
    {
        // The characters to encode:
        //    Latin Small Letter Z (U+007A)
        //    Latin Small Letter A (U+0061)
        //    Combining Breve (U+0306)
        //    Latin Small Letter AE With Acute (U+01FD)
        //    Greek Small Letter Beta (U+03B2)
        //    a high-surrogate value (U+D8FF)
        //    a low-surrogate value (U+DCFF)
        char myChars[] = new char[] {
            'z', 'a', '\u0306', '\u01FD', '\u03B2', '\uD8FF', '\uDCFF'
        };

        // Get different encodings.
        Encoding u7 = Encoding.get_UTF7();
        Encoding u8 = Encoding.get_UTF8();
        Encoding u16LE = Encoding.get_Unicode();
        Encoding u16BE = Encoding.get_BigEndianUnicode();
        Encoding u32 = Encoding.get_UTF32();

        // Encode the entire array, and print out the counts
        // and the resulting bytes.
        PrintCountsAndBytes(myChars, u7);
        PrintCountsAndBytes(myChars, u8);
        PrintCountsAndBytes(myChars, u16LE);
        PrintCountsAndBytes(myChars, u16BE);
        PrintCountsAndBytes(myChars, u32);
    } //main

    public static void PrintCountsAndBytes(char chars[], Encoding enc)
    {
        // Display the name of the encoding used.
        Console.Write("{0,-30} :", enc.toString());

        // Display the exact byte count.
        int iBC = enc.GetByteCount(chars);
        Console.Write(" {0,-3}", String.valueOf(iBC));

        // Display the maximum byte count.
        int iMBC = enc.GetMaxByteCount(chars.length);
        Console.Write(" {0,-3} :", String.valueOf(iMBC));

        // Encode the array of chars.
        ubyte bytes[] = enc.GetBytes(chars);

        // Display all the encoded bytes.
        PrintHexBytes(bytes);
    } //PrintCountsAndBytes

    public static void PrintHexBytes(ubyte bytes[])
    {
        if(bytes == null || bytes.length == 0) {
            Console.WriteLine("<none>");
        }
        else {
            for(int i = 0; i < bytes.length; i++) {
                Console.Write("{0:X2} ",
                        ((System.Byte)bytes[i]).ToString("X2"));
            }
            Console.WriteLine();
        }
    } //PrintHexBytes
} //SamplesEncoding

/* 
This code produces the following output.

System.Text.UTF7Encoding       : 18  23  :7A 61 2B 41 77 59 42 2F 51 4F 79 32 50
 2F 63 2F 77 2D
System.Text.UTF8Encoding       : 12  24  :7A 61 CC 86 C7 BD CE B2 F1 8F B3 BF
System.Text.UnicodeEncoding    : 14  16  :7A 00 61 00 06 03 FD 01 B2 03 FF D8 FF
 DC
System.Text.UnicodeEncoding    : 14  16  :00 7A 00 61 03 06 01 FD 03 B2 D8 FF DC
 FF
System.Text.UTF32Encoding      : 24  32  :7A 00 00 00 61 00 00 00 06 03 00 00 FD
 01 00 00 B2 03 00 00 FF FC 04 00

*/


Windows 98, Windows 2000 SP4, Windows CE, Windows Millennium Edition, Windows Mobile for Pocket PC, Windows Mobile for Smartphone, Windows Server 2003, Windows XP Media Center Edition, Windows XP Professional x64 Edition, Windows XP SP2, Windows XP Starter Edition

The .NET Framework does not support all versions of every platform. For a list of the supported versions, see System Requirements.

.NET Framework

Supported in: 2.0, 1.1, 1.0

.NET Compact Framework

Supported in: 2.0, 1.0
Did you find this helpful?
(1500 characters remaining)
Thank you for your feedback

Community Additions

ADD
Show:
© 2014 Microsoft. All rights reserved.