Encoding.UTF8 Property

Microsoft Silverlight will reach end of support after October 2021. Learn more.

Updated: October 2010

Gets an encoding for the UTF-8 format.

Namespace:  System.Text
Assembly:  mscorlib (in mscorlib.dll)

Syntax

'Declaration
Public Shared ReadOnly Property UTF8 As Encoding
public static Encoding UTF8 { get; }

Property Value

Type: System.Text.Encoding
An encoding for the UTF-8 format.

Remarks

This property returns a UTF8Encoding object that encodes Unicode characters into a sequence of one to four bytes per character, and that decodes a UTF-8-encoded byte array to Unicode characters. For information about the character encodings supported by the .NET Framework and a discussion of which Unicode encoding to use, see Understanding Encodings.

The UTF8Encoding object that is returned by this property may not have the appropriate behavior for your application. It uses replacement fallback to replace each character that it cannot encode and each byte that it cannot decode with a question mark ("?") character. Instead, you can call the UTF8Encoding.UTF8Encoding(Boolean, Boolean) constructor to instantiate a UTF8Encoding object that uses exception fallback: it throws an EncoderFallbackException when a character cannot be encoded and a DecoderFallbackException when a byte sequence cannot be decoded, as the following example illustrates.

Imports System.Text

Module Example
   ' Encodes a string that contains an unpaired high surrogate with a strict
   ' UTF8Encoding, then writes either the encoded bytes and the decoded text,
   ' or the details of the encoding failure, to the supplied TextBlock.
   Public Sub Demo(ByVal outputBlock As System.Windows.Controls.TextBlock)
      ' Constructed with (True, True) so that data that cannot be converted
      ' raises an exception instead of being replaced.
      Dim strictUtf8 As Encoding = New UTF8Encoding(True, True)

      ' U+D802 is a high surrogate that is followed by "3" rather than by a
      ' low surrogate, so it cannot be encoded.
      Dim source As String = String.Format("{0} {1}{2} {3}",
                                           ChrW(&h00C4), ChrW(&hD802), ChrW(&h0033), ChrW(&h00AE))

      Try
         Dim encoded() As Byte = strictUtf8.GetBytes(source)

         ' Hex dump of the encoded bytes, one two-digit group per byte.
         For position As Integer = 0 To encoded.Length - 1
            outputBlock.Text &= String.Format("{0:X2} ", encoded(position))
         Next
         outputBlock.Text &= vbCrLf

         ' Decode the bytes again and show the resulting string.
         Dim decoded As String = strictUtf8.GetString(encoded, 0, encoded.Length)
         outputBlock.Text &= decoded & vbCrLf
      Catch ex As EncoderFallbackException
         ' Describe the offending input: a surrogate pair is reported as two
         ' code units, anything else as a single code unit.
         Dim offending As String
         If ex.IsUnknownSurrogate() Then
            offending = String.Format("U+{0:X4} U+{1:X4}",
                                      Convert.ToUInt16(ex.CharUnknownHigh),
                                      Convert.ToUInt16(ex.CharUnknownLow))
         Else
            offending = String.Format("U+{0:X4}",
                                      Convert.ToUInt16(ex.CharUnknown))
         End If

         outputBlock.Text &= String.Format("Unable to encode {0} at index {1}",
                                           offending,
                                           ex.Index) & vbCrLf
      End Try
   End Sub
End Module
' The example displays the following output:
'       Unable to encode U+D802 at index 2
using System;
using System.Text;

public class Example
{
   // Encodes a string that contains an unpaired high surrogate with a strict
   // UTF8Encoding, then writes either the encoded bytes and the decoded text,
   // or the details of the encoding failure, to the supplied TextBlock.
   public static void Demo(System.Windows.Controls.TextBlock outputBlock)
   {
      // Constructed with (true, true) so that data that cannot be converted
      // raises an exception instead of being replaced.
      Encoding strictUtf8 = new UTF8Encoding(true, true);

      // U+D802 is a high surrogate that is followed by "3" rather than by a
      // low surrogate, so it cannot be encoded.
      string source = "\u00C4 \uD802\u0033 \u00AE";

      try
      {
         byte[] encoded = strictUtf8.GetBytes(source);

         // Hex dump of the encoded bytes, one two-digit group per byte.
         for (int position = 0; position < encoded.Length; position++)
         {
            outputBlock.Text += String.Format("{0:X2} ", encoded[position]);
         }
         outputBlock.Text += "\n";

         // Decode the bytes again and show the resulting string.
         string decoded = strictUtf8.GetString(encoded, 0, encoded.Length);
         outputBlock.Text += decoded + "\n";
      }
      catch (EncoderFallbackException ex)
      {
         // Describe the offending input: a surrogate pair is reported as two
         // code units, anything else as a single code unit.
         string offending;
         if (ex.IsUnknownSurrogate())
         {
            offending = String.Format("U+{0:X4} U+{1:X4}",
                                      Convert.ToUInt16(ex.CharUnknownHigh),
                                      Convert.ToUInt16(ex.CharUnknownLow));
         }
         else
         {
            offending = String.Format("U+{0:X4}",
                                      Convert.ToUInt16(ex.CharUnknown));
         }

         outputBlock.Text += String.Format("Unable to encode {0} at index {1}",
                                           offending,
                                           ex.Index) + "\n";
      }
   }
}
// The example displays the following output:
//        Unable to encode U+D802 at index 2

Examples

The following code example determines the number of bytes required to encode a character array, encodes the characters, and displays the resulting bytes.

Imports System.Text

Public Class Example

   ' Destination for all output; assigned by Demo before anything is printed.
   Private Shared outputBlock As System.Windows.Controls.TextBlock

   ' Encodes one fixed character array with UTF-8, UTF-16 little-endian and
   ' UTF-16 big-endian, printing one report line per encoding.
   Public Shared Sub Demo(ByVal outBlock As System.Windows.Controls.TextBlock)

      outputBlock = outBlock

      ' The characters to encode:
      '    Latin Small Letter Z (U+007A)
      '    Latin Small Letter A (U+0061)
      '    Combining Breve (U+0306)
      '    Latin Small Letter AE With Acute (U+01FD)
      '    Greek Small Letter Beta (U+03B2)
      '    a high-surrogate value (U+D8FF)
      '    a low-surrogate value (U+DCFF)
      Dim myChars() As Char = {"z"c, "a"c, ChrW(&H306), ChrW(&H1FD), ChrW(&H3B2), ChrW(&HD8FF), ChrW(&HDCFF)}

      ' The encodings to compare, in the order they are reported.
      Dim encodings() As Encoding = {Encoding.UTF8, Encoding.Unicode, Encoding.BigEndianUnicode}

      ' Encode the entire array with each encoding and print the counts and bytes.
      For Each target As Encoding In encodings
         PrintCountsAndBytes(myChars, target)
      Next
   End Sub

   ' Writes one report line for the given encoding: its type name, the exact
   ' byte count for chars, the maximum byte count for that many characters,
   ' and the encoded bytes in hexadecimal.
   Public Shared Sub PrintCountsAndBytes(ByVal chars() As Char, ByVal enc As Encoding)
      ' Name of the encoding, left-aligned in a 30-character column.
      Dim encodingName As String = enc.ToString()
      outputBlock.Text &= String.Format("{0,-30} :", encodingName)

      ' Exact number of bytes needed for these characters.
      Dim exactCount As Integer = enc.GetByteCount(chars)
      outputBlock.Text &= String.Format(" {0,-3}", exactCount)

      ' Upper bound on the bytes needed for this many characters.
      Dim maxCount As Integer = enc.GetMaxByteCount(chars.Length)
      outputBlock.Text &= String.Format(" {0,-3} :", maxCount)

      ' Encode the characters and finish the line with the hex dump.
      PrintHexBytes(enc.GetBytes(chars))
   End Sub

   ' Appends the bytes as two-digit hexadecimal groups followed by a line
   ' break, or "<none>" when the array is missing or empty.
   Public Shared Sub PrintHexBytes(ByVal bytes() As Byte)
      If bytes Is Nothing OrElse bytes.Length = 0 Then
         outputBlock.Text &= "<none>" & vbCrLf
         Return
      End If

      For Each b As Byte In bytes
         outputBlock.Text &= String.Format("{0:X2} ", b)
      Next
      outputBlock.Text &= vbCrLf
   End Sub
End Class
' This example produces the following output.
'    System.Text.UTF8Encoding       : 12  24  :7A 61 CC 86 C7 BD CE B2 F1 8F B3 BF
'    System.Text.UnicodeEncoding    : 14  16  :7A 00 61 00 06 03 FD 01 B2 03 FF D8 FF DC
'    System.Text.UnicodeEncoding    : 14  16  :00 7A 00 61 03 06 01 FD 03 B2 D8 FF DC FF
using System;
using System.Text;

public class Example
{
   // Destination for all output; assigned by Demo before anything is printed.
   private static System.Windows.Controls.TextBlock outputBlock;

   // Encodes one fixed character array with UTF-8, UTF-16 little-endian and
   // UTF-16 big-endian, printing one report line per encoding.
   public static void Demo(System.Windows.Controls.TextBlock outBlock)
   {
      outputBlock = outBlock;

      // The characters to encode:
      //    Latin Small Letter Z (U+007A)
      //    Latin Small Letter A (U+0061)
      //    Combining Breve (U+0306)
      //    Latin Small Letter AE With Acute (U+01FD)
      //    Greek Small Letter Beta (U+03B2)
      //    a high-surrogate value (U+D8FF)
      //    a low-surrogate value (U+DCFF)
      char[] myChars = new char[] { 'z', 'a', '\u0306', '\u01FD', '\u03B2', '\uD8FF', '\uDCFF' };

      // The encodings to compare, in the order they are reported.
      Encoding[] encodings = new Encoding[]
      {
         Encoding.UTF8,
         Encoding.Unicode,
         Encoding.BigEndianUnicode
      };

      // Encode the entire array with each encoding and print the counts and bytes.
      foreach (Encoding target in encodings)
      {
         PrintCountsAndBytes(myChars, target);
      }
   }

   // Writes one report line for the given encoding: its type name, the exact
   // byte count for chars, the maximum byte count for that many characters,
   // and the encoded bytes in hexadecimal.
   public static void PrintCountsAndBytes(char[] chars, Encoding enc)
   {
      // Name of the encoding, left-aligned in a 30-character column.
      string encodingName = enc.ToString();
      outputBlock.Text += String.Format("{0,-30} :", encodingName);

      // Exact number of bytes needed for these characters.
      int exactCount = enc.GetByteCount(chars);
      outputBlock.Text += String.Format(" {0,-3}", exactCount);

      // Upper bound on the bytes needed for this many characters.
      int maxCount = enc.GetMaxByteCount(chars.Length);
      outputBlock.Text += String.Format(" {0,-3} :", maxCount);

      // Encode the characters and finish the line with the hex dump.
      PrintHexBytes(enc.GetBytes(chars));
   }

   // Appends the bytes as two-digit hexadecimal groups followed by a line
   // break, or "<none>" when the array is missing or empty.
   public static void PrintHexBytes(byte[] bytes)
   {
      if (bytes == null || bytes.Length == 0)
      {
         outputBlock.Text += "<none>" + "\n";
         return;
      }

      foreach (byte b in bytes)
      {
         outputBlock.Text += String.Format("{0:X2} ", b);
      }
      outputBlock.Text += "\n";
   }
}
/* 
This code produces the following output.
   System.Text.UTF8Encoding       : 12  24  :7A 61 CC 86 C7 BD CE B2 F1 8F B3 BF
   System.Text.UnicodeEncoding    : 14  16  :7A 00 61 00 06 03 FD 01 B2 03 FF D8 FF DC
   System.Text.UnicodeEncoding    : 14  16  :00 7A 00 61 03 06 01 FD 03 B2 D8 FF DC FF
*/

Version Information

Silverlight

Supported in: 5, 4, 3

Silverlight for Windows Phone

Supported in: Windows Phone OS 7.1, Windows Phone OS 7.0

XNA Framework

Supported in: Xbox 360, Windows Phone OS 7.0

Platforms

For a list of the operating systems and browsers that are supported by Silverlight, see Supported Operating Systems and Browsers.

Change History

Date

History

Reason

October 2010

Noted replacement fallback behavior.

Customer feedback.