From: 011netservice@gmail.com Date: 2024-03-04, 週1 Subject: CodeHelper\cs\KeyWord\Encodings.txt #### GetByteCount, GetMaxByteCount, using System; using System.Text; public class SamplesEncoding { public static void Main() { // The characters to encode: // Latin Small Letter Z (U+007A) // Latin Small Letter A (U+0061) // Combining Breve (U+0306) // Latin Small Letter AE With Acute (U+01FD) // Greek Small Letter Beta (U+03B2) // a high-surrogate value (U+D8FF) // a low-surrogate value (U+DCFF) String myStr = "za\u0306\u01FD\u03B2\uD8FF\uDCFF"; // Get different encodings. Encoding u7 = Encoding.UTF7; Encoding u8 = Encoding.UTF8; Encoding u16LE = Encoding.Unicode; Encoding u16BE = Encoding.BigEndianUnicode; Encoding u32 = Encoding.UTF32; // Encode the entire string, and print out the counts and the resulting bytes. Console.WriteLine( "Encoding the entire string:" ); PrintCountsAndBytes( myStr, u7 ); PrintCountsAndBytes( myStr, u8 ); PrintCountsAndBytes( myStr, u16LE ); PrintCountsAndBytes( myStr, u16BE ); PrintCountsAndBytes( myStr, u32 ); Console.WriteLine(); // Encode three characters starting at index 4, and print out the counts and the resulting bytes. Console.WriteLine( "Encoding the characters from index 4 through 6:" ); PrintCountsAndBytes( myStr, 4, 3, u7 ); PrintCountsAndBytes( myStr, 4, 3, u8 ); PrintCountsAndBytes( myStr, 4, 3, u16LE ); PrintCountsAndBytes( myStr, 4, 3, u16BE ); PrintCountsAndBytes( myStr, 4, 3, u32 ); } public static void PrintCountsAndBytes( String s, Encoding enc ) { // Display the name of the encoding used. Console.Write( "{0,-30} :", enc.ToString() ); // Display the exact byte count. int iBC = enc.GetByteCount( s ); Console.Write( " {0,-3}", iBC ); // Display the maximum byte count. int iMBC = enc.GetMaxByteCount( s.Length ); Console.Write( " {0,-3} :", iMBC ); // Encode the entire string. byte[] bytes = enc.GetBytes( s ); // Display all the encoded bytes. 
PrintHexBytes( bytes ); } public static void PrintCountsAndBytes( String s, int index, int count, Encoding enc ) { // Display the name of the encoding used. Console.Write( "{0,-30} :", enc.ToString() ); // Display the exact byte count. int iBC = enc.GetByteCount( s.ToCharArray(), index, count ); Console.Write( " {0,-3}", iBC ); // Display the maximum byte count. int iMBC = enc.GetMaxByteCount( count ); Console.Write( " {0,-3} :", iMBC ); // Encode a range of characters in the string. byte[] bytes = new byte[iBC]; enc.GetBytes( s, index, count, bytes, bytes.GetLowerBound(0) ); // Display all the encoded bytes. PrintHexBytes( bytes ); } public static void PrintHexBytes( byte[] bytes ) { if (( bytes == null ) || ( bytes.Length == 0 )) { Console.WriteLine( "" ); } else { for ( int i = 0; i < bytes.Length; i++ ) Console.Write( "{0:X2} ", bytes[i] ); Console.WriteLine(); } } } /* String myStr = "za\u0306\u01FD\u03B2\uD8FF\uDCFF"; This code produces the following output. Encoding the entire string: System.Text.UTF7Encoding : 18 23 :7A 61 2B 41 77 59 42 2F 51 4F 79 32 50 2F 63 2F 77 2D System.Text.UTF8Encoding : 12 24 :7A 61 CC 86 C7 BD CE B2 F1 8F B3 BF System.Text.UnicodeEncoding : 14 16 :7A 00 61 00 06 03 FD 01 B2 03 FF D8 FF DC System.Text.UnicodeEncoding : 14 16 :00 7A 00 61 03 06 01 FD 03 B2 D8 FF DC FF System.Text.UTF32Encoding : 24 32 :7A 00 00 00 61 00 00 00 06 03 00 00 FD 01 00 00 B2 03 00 00 FF FC 04 00 Encoding the characters from index 4 through 6: System.Text.UTF7Encoding : 10 11 :2B 41 37 4C 59 2F 39 7A 2F 2D System.Text.UTF8Encoding : 6 12 :CE B2 F1 8F B3 BF System.Text.UnicodeEncoding : 6 8 :B2 03 FF D8 FF DC System.Text.UnicodeEncoding : 6 8 :03 B2 D8 FF DC FF System.Text.UTF32Encoding : 8 16 :B2 03 00 00 FF FC 04 00 */ #### Unicode The Unicode Standard assigns a code point (a number) to each character in every supported script. A Unicode Transformation Format (UTF) is a way to encode that code point. 
The Unicode Standard uses the following UTFs: UTF-8, (1 到 4 byte) which represents each code point as a sequence of one to four bytes. UTF-16,(2 或 4 byte, 即 1 到 2 個 16-bit integer) which represents each code point as a sequence of one to two 16-bit integers. UTF-32,(固定 4 byte) which represents each code point as a 32-bit integer. For more information about the UTFs and other encodings supported by System.Text, see Character Encoding in the .NET Framework. The UnicodeEncoding class represents a UTF-16 encoding. The encoder can use either big endian byte order (most significant byte first) or little endian byte order (least significant byte first). For example, the Latin Capital Letter A (code point U+0041) is serialized as follows (in hexadecimal): Big endian byte order: 00 41 Little endian byte order: 41 00 #### CodePages 常用 CodePage, CodeName, Description, ------ ------------ ---------------------------- 936, gb2312, Chinese Simplified (GB2312) 950, big5, Chinese Traditional (Big5) 1200, utf-16, Unicode 1201, unicodeFFFE, Unicode (Big-Endian) 20127, us-ascii, US-ASCII 65001, utf-8, Unicode (UTF-8) ------ ------------ ---------------------------- The example produces the following output when run on .NET Core: Info.CodePage Info.Name Info.DisplayName 1200 utf-16 Unicode 1201 utf-16BE Unicode (Big-Endian) 12000 utf-32 Unicode (UTF-32) 12001 utf-32BE Unicode (UTF-32 Big-Endian) 20127 us-ascii US-ASCII 28591 iso-8859-1 Western European (ISO) 65000 utf-7 Unicode (UTF-7) 65001 utf-8 Unicode (UTF-8) The example produces the following output when run on .NET Framework: Info.CodePage Info.Name Info.DisplayName 37 IBM037 IBM EBCDIC (US-Canada) 437 IBM437 OEM United States 500 IBM500 IBM EBCDIC (International) 708 ASMO-708 Arabic (ASMO 708) 720 DOS-720 Arabic (DOS) 737 ibm737 Greek (DOS) 775 ibm775 Baltic (DOS) 850 ibm850 Western European (DOS) 852 ibm852 Central European (DOS) 855 IBM855 OEM Cyrillic 857 ibm857 Turkish (DOS) 858 IBM00858 OEM Multilingual Latin I 
860 IBM860 Portuguese (DOS) 861 ibm861 Icelandic (DOS) 862 DOS-862 Hebrew (DOS) 863 IBM863 French Canadian (DOS) 864 IBM864 Arabic (864) 865 IBM865 Nordic (DOS) 866 cp866 Cyrillic (DOS) 869 ibm869 Greek, Modern (DOS) 870 IBM870 IBM EBCDIC (Multilingual Latin-2) 874 windows-874 Thai (Windows) 875 cp875 IBM EBCDIC (Greek Modern) 932 shift_jis Japanese (Shift-JIS) 936 gb2312 Chinese Simplified (GB2312) 949 ks_c_5601-1987 Korean 950 big5 Chinese Traditional (Big5) 1026 IBM1026 IBM EBCDIC (Turkish Latin-5) 1047 IBM01047 IBM Latin-1 1140 IBM01140 IBM EBCDIC (US-Canada-Euro) 1141 IBM01141 IBM EBCDIC (Germany-Euro) 1142 IBM01142 IBM EBCDIC (Denmark-Norway-Euro) 1143 IBM01143 IBM EBCDIC (Finland-Sweden-Euro) 1144 IBM01144 IBM EBCDIC (Italy-Euro) 1145 IBM01145 IBM EBCDIC (Spain-Euro) 1146 IBM01146 IBM EBCDIC (UK-Euro) 1147 IBM01147 IBM EBCDIC (France-Euro) 1148 IBM01148 IBM EBCDIC (International-Euro) 1149 IBM01149 IBM EBCDIC (Icelandic-Euro) 1200 utf-16 Unicode 1201 utf-16BE Unicode (Big-Endian) 1250 windows-1250 Central European (Windows) 1251 windows-1251 Cyrillic (Windows) 1252 Windows-1252 Western European (Windows) 1253 windows-1253 Greek (Windows) 1254 windows-1254 Turkish (Windows) 1255 windows-1255 Hebrew (Windows) 1256 windows-1256 Arabic (Windows) 1257 windows-1257 Baltic (Windows) 1258 windows-1258 Vietnamese (Windows) 1361 Johab Korean (Johab) 10000 macintosh Western European (Mac) 10001 x-mac-japanese Japanese (Mac) 10002 x-mac-chinesetrad Chinese Traditional (Mac) 10003 x-mac-korean Korean (Mac) 10004 x-mac-arabic Arabic (Mac) 10005 x-mac-hebrew Hebrew (Mac) 10006 x-mac-greek Greek (Mac) 10007 x-mac-cyrillic Cyrillic (Mac) 10008 x-mac-chinesesimp Chinese Simplified (Mac) 10010 x-mac-romanian Romanian (Mac) 10017 x-mac-ukrainian Ukrainian (Mac) 10021 x-mac-thai Thai (Mac) 10029 x-mac-ce Central European (Mac) 10079 x-mac-icelandic Icelandic (Mac) 10081 x-mac-turkish Turkish (Mac) 10082 x-mac-croatian Croatian (Mac) 12000 utf-32 Unicode (UTF-32) 12001 utf-32BE 
Unicode (UTF-32 Big-Endian) 20000 x-Chinese-CNS Chinese Traditional (CNS) 20001 x-cp20001 TCA Taiwan 20002 x-Chinese-Eten Chinese Traditional (Eten) 20003 x-cp20003 IBM5550 Taiwan 20004 x-cp20004 TeleText Taiwan 20005 x-cp20005 Wang Taiwan 20105 x-IA5 Western European (IA5) 20106 x-IA5-German German (IA5) 20107 x-IA5-Swedish Swedish (IA5) 20108 x-IA5-Norwegian Norwegian (IA5) 20127 us-ascii US-ASCII 20261 x-cp20261 T.61 20269 x-cp20269 ISO-6937 20273 IBM273 IBM EBCDIC (Germany) 20277 IBM277 IBM EBCDIC (Denmark-Norway) 20278 IBM278 IBM EBCDIC (Finland-Sweden) 20280 IBM280 IBM EBCDIC (Italy) 20284 IBM284 IBM EBCDIC (Spain) 20285 IBM285 IBM EBCDIC (UK) 20290 IBM290 IBM EBCDIC (Japanese katakana) 20297 IBM297 IBM EBCDIC (France) 20420 IBM420 IBM EBCDIC (Arabic) 20423 IBM423 IBM EBCDIC (Greek) 20424 IBM424 IBM EBCDIC (Hebrew) 20833 x-EBCDIC-KoreanExtended IBM EBCDIC (Korean Extended) 20838 IBM-Thai IBM EBCDIC (Thai) 20866 koi8-r Cyrillic (KOI8-R) 20871 IBM871 IBM EBCDIC (Icelandic) 20880 IBM880 IBM EBCDIC (Cyrillic Russian) 20905 IBM905 IBM EBCDIC (Turkish) 20924 IBM00924 IBM Latin-1 20932 EUC-JP Japanese (JIS 0208-1990 and 0212-1990) 20936 x-cp20936 Chinese Simplified (GB2312-80) 20949 x-cp20949 Korean Wansung 21025 cp1025 IBM EBCDIC (Cyrillic Serbian-Bulgarian) 21866 koi8-u Cyrillic (KOI8-U) 28591 iso-8859-1 Western European (ISO) 28592 iso-8859-2 Central European (ISO) 28593 iso-8859-3 Latin 3 (ISO) 28594 iso-8859-4 Baltic (ISO) 28595 iso-8859-5 Cyrillic (ISO) 28596 iso-8859-6 Arabic (ISO) 28597 iso-8859-7 Greek (ISO) 28598 iso-8859-8 Hebrew (ISO-Visual) 28599 iso-8859-9 Turkish (ISO) 28603 iso-8859-13 Estonian (ISO) 28605 iso-8859-15 Latin 9 (ISO) 29001 x-Europa Europa 38598 iso-8859-8-i Hebrew (ISO-Logical) 50220 iso-2022-jp Japanese (JIS) 50221 csISO2022JP Japanese (JIS-Allow 1 byte Kana) 50222 iso-2022-jp Japanese (JIS-Allow 1 byte Kana - SO/SI) 50225 iso-2022-kr Korean (ISO) 50227 x-cp50227 Chinese Simplified (ISO-2022) 51932 euc-jp Japanese (EUC) 51936 EUC-CN 
Chinese Simplified (EUC) 51949 euc-kr Korean (EUC) 52936 hz-gb-2312 Chinese Simplified (HZ) 54936 GB18030 Chinese Simplified (GB18030) 57002 x-iscii-de ISCII Devanagari 57003 x-iscii-be ISCII Bengali 57004 x-iscii-ta ISCII Tamil 57005 x-iscii-te ISCII Telugu 57006 x-iscii-as ISCII Assamese 57007 x-iscii-or ISCII Oriya 57008 x-iscii-ka ISCII Kannada 57009 x-iscii-ma ISCII Malayalam 57010 x-iscii-gu ISCII Gujarati 57011 x-iscii-pa ISCII Punjabi 65000 utf-7 Unicode (UTF-7) 65001 utf-8 Unicode (UTF-8) #### Encoding.ASCII Gets an encoding for the ASCII (7-bit) character set. public static System.Text.Encoding ASCII { get; } ASCII 限制為最低的 128 個 Unicode 字元, 從 U+0000 到 U+007F。 以下範例若無法轉換為 ASCII 的字元, 會被替換為 "?" The following example demonstrates the effect of the ASCII encoding on characters that are outside the ASCII range. using System; using System.Text; class EncodingExample { public static void Main() { // Create an ASCII encoding. Encoding ascii = Encoding.ASCII; // A Unicode string with two characters outside the ASCII code range. String unicodeString = "This unicode string contains two characters " + "with codes outside the ASCII code range, " + "Pi (\u03a0) and Sigma (\u03a3)."; Console.WriteLine("Original string:"); Console.WriteLine(unicodeString); // Save the positions of the special characters for later reference. int indexOfPi = unicodeString.IndexOf('\u03a0'); int indexOfSigma = unicodeString.IndexOf('\u03a3'); // Encode the string. Byte[] encodedBytes = ascii.GetBytes(unicodeString); Console.WriteLine(); Console.WriteLine("Encoded bytes:"); foreach (Byte b in encodedBytes) { Console.Write("[{0}]", b); } Console.WriteLine(); // Notice that the special characters have been replaced with // the value 63, which is the ASCII character code for '?'. 
Console.WriteLine(); Console.WriteLine( "Value at position of Pi character: {0}", encodedBytes[indexOfPi] ); Console.WriteLine( "Value at position of Sigma character: {0}", encodedBytes[indexOfSigma] ); // Decode bytes back to a string. // Notice missing the Pi and Sigma characters. String decodedString = ascii.GetString(encodedBytes); Console.WriteLine(); Console.WriteLine("Decoded bytes:"); Console.WriteLine(decodedString); } } /* This code produces the following output. Original string: This unicode string contains two characters with codes outside the ASCII code range, Pi (Π) and Sigma (Σ). Encoded bytes: [84][104][105][115][32][117][110][105][99][111][100][101][32][115][116][114][105][110][103][32][99][111][110][116][97][105][110][115][32][116][119][111][32][99][104][97][114][97][99][116][101][114][115][32][119][105][116][104][32][99][111][100][101][115][32][111][117][116][115][105][100][101][32][116][104][101][32][65][83][67][73][73][32][99][111][100][101][32][114][97][110][103][101][44][32][80][105][32][40][63][41][32][97][110][100][32][83][105][103][109][97][32][40][63][41][46] Value at position of Pi character: 63 Value at position of Sigma character: 63 Decoded bytes: This unicode string contains two characters with codes outside the ASCII code range, Pi (?) and Sigma (?). */ Remarks ASCII characters are limited to the lowest 128 Unicode characters, from U+0000 to U+007F. ASCII 限制為最低的 128 個 Unicode 字元, 從 U+0000 到 U+007F。 When selecting the ASCII encoding for your app, consider the following: 1. The ASCII encoding is usually appropriate for protocols that require ASCII. 2. If you require 8-bit encoding (which is sometimes incorrectly referred to as "ASCII"), the UTF-8 encoding is recommended over the ASCII encoding. For the characters 0-7F, the results are identical, but use of UTF-8 avoids data loss by allowing representation of all Unicode characters that are representable. 
Note that the ASCII encoding has an 8th bit ambiguity that can allow malicious use, but the UTF-8 encoding removes ambiguity about the 8th bit. 3. Prior to .NET Framework version 2.0, .NET Framework allowed spoofing by ignoring the 8th bit. Beginning with .NET Framework 2.0, non-ASCII code points fall back during decoding. The ASCIIEncoding object that is returned by this property might not have the appropriate behavior for your app. It uses replacement fallback to replace each string that it cannot encode and each byte that it cannot decode with a question mark ("?") character. Instead, you can call the GetEncoding(String, EncoderFallback, DecoderFallback) method to instantiate an ASCIIEncoding object whose fallback is either an EncoderFallbackException or a DecoderFallbackException, as the following example illustrates. 以下 ASCII 程式碼轉換, 若無法轉換時, 則會拋出 exception, 不會另行轉換為"?" using System; using System.Text; public class Example { public static void Main() { Encoding enc = Encoding.GetEncoding("us-ascii", new EncoderExceptionFallback(), new DecoderExceptionFallback()); string value = "\u00C4 \u00F6 \u00AE"; try { byte[] bytes= enc.GetBytes(value); foreach (var byt in bytes) Console.Write("{0:X2} ", byt); Console.WriteLine(); string value2 = enc.GetString(bytes); Console.WriteLine(value2); } catch (EncoderFallbackException e) { Console.WriteLine("Unable to encode {0} at index {1}", e.IsUnknownSurrogate() ? String.Format("U+{0:X4} U+{1:X4}", Convert.ToUInt16(e.CharUnknownHigh), Convert.ToUInt16(e.CharUnknownLow)) : String.Format("U+{0:X4}", Convert.ToUInt16(e.CharUnknown)), e.Index); } } } // The example displays the following output: // Unable to encode U+00C4 at index 0 #### Example: Decode Unicode https://learn.microsoft.com/en-us/dotnet/api/system.text.decoder.getchars?view=net-8.0 The following example demonstrates how to decode a range of elements from a byte array and store them in a Unicode character array. 
The GetCharCount method is used to calculate the number of characters needed to store the decoded elements in the array bytes. The GetChars method decodes the specified elements in the byte array and stores them in the new character array. using System; using System.Text; class UnicodeEncodingExample { public static void Main() { Char[] chars; // 備忘: Unicode 跟 c 的 string 結尾為 0 會衝突: Byte[] bytes = new Byte[] { 85, 0, 110, 0, 105, 0, 99, 0, 111, 0, 100, 0, 101, 0}; Decoder uniDecoder = Encoding.Unicode.GetDecoder(); int charCount = uniDecoder.GetCharCount(bytes, 0, bytes.Length); // 取得 char 個數. chars = new Char[charCount]; // 宣告剛好符合大小的 char[] buffer. int charsDecodedCount = uniDecoder.GetChars(bytes, 0, bytes.Length, chars, 0); // copy bytes to chars. Console.WriteLine( "{0} characters used to decode bytes.", charsDecodedCount ); Console.Write("Decoded chars: "); foreach (Char c in chars) { Console.Write("[{0}]", c); } Console.WriteLine(); } } This code example produces the following output. 7 characters used to decode bytes. Decoded chars: [U][n][i][c][o][d][e] #### Example: converts a string from one encoding to another https://learn.microsoft.com/en-us/dotnet/api/system.text.encoding.getcharcount?view=net-8.0#system-text-encoding-getcharcount(system-byte()-system-int32-system-int32) The following example converts a string from one encoding to another. using System; using System.Text; class Example { static void Main() { string unicodeString = "This string contains the unicode character Pi (\u03a0)"; // Create two different encodings. Encoding ascii = Encoding.ASCII; Encoding unicode = Encoding.Unicode; // Convert the string into a byte array. byte[] unicodeBytes = unicode.GetBytes(unicodeString); // Perform the conversion from one encoding to the other. byte[] asciiBytes = Encoding.Convert(unicode, ascii, unicodeBytes); // Convert the new byte[] into a char[] and then into a string. 
char[] asciiChars = new char[ascii.GetCharCount(asciiBytes, 0, asciiBytes.Length)]; ascii.GetChars(asciiBytes, 0, asciiBytes.Length, asciiChars, 0); string asciiString = new string(asciiChars); // Display the strings created before and after the conversion. Console.WriteLine("Original string: {0}", unicodeString); Console.WriteLine("Ascii converted string: {0}", asciiString); } } // The example displays the following output: // Original string: This string contains the unicode character Pi (Π) // Ascii converted string: This string contains the unicode character Pi (?) #### Example: encodes a string and decodes a range of bytes. https://learn.microsoft.com/en-us/dotnet/api/system.text.encoding.getcharcount?view=net-8.0#system-text-encoding-getcharcount(system-byte()-system-int32-system-int32) The following example encodes a string into an array of bytes, and then decodes a range of the bytes into an array of characters. using System; using System.Text; public class SamplesEncoding { public static void Main() { // Create two instances of UTF32Encoding: one with little-endian byte order and one with big-endian byte order. Encoding u32LE = Encoding.GetEncoding( "utf-32" ); Encoding u32BE = Encoding.GetEncoding( "utf-32BE" ); // Use a string containing the following characters: // Latin Small Letter Z (U+007A) // Latin Small Letter A (U+0061) // Combining Breve (U+0306) // Latin Small Letter AE With Acute (U+01FD) // Greek Small Letter Beta (U+03B2) String myStr = "za\u0306\u01FD\u03B2"; // Encode the string using the big-endian byte order. byte[] barrBE = new byte[u32BE.GetByteCount( myStr )]; u32BE.GetBytes( myStr, 0, myStr.Length, barrBE, 0 ); // Encode the string using the little-endian byte order. byte[] barrLE = new byte[u32LE.GetByteCount( myStr )]; u32LE.GetBytes( myStr, 0, myStr.Length, barrLE, 0 ); // Get the char counts, decode eight bytes starting at index 0, // and print out the counts and the resulting bytes. 
Console.Write( "BE array with BE encoding : " ); PrintCountsAndChars( barrBE, 0, 8, u32BE ); Console.Write( "LE array with LE encoding : " ); PrintCountsAndChars( barrLE, 0, 8, u32LE ); } public static void PrintCountsAndChars( byte[] bytes, int index, int count, Encoding enc ) { // Display the name of the encoding used. Console.Write( "{0,-25} :", enc.ToString() ); // Display the exact character count. int iCC = enc.GetCharCount( bytes, index, count ); Console.Write( " {0,-3}", iCC ); // Display the maximum character count. int iMCC = enc.GetMaxCharCount( count ); Console.Write( " {0,-3} :", iMCC ); // Decode the bytes and display the characters. char[] chars = enc.GetChars( bytes, index, count ); // The following is an alternative way to decode the bytes: // char[] chars = new char[iCC]; // enc.GetChars( bytes, index, count, chars, 0 ); Console.WriteLine( chars ); } } This code produces the following output. The question marks take the place of characters that cannot be displayed at the console. BE array with BE encoding : System.Text.UTF32Encoding : 2 6 :za LE array with LE encoding : System.Text.UTF32Encoding : 2 6 :za #### 以下舊資料確認後移到上面 ---------- using System; using System.Text; public class SamplesEncoding { public static void Main() { // Print the header. Console.Write( "Name " ); Console.Write( "CodePage " ); Console.Write( "BodyName " ); Console.Write( "HeaderName " ); Console.Write( "WebName " ); Console.WriteLine( "Encoding.EncodingName" ); // For every encoding, compare the name properties with EncodingInfo.Name. // Display only the encodings that have one or more different names. 
foreach( EncodingInfo ei in Encoding.GetEncodings() ) { Encoding e = ei.GetEncoding(); if (( ei.Name != e.BodyName ) || ( ei.Name != e.HeaderName ) || ( ei.Name != e.WebName )) { Console.Write( "{0,-18} ", ei.Name ); Console.Write( "{0,-9} ", e.CodePage ); Console.Write( "{0,-18} ", e.BodyName ); Console.Write( "{0,-18} ", e.HeaderName ); Console.Write( "{0,-18} ", e.WebName ); Console.WriteLine( "{0} ", e.EncodingName ); } } } } /* This code produces the following output. Name CodePage BodyName HeaderName WebName Encoding.EncodingName shift_jis 932 iso-2022-jp iso-2022-jp shift_jis Japanese (Shift-JIS) windows-1250 1250 iso-8859-2 windows-1250 windows-1250 Central European (Windows) windows-1251 1251 koi8-r windows-1251 windows-1251 Cyrillic (Windows) Windows-1252 1252 iso-8859-1 Windows-1252 Windows-1252 Western European (Windows) windows-1253 1253 iso-8859-7 windows-1253 windows-1253 Greek (Windows) windows-1254 1254 iso-8859-9 windows-1254 windows-1254 Turkish (Windows) csISO2022JP 50221 iso-2022-jp iso-2022-jp csISO2022JP Japanese (JIS-Allow 1 byte Kana) iso-2022-kr 50225 iso-2022-kr euc-kr iso-2022-kr Korean (ISO) */ ---------- using System; using System.Text; public class SamplesEncoding { public static void Main() { // Print the header. Console.Write( "CodePage identifier and name " ); Console.Write( "BrDisp BrSave " ); Console.Write( "MNDisp MNSave " ); Console.WriteLine( "1-Byte ReadOnly " ); // For every encoding, get the property values. foreach( EncodingInfo ei in Encoding.GetEncodings() ) { Encoding e = ei.GetEncoding(); Console.Write( "{0,-6} {1,-25} ", ei.CodePage, ei.Name ); Console.Write( "{0,-8} {1,-8} ", e.IsBrowserDisplay, e.IsBrowserSave ); Console.Write( "{0,-8} {1,-8} ", e.IsMailNewsDisplay, e.IsMailNewsSave ); Console.WriteLine( "{0,-8} {1,-8} ", e.IsSingleByte, e.IsReadOnly ); } } } /* This code produces the following output. 
CodePage identifier and name BrDisp BrSave MNDisp MNSave 1-Byte ReadOnly 37 IBM037 False False False False True True 437 IBM437 False False False False True True 500 IBM500 False False False False True True 708 ASMO-708 True True False False True True 720 DOS-720 True True False False True True 737 ibm737 False False False False True True 775 ibm775 False False False False True True 850 ibm850 False False False False True True 852 ibm852 True True False False True True 855 IBM855 False False False False True True 857 ibm857 False False False False True True 858 IBM00858 False False False False True True 860 IBM860 False False False False True True 861 ibm861 False False False False True True 862 DOS-862 True True False False True True 863 IBM863 False False False False True True 864 IBM864 False False False False True True 865 IBM865 False False False False True True 866 cp866 True True False False True True 869 ibm869 False False False False True True 870 IBM870 False False False False True True 874 windows-874 True True True True True True 875 cp875 False False False False True True 932 shift_jis True True True True False True 936 gb2312 True True True True False True 949 ks_c_5601-1987 True True True True False True 950 big5 True True True True False True 1026 IBM1026 False False False False True True 1047 IBM01047 False False False False True True 1140 IBM01140 False False False False True True 1141 IBM01141 False False False False True True 1142 IBM01142 False False False False True True 1143 IBM01143 False False False False True True 1144 IBM01144 False False False False True True 1145 IBM01145 False False False False True True 1146 IBM01146 False False False False True True 1147 IBM01147 False False False False True True 1148 IBM01148 False False False False True True 1149 IBM01149 False False False False True True 1200 utf-16 False True False False False True 1201 unicodeFFFE False False False False False True 1250 windows-1250 True True True True True True 
1251 windows-1251 True True True True True True 1252 Windows-1252 True True True True True True 1253 windows-1253 True True True True True True 1254 windows-1254 True True True True True True 1255 windows-1255 True True True True True True 1256 windows-1256 True True True True True True 1257 windows-1257 True True True True True True 1258 windows-1258 True True True True True True 1361 Johab False False False False False True 10000 macintosh False False False False True True 10001 x-mac-japanese False False False False False True 10002 x-mac-chinesetrad False False False False False True 10003 x-mac-korean False False False False False True 10004 x-mac-arabic False False False False True True 10005 x-mac-hebrew False False False False True True 10006 x-mac-greek False False False False True True 10007 x-mac-cyrillic False False False False True True 10008 x-mac-chinesesimp False False False False False True 10010 x-mac-romanian False False False False True True 10017 x-mac-ukrainian False False False False True True 10021 x-mac-thai False False False False True True 10029 x-mac-ce False False False False True True 10079 x-mac-icelandic False False False False True True 10081 x-mac-turkish False False False False True True 10082 x-mac-croatian False False False False True True 12000 utf-32 False False False False False True 12001 utf-32BE False False False False False True 20000 x-Chinese-CNS False False False False False True 20001 x-cp20001 False False False False False True 20002 x-Chinese-Eten False False False False False True 20003 x-cp20003 False False False False False True 20004 x-cp20004 False False False False False True 20005 x-cp20005 False False False False False True 20105 x-IA5 False False False False True True 20106 x-IA5-German False False False False True True 20107 x-IA5-Swedish False False False False True True 20108 x-IA5-Norwegian False False False False True True 20127 us-ascii False False True True True True 20261 x-cp20261 False False False 
False False True 20269 x-cp20269 False False False False True True 20273 IBM273 False False False False True True 20277 IBM277 False False False False True True 20278 IBM278 False False False False True True 20280 IBM280 False False False False True True 20284 IBM284 False False False False True True 20285 IBM285 False False False False True True 20290 IBM290 False False False False True True 20297 IBM297 False False False False True True 20420 IBM420 False False False False True True 20423 IBM423 False False False False True True 20424 IBM424 False False False False True True 20833 x-EBCDIC-KoreanExtended False False False False True True 20838 IBM-Thai False False False False True True 20866 koi8-r True True True True True True 20871 IBM871 False False False False True True 20880 IBM880 False False False False True True 20905 IBM905 False False False False True True 20924 IBM00924 False False False False True True 20932 EUC-JP False False False False False True 20936 x-cp20936 False False False False False True 20949 x-cp20949 False False False False False True 21025 cp1025 False False False False True True 21866 koi8-u True True True True True True 28591 iso-8859-1 True True True True True True 28592 iso-8859-2 True True True True True True 28593 iso-8859-3 False False True True True True 28594 iso-8859-4 True True True True True True 28595 iso-8859-5 True True True True True True 28596 iso-8859-6 True True True True True True 28597 iso-8859-7 True True True True True True 28598 iso-8859-8 True True False False True True 28599 iso-8859-9 True True True True True True 28603 iso-8859-13 False False True True True True 28605 iso-8859-15 False True True True True True 29001 x-Europa False False False False True True 38598 iso-8859-8-i True True True True True True 50220 iso-2022-jp False False True True False True 50221 csISO2022JP False True True True False True 50222 iso-2022-jp False False False False False True 50225 iso-2022-kr False False True False False True 
50227 x-cp50227 False False False False False True 51932 euc-jp True True True True False True 51936 EUC-CN False False False False False True 51949 euc-kr False False True True False True 52936 hz-gb-2312 True True True True False True 54936 GB18030 True True True True False True 57002 x-iscii-de False False False False False True 57003 x-iscii-be False False False False False True 57004 x-iscii-ta False False False False False True 57005 x-iscii-te False False False False False True 57006 x-iscii-as False False False False False True 57007 x-iscii-or False False False False False True 57008 x-iscii-ka False False False False False True 57009 x-iscii-ma False False False False False True 57010 x-iscii-gu False False False False False True 57011 x-iscii-pa False False False False False True 65000 utf-7 False False True True False True 65001 utf-8 True True True True False True */