当前位置: 移动技术网 > IT编程>开发语言>Java > java实现十六进制字符unicode与中英文转换示例

java实现十六进制字符unicode与中英文转换示例

2019年07月22日  | 移动技术网IT编程  | 我要评论

关于unicode和utf的关系,可以简单的记忆:unicode是一个编码组织、一个编码规范,在java中指utf-16;utf是unicode编码的transformation(转换)格式,以便于很好地在网络中传递、在存储媒介中保存,于是utf存在多种格式,如8、16、32,再加上le、be(小端、大端字节序)以及是否带bom的区别,unicode编码格式才会有以下过程中的10种。

复制代码 代码如下:

public static void main(string[] args) throws unsupportedencodingexception {
                 stringutil.str2all("0 产品型号描述");
  stringutil.str4all("30000900a74ec1548b57f753cf63f08f");
 }

/**
  * 尝试所有编码格式对十六进制数字字符串进行编码
  *
  * @param hexstr
  * @throws unsupportedencodingexception
  */
 public static void str4all(string ustr) throws unsupportedencodingexception{

  system.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[ustr.length()/2];
  for (int i = 0; i < bs.length; i++) {
   bs[i] = (byte) integer.parseint(ustr.substring(i*2, i*2+2), 16);
  }

  system.out.println(new string(bs, "utf-8"));
  // 16
  system.out.println(new string(bs, "utf-16")); // 同unicode
  system.out.println(new string(bs, "utf-16le"));
  system.out.println(new string(bs, "x-utf-16le-bom"));
  system.out.println(new string(bs, "utf-16be"));
//  system.out.println(new string(bs, "x-utf-16be-bom")); // unsupportedencodingexception
  // 32
  system.out.println(new string(bs, "utf-32"));
  system.out.println(new string(bs, "utf-32le"));
  system.out.println(new string(bs, "x-utf-32le-bom"));
  system.out.println(new string(bs, "utf-32be"));
  system.out.println(new string(bs, "x-utf-32le-bom"));
 }
/**
  * 列出所有编码对应的解码后的十六进制数字字符串
  *
  * @param ustr
  * @throws unsupportedencodingexception
  */
 public static void str2all(string ustr) throws unsupportedencodingexception{

  system.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[]{};

  bs = ustr.getbytes("utf-8");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  // 16
  bs = ustr.getbytes("utf-16");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-16le");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-16le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-16be");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
//  bs = ustr.getbytes("x-utf-16be-bom"); // unsupportedencodingexception
  // 32
  bs = ustr.getbytes("utf-32");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-32le");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-32le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("utf-32be");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
  bs = ustr.getbytes("x-utf-32le-bom");
  for(byte b:bs){
   system.out.print(integer.tohexstring(b & 0xff));
  }
  system.out.println();
 }

编码名称收集

复制代码 代码如下:

charset us-ascii %s
    historicalname ascii
    # iana aliases
    alias iso-ir-6
    alias ansi_x3.4-1986
    alias iso_646.irv:1991
    alias ascii
    alias iso646-us
    alias us
    alias ibm367
    alias cp367
    alias csascii
    alias default
    # other aliases
    alias 646 # solaris posix locale
    alias iso_646.irv:1983
    alias ansi_x3.4-1968 # linux posix locale (redhat)
    alias ascii7

charset utf-8 utf_8
    historicalname utf8
    alias utf8
    alias unicode-1-1-utf-8

charset utf-16 utf_16
    historicalname utf-16
    alias utf_16
    alias utf16
    alias unicode
    alias unicodebig

charset utf-16be utf_16be
    historicalname unicodebigunmarked
    alias utf_16be
    alias iso-10646-ucs-2
    alias x-utf-16be
    alias unicodebigunmarked

charset utf-16le utf_16le
    historicalname unicodelittleunmarked
    alias utf_16le
    alias x-utf-16le
    alias unicodelittleunmarked

charset x-utf-16le-bom utf_16le_bom
    historicalname unicodelittle
    alias unicodelittle

charset utf-32 utf_32
    alias utf_32
    alias utf32

charset utf-32le utf_32le
    alias utf_32le
    alias x-utf-32le

charset utf-32be utf_32be
    alias utf_32be
    alias x-utf-32be

charset x-utf-32le-bom utf_32le_bom
    alias utf_32le_bom
    alias utf-32le-bom

charset x-utf-32be-bom utf_32be_bom
    alias utf_32be_bom
    alias utf-32be-bom

charset iso-8859-1 %s
    historicalname iso8859_1
    # iana aliases
    alias iso-ir-100
    alias iso_8859-1
    alias latin1
    alias l1
    alias ibm819
    alias cp819
    alias csisolatin1
    # other aliases
    alias 819
    alias ibm-819
    alias iso8859_1
    alias iso_8859-1:1987
    alias iso_8859_1
    alias 8859_1
    alias iso8859-1

charset iso-8859-2 %s
    historicalname iso8859_2
    alias iso8859_2
    alias 8859_2
    alias iso-ir-101
    alias iso_8859-2
    alias iso_8859-2:1987
    alias iso8859-2
    alias latin2
    alias l2
    alias ibm912
    alias ibm-912
    alias cp912
    alias 912
    alias csisolatin2

charset iso-8859-4 %s
    historicalname iso8859_4
    alias iso8859_4
    alias iso8859-4
    alias 8859_4
    alias iso-ir-110
    alias iso_8859-4
    alias iso_8859-4:1988
    alias latin4
    alias l4
    alias ibm914
    alias ibm-914
    alias cp914
    alias 914
    alias csisolatin4

charset iso-8859-5 %s
    historicalname iso8859_5
    alias iso8859_5
    alias 8859_5
    alias iso-ir-144
    alias iso_8859-5
    alias iso_8859-5:1988
    alias iso8859-5
    alias cyrillic
    alias ibm915
    alias ibm-915
    alias cp915
    alias 915
    alias csisolatincyrillic

charset iso-8859-7 %s
    historicalname iso8859_7
    alias iso8859_7
    alias 8859_7
    alias iso-ir-126
    alias iso_8859-7
    alias iso_8859-7:1987
    alias elot_928
    alias ecma-118
    alias greek
    alias greek8
    alias csisolatingreek
    alias sun_eu_greek # solaris 7/8 compatibility
    alias ibm813
    alias ibm-813
    alias 813
    alias cp813
    alias iso8859-7 # solaris 9 compatibility

charset iso-8859-9 %s
    historicalname iso8859_9
    alias iso8859_9
    alias 8859_9
    alias iso-ir-148
    alias iso_8859-9
    alias iso_8859-9:1989
    alias iso8859-9
    alias latin5
    alias l5
    alias ibm920
    alias ibm-920
    alias 920
    alias cp920
    alias csisolatin5

charset iso-8859-13 %s
    historicalname iso8859_13
    alias iso8859_13
    alias 8859_13
    alias iso_8859-13
    alias iso8859-13

charset iso-8859-15 %s
    historicalname iso8859_15
    # iana alias
    alias iso_8859-15
    # other aliases
    alias 8859_15
    alias iso8859_15
    alias iso8859-15
    alias ibm923
    alias ibm-923
    alias cp923
    alias 923
    alias latin0
    alias latin9
    alias l9
    alias csisolatin0
    alias csisolatin9
    alias iso8859_15_fdis

charset koi8-r %s
    historicalname koi8_r
    alias koi8_r
    alias koi8
    alias cskoi8r

charset koi8-u %s
    alias koi8_u

charset windows-1250 %s
    historicalname cp1250
    alias cp1250
    alias cp5346 # euro ibm ccsid

charset windows-1251 %s
    historicalname cp1251
    alias cp1251
    alias cp5347 # euro ibm ccsid
    alias ansi-1251 # solaris compatibility

charset windows-1252 %s
    historicalname cp1252
    alias cp1252
    alias cp5348 # euro ibm ccsid

charset windows-1253 %s
    historicalname cp1253
    alias cp1253
    alias cp5349 # euro ibm ccsid

charset windows-1254 %s
    historicalname cp1254
    alias cp1254
    alias cp5350 # euro ibm ccsid

charset windows-1257 %s
    historicalname cp1257
    alias cp1257
    alias cp5353 # euro ibm ccsid


charset ibm437 %s
    historicalname cp437
    alias cp437
    alias ibm-437
    alias 437
    alias cspc8codepage437
    alias windows-437

charset x-ibm737 %s
    historicalname cp737
    alias cp737
    alias ibm737
    alias ibm-737
    alias 737

charset ibm775 %s
    historicalname cp775
    alias cp775
    alias ibm-775
    alias 775

charset ibm850 %s
    historicalname cp850
    alias cp850
    alias ibm-850
    alias 850
    alias cspc850multilingual

charset ibm852 %s
    historicalname cp852
    alias cp852
    alias ibm-852
    alias 852
    alias cspcp852

charset ibm855 %s
    historicalname cp855
    alias cp855
    alias ibm-855
    alias 855
    alias cspcp855

charset ibm857 %s
    historicalname cp857
    alias cp857
    alias ibm-857
    alias 857
    alias csibm857

charset ibm00858 %s
    historicalname cp858
    alias cp858
    alias ccsid00858
    alias cp00858
    alias 858

charset ibm862 %s
    historicalname cp862
    alias cp862
    alias ibm-862
    alias 862
    alias csibm862
    alias cspc862latinhebrew

charset ibm866 %s
    historicalname cp866
    alias cp866
    alias ibm-866
    alias 866
    alias csibm866

charset x-ibm874 %s
    historicalname cp874
    alias cp874
    alias ibm874
    alias ibm-874
    alias 874

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网