天视利车载mp3,青岛换客网,湖北性息
/**
 * Guesses the character encoding of a file from its leading bytes.
 *
 * <p>Detection order:
 * <ol>
 *   <li>A byte-order mark: {@code FF FE} gives "utf-16le", {@code FE FF} gives
 *       "utf-16be", {@code EF BB BF} gives "utf-8".</li>
 *   <li>Otherwise the content is scanned from the start for the first non-ASCII
 *       byte. A well-formed three-byte UTF-8 sequence (lead {@code E0-EF}
 *       followed by two bytes in {@code 80-BF}) gives "utf-8".</li>
 *   <li>Anything else, including an empty file, pure ASCII content, or any
 *       failure while reading, gives the default "gbk".</li>
 * </ol>
 *
 * <p>This is a heuristic: it can misclassify a file, although the chance is small.
 *
 * @param file the file to inspect
 * @return one of "gbk", "utf-16le", "utf-16be" or "utf-8"
 */
public static String getcharset(File file) {
    String charset = "gbk";
    byte[] first3bytes = new byte[3];
    // try-with-resources closes the stream on every path, including the
    // early return for an empty file and any exception thrown while reading.
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file))) {
        // The read limit must cover the bytes consumed before reset().
        bis.mark(first3bytes.length);
        int read = bis.read(first3bytes, 0, first3bytes.length);
        if (read == -1) {
            return charset;
        }

        if (first3bytes[0] == (byte) 0xff && first3bytes[1] == (byte) 0xfe) {
            charset = "utf-16le";
        } else if (first3bytes[0] == (byte) 0xfe && first3bytes[1] == (byte) 0xff) {
            charset = "utf-16be";
        } else if (first3bytes[0] == (byte) 0xef
                && first3bytes[1] == (byte) 0xbb
                && first3bytes[2] == (byte) 0xbf) {
            charset = "utf-8";
        } else {
            // No BOM: rewind and scan the content from the first byte.
            bis.reset();
            while ((read = bis.read()) != -1) {
                if (read < 0x80) {
                    // Plain ASCII says nothing about the encoding; keep scanning.
                    continue;
                }
                if (read >= 0xf0) {
                    break;
                }
                if (read <= 0xbf) {
                    // A lone byte in 0x80-0xBF is also treated as GBK.
                    break;
                }
                if (read <= 0xdf) {
                    // Two-byte form (0xC0-0xDF)(0x80-0xBF) can also occur in GB
                    // encodings, so it is not conclusive: keep scanning.
                    if (isContinuationByte(bis.read())) {
                        continue;
                    }
                    break;
                }
                // Lead byte 0xE0-0xEF: two continuation bytes make a three-byte
                // UTF-8 sequence. This can still be wrong, but rarely is.
                if (isContinuationByte(bis.read()) && isContinuationByte(bis.read())) {
                    charset = "utf-8";
                }
                break;
            }
        }
    } catch (Exception e) {
        // Best effort: report the problem and fall back to the current guess.
        e.printStackTrace();
    }
    return charset;
}

/** Returns whether {@code b} lies in the UTF-8 continuation range 0x80-0xBF; -1 (end of stream) does not. */
private static boolean isContinuationByte(int b) {
    return 0x80 <= b && b <= 0xbf;
}
如对本文有疑问,请在下面进行留言讨论,广大热心网友会与你互动!!
点击进行留言回复
网友评论