Пример #1
0
 /// <summary>
 /// Guesses the character encoding of an XML byte stream by peeking at its
 /// first bytes: a byte order mark, the UTF-16 byte pattern of "&lt;?", or
 /// the <c>encoding</c> pseudo-attribute of an ASCII-compatible
 /// <c>&lt;?xml ... ?&gt;</c> declaration.
 /// </summary>
 /// <param name="s">Stream to inspect. It is marked before each read and
 /// <c>reset()</c> is called afterwards, so the caller can presumably re-read
 /// the inspected bytes (depends on the stream's mark/reset support).</param>
 /// <returns>
 /// "utf-16be", "utf-16le" or "utf-8" when detected from the leading bytes;
 /// the encoding label exactly as written in the XML declaration when one is
 /// present and <c>TextEncoding.resolveEncoding</c> accepts it; "utf-8" when
 /// no usable declaration is found in the first 128 bytes; <c>null</c> when
 /// the declared label is not recognized or resolves to "utf-16le" or
 /// "utf-16be".
 /// </returns>
 private static string sniffEncoding(PeterO.Support.InputStream s)
 {
     byte[] data=new byte[4];
     int count=sniffPeek(s,data);
     // Byte order marks.
     if(count>=2 && data[0]==0xfe && data[1]==0xff) {
       return "utf-16be";
     }
     if(count>=2 && data[0]==0xff && data[1]==0xfe) {
       return "utf-16le";
     }
     if(count>=3 && data[0]==0xef && data[1]==0xbb && data[2]==0xbf) {
       return "utf-8";
     }
     // "<?" encoded as UTF-16 without a byte order mark.
     if(count>=4 && data[0]==0x00 && data[1]==0x3c &&
        data[2]==0x00 && data[3]==0x3f) {
       return "utf-16be";
     }
     if(count>=4 && data[0]==0x3c && data[1]==0x00 &&
        data[2]==0x3f && data[3]==0x00) {
       return "utf-16le";
     }
     // Anything that does not start with the ASCII bytes "<?xm" is taken
     // to be UTF-8.
     if(!(count>=4 && data[0]==0x3c && data[1]==0x3f &&
          data[2]==0x78 && data[3]==0x6d)) {
       return "utf-8";
     }
     // Re-read a larger prefix to parse the XML declaration itself.
     // NOTE(review): a single Read call may return fewer bytes than
     // requested; a short read is treated as the end of the available data.
     data=new byte[128];
     count=sniffPeek(s,data);
     int i=4;
     if(i>=count || data[i]!='l') { // l in <?xml
       return "utf-8";
     }
     i++;
     // Whitespace is mandatory between "<?xml" and "version".
     int next=sniffSkipXmlSpace(data,i,count);
     if(next==i || !sniffMatchesAscii(data,next,count,"version")) {
       return "utf-8";
     }
     i=next+7; // length of "version"
     int quote=sniffReadEqAndQuote(data,ref i,count);
     if(quote<0) {
       return "utf-8";
     }
     // Skip the version value up to and including its closing quote.
     while(i<count){
       if(data[i++]==quote) {
         break;
       }
     }
     // Whitespace is also mandatory before "encoding" (XML EncodingDecl
     // production); previously this was tracked but never checked.
     next=sniffSkipXmlSpace(data,i,count);
     if(next==i || !sniffMatchesAscii(data,next,count,"encoding")) {
       return "utf-8";
     }
     i=next+8; // length of "encoding"
     quote=sniffReadEqAndQuote(data,ref i,count);
     if(quote<0) {
       return "utf-8";
     }
     StringBuilder builder=new StringBuilder();
     while(i<count){
       if(data[i]==quote){
         string label=builder.ToString();
         string encoding=TextEncoding.resolveEncoding(label);
         if(encoding==null) {
           return null;
         }
         // The bytes just parsed were ASCII-compatible, so a UTF-16 label
         // is rejected here (presumably as contradictory -- confirm with
         // callers how a null result is handled).
         if(encoding.Equals("utf-16le") || encoding.Equals("utf-16be")) {
           return null;
         }
         // The label as written is returned, not the resolved name.
         return label;
       }
       builder.Append((char)data[i]);
       i++;
     }
     // The encoding value was not terminated within the inspected prefix.
     return "utf-8";
 }

 // Marks the stream, reads up to buffer.Length bytes into buffer, and
 // resets the stream even if the read throws. Returns Read's result.
 private static int sniffPeek(PeterO.Support.InputStream s, byte[] buffer)
 {
     s.mark(buffer.Length+2);
     try {
       return s.Read(buffer,0,buffer.Length);
     } finally {
       s.reset();
     }
 }

 // Returns the index of the first byte at or after index (and before count)
 // that is not XML whitespace (tab, line feed, carriage return, space).
 private static int sniffSkipXmlSpace(byte[] data, int index, int count)
 {
     while(index<count && (data[index]==0x09 || data[index]==0x0a ||
           data[index]==0x0d || data[index]==0x20)) {
       index++;
     }
     return index;
 }

 // Returns true if the bytes at data[index..] equal the ASCII keyword and
 // lie entirely within the first count bytes.
 private static bool sniffMatchesAscii(byte[] data, int index, int count,
     string keyword)
 {
     if(index+keyword.Length>count) {
       return false;
     }
     for(int k=0;k<keyword.Length;k++){
       if(data[index+k]!=keyword[k]) {
         return false;
       }
     }
     return true;
 }

 // Consumes optional whitespace, '=', optional whitespace and an opening
 // single or double quote. On success returns the quote byte and leaves
 // index just past it; returns -1 if the input does not match.
 private static int sniffReadEqAndQuote(byte[] data, ref int index, int count)
 {
     index=sniffSkipXmlSpace(data,index,count);
     if(index>=count || data[index]!='=') {
       return -1;
     }
     index=sniffSkipXmlSpace(data,index+1,count);
     if(index>=count) {
       return -1;
     }
     int quote=data[index];
     index++;
     if(quote!='"' && quote!='\'') {
       return -1;
     }
     return quote;
 }