/// <summary>
/// Tells whether at least one end of line byte looks like the trailing byte of a
/// big endian unit: it occupies the last position of its unit and
/// <c>IsPrecededByNullBytes</c> reports the required padding before it.
/// </summary>
/// <param name="context">Detection context holding the data and the end of line indexes.</param>
/// <param name="numBytesPerUnit">Number of bytes that make up one unit of the encoding.</param>
/// <returns>True if any end of line index satisfies both conditions.</returns>
private bool IsBigEndian(EncodingDetectionContext context, int numBytesPerUnit)
{
    // A unit holds the end of line byte plus this many padding bytes.
    int paddingLength = numBytesPerUnit - 1;

    return context.EolIndexes.Any(index =>
    {
        // Both checks are always evaluated, helper first, so the helper runs once per visited index.
        bool hasLeadingNulls = IsPrecededByNullBytes(context.Data, index, numNullBytes: paddingLength);
        bool endsUnit = (index % numBytesPerUnit) == paddingLength;

        if (!endsUnit)
        {
            return false;
        }

        return hasLeadingNulls;
    });
}
/// <summary>
/// Tells whether at least one end of line byte looks like the leading byte of a
/// little endian unit: it occupies the first position of its unit and
/// <c>IsFollowedByNullBytes</c> reports the required padding after it.
/// </summary>
/// <param name="context">Detection context holding the data, its length and the end of line indexes.</param>
/// <param name="numBytesPerUnit">Number of bytes that make up one unit of the encoding.</param>
/// <returns>True if any end of line index satisfies both conditions.</returns>
private bool IsLittleEndian(EncodingDetectionContext context, int numBytesPerUnit)
{
    // A unit holds the end of line byte plus this many padding bytes.
    int paddingLength = numBytesPerUnit - 1;

    return context.EolIndexes.Any(index =>
    {
        // Both checks are always evaluated, helper first, so the helper runs once per visited index.
        bool hasTrailingNulls = IsFollowedByNullBytes(
            context.Data,
            context.DataLength,
            index,
            numNullBytes: paddingLength);
        bool startsUnit = (index % numBytesPerUnit) == 0;

        if (!startsUnit)
        {
            return false;
        }

        return hasTrailingNulls;
    });
}
/// <summary>
/// Tells whether the data can be treated as utf-8: none of the first
/// <c>DataLength</c> bytes is a null byte, and at least one end of line byte
/// has no null byte directly next to it.
/// </summary>
/// <param name="context">Detection context holding the data, its length and the end of line indexes.</param>
/// <returns>True if the data passes both checks.</returns>
private bool IsUtf8(EncodingDetectionContext context)
{
    // A single null byte inside the data rules utf-8 out.
    bool isFreeOfNulls = context.Data
        .Take(context.DataLength)
        .All(current => current != ByteCode.Null);

    if (!isFreeOfNulls)
    {
        return false;
    }

    return context.EolIndexes.Any(index =>
    {
        // An end of line at the very first / very last position has no neighbour on that side.
        // Both neighbours are always inspected, the previous one first.
        bool precededByNull = (index != 0 && context.Data[index - 1] == ByteCode.Null);
        bool followedByNull = (index != context.DataLength - 1 && context.Data[index + 1] == ByteCode.Null);

        return !(precededByNull || followedByNull);
    });
}
/// <summary>
/// Checks the data against the given encoding: the data is rejected when
/// <c>ContainsNullBytesUnit</c> finds a whole null unit, otherwise the decision
/// is delegated to the supplied endianness check.
/// </summary>
/// <param name="context">Detection context holding the data and its length.</param>
/// <param name="encoding">Encoding that provides the number of bytes per unit.</param>
/// <param name="IsFromEndiannes">Endianness check, called with the context and the unit size.</param>
/// <returns>False when a whole null unit is found; otherwise the result of the endianness check.</returns>
private bool IsFromEncoding(
    EncodingDetectionContext context,
    EncodingType encoding,
    Func<EncodingDetectionContext, int, bool> IsFromEndiannes)
{
    int unitSize = encoding.GetNumBytesPerUnit();

    // An entire unit made of null bytes means the data can't be in this encoding.
    bool hasNullUnit = ContainsNullBytesUnit(context.Data, context.DataLength, unitSize);

    // The endianness check only runs when no null unit was found.
    return !hasNullUnit && IsFromEndiannes(context, unitSize);
}
/// <summary>
/// Tries to detect the encoding of the data by inspecting its end of line bytes.
/// The candidate encodings in <c>encodingsToCheckOrdered</c> are tried in order
/// and the first one whose check succeeds is returned.
/// </summary>
/// <param name="data">Data where to detect encoding.</param>
/// <param name="dataLength">Length of the data.</param>
/// <returns>
/// The detected encoding, or <c>EncodingType.None</c> when there are no end of
/// line bytes or no candidate check succeeds.
/// </returns>
internal EncodingType GetEncodingFromEolBytes(byte[] data, int dataLength)
{
    var eolPositions = data.FindEolByteIndexes();

    // Without end of line bytes the encoding is not needed, because the file
    // will not need eol replacement.
    if (eolPositions.Any())
    {
        var detectionContext = new EncodingDetectionContext(data, dataLength, eolPositions);

        foreach (var candidate in encodingsToCheckOrdered)
        {
            if (candidate.Value(detectionContext))
            {
                return candidate.Key;
            }
        }
    }

    return EncodingType.None;
}
/// <summary>
/// Runs <c>IsFromEncoding</c> for the given encoding using <c>IsBigEndian</c>
/// as the endianness check.
/// </summary>
/// <param name="context">Detection context to inspect.</param>
/// <param name="encoding">Encoding to check the data against.</param>
/// <returns>The result of <c>IsFromEncoding</c>.</returns>
private bool IsBigEndianEncoding(EncodingDetectionContext context, EncodingType encoding)
    => IsFromEncoding(context, encoding, IsBigEndian);
/// <summary>
/// Runs the big endian encoding check for <c>EncodingType.Utf16BE</c>.
/// </summary>
/// <param name="context">Detection context to inspect.</param>
/// <returns>The result of <c>IsBigEndianEncoding</c> for utf-16 big endian.</returns>
private bool IsUtf16BE(EncodingDetectionContext context)
    => IsBigEndianEncoding(context, EncodingType.Utf16BE);
/// <summary>
/// Runs the little endian encoding check for <c>EncodingType.Utf16LE</c>.
/// </summary>
/// <param name="context">Detection context to inspect.</param>
/// <returns>The result of <c>IsLittleEndianEncoding</c> for utf-16 little endian.</returns>
private bool IsUtf16LE(EncodingDetectionContext context)
    => IsLittleEndianEncoding(context, EncodingType.Utf16LE);