Exemple #1
0
 /// <summary>
 /// Looks for an end-of-line byte that occupies the last position of its unit and for which
 /// <c>IsPrecededByNullBytes</c> reports <c>numBytesPerUnit - 1</c> null bytes.
 /// </summary>
 /// <param name="context">Detection context supplying the data and the end-of-line byte indexes.</param>
 /// <param name="numBytesPerUnit">Number of bytes per unit of the encoding being tested.</param>
 /// <returns><c>true</c> if at least one end-of-line index meets both conditions; otherwise <c>false</c>.</returns>
 private bool IsBigEndian(EncodingDetectionContext context, int numBytesPerUnit)
 {
     int paddingLength = numBytesPerUnit - 1;

     foreach (var candidateIndex in context.EolIndexes)
     {
         // The null-byte helper is queried for every index, before the alignment test.
         bool hasLeadingNulls = IsPrecededByNullBytes(context.Data, candidateIndex, numNullBytes: paddingLength);
         bool closesUnit = candidateIndex % numBytesPerUnit == paddingLength;

         if (closesUnit && hasLeadingNulls)
         {
             return true;
         }
     }

     return false;
 }
Exemple #2
0
 /// <summary>
 /// Looks for an end-of-line byte that occupies the first position of its unit and for which
 /// <c>IsFollowedByNullBytes</c> reports <c>numBytesPerUnit - 1</c> null bytes.
 /// </summary>
 /// <param name="context">Detection context supplying the data, its length and the end-of-line byte indexes.</param>
 /// <param name="numBytesPerUnit">Number of bytes per unit of the encoding being tested.</param>
 /// <returns><c>true</c> if at least one end-of-line index meets both conditions; otherwise <c>false</c>.</returns>
 private bool IsLittleEndian(EncodingDetectionContext context, int numBytesPerUnit)
 {
     int paddingLength = numBytesPerUnit - 1;

     foreach (var candidateIndex in context.EolIndexes)
     {
         // The null-byte helper is queried for every index, before the alignment test.
         bool hasTrailingNulls = IsFollowedByNullBytes(context.Data, context.DataLength, candidateIndex, numNullBytes: paddingLength);
         bool opensUnit = candidateIndex % numBytesPerUnit == 0;

         if (opensUnit && hasTrailingNulls)
         {
             return true;
         }
     }

     return false;
 }
Exemple #3
0
        /// <summary>
        /// Decides whether the data can be treated as UTF-8: no null byte may appear within the first
        /// <c>DataLength</c> bytes, and at least one end-of-line byte must have no null byte directly
        /// before or after it.
        /// </summary>
        /// <param name="context">Detection context supplying the data, its length and the end-of-line byte indexes.</param>
        /// <returns><c>true</c> if both conditions hold; otherwise <c>false</c>.</returns>
        private bool IsUtf8(EncodingDetectionContext context)
        {
            // A single null byte inside the inspected range rules UTF-8 out.
            foreach (var current in context.Data.Take(context.DataLength))
            {
                if (current == ByteCode.Null)
                {
                    return false;
                }
            }

            int lastDataIndex = context.DataLength - 1;

            foreach (var eolIndex in context.EolIndexes)
            {
                // Corner cases: an EOL at index 0 has no predecessor, an EOL at the last data index has no successor.
                // NOTE(review): assumes every EOL index lies inside the valid data range; confirm against the caller.
                bool precededByNull = eolIndex != 0 && context.Data[eolIndex - 1] == ByteCode.Null;
                bool followedByNull = eolIndex != lastDataIndex && context.Data[eolIndex + 1] == ByteCode.Null;

                if (!(precededByNull || followedByNull))
                {
                    return true;
                }
            }

            return false;
        }
Exemple #4
0
        /// <summary>
        /// Tests the data against a multi-byte encoding: the data must not contain an entire unit of
        /// null bytes, and the supplied endianness check must succeed.
        /// </summary>
        /// <param name="context">Detection context supplying the data and its length.</param>
        /// <param name="encoding">Encoding whose number of bytes per unit is used for the checks.</param>
        /// <param name="IsFromEndiannes">Endianness check invoked with the context and the unit size.</param>
        /// <returns>
        /// <c>false</c> if a whole null unit is found; otherwise the result of <paramref name="IsFromEndiannes"/>.
        /// </returns>
        private bool IsFromEncoding(
            EncodingDetectionContext context,
            EncodingType encoding,
            Func <EncodingDetectionContext, int, bool> IsFromEndiannes)
        {
            int unitSize = encoding.GetNumBytesPerUnit();
            bool hasNullUnit = ContainsNullBytesUnit(context.Data, context.DataLength, unitSize);

            // The endianness check only runs when no whole null unit was found.
            return !hasNullUnit && IsFromEndiannes(context, unitSize);
        }
Exemple #5
0
        /// <summary>
        /// Tries to detect the encoding of the data by inspecting its end-of-line bytes.
        /// </summary>
        /// <param name="data">Data in which to detect the encoding.</param>
        /// <param name="dataLength">Length of the data.</param>
        /// <returns>
        /// The first encoding in <c>encodingsToCheckOrdered</c> whose check succeeds, or
        /// <c>EncodingType.None</c> when there are no end-of-line bytes or no check succeeds.
        /// </returns>
        internal EncodingType GetEncodingFromEolBytes(byte[] data, int dataLength)
        {
            // NOTE(review): the EOL search is not given dataLength; confirm it cannot return indexes beyond the valid data.
            var eolIndexes = data.FindEolByteIndexes();

            // Without end-of-line bytes no replacement will be needed, so the encoding does not matter.
            if (eolIndexes.Any())
            {
                var context = new EncodingDetectionContext(data, dataLength, eolIndexes);

                // Candidates are tried in their declared order; the first match wins.
                foreach (var candidate in encodingsToCheckOrdered)
                {
                    if (candidate.Value(context))
                    {
                        return candidate.Key;
                    }
                }
            }

            return EncodingType.None;
        }
Exemple #6
0
 /// <summary>
 /// Runs the generic encoding test for <paramref name="encoding"/> using the big-endian check.
 /// </summary>
 /// <param name="context">Detection context to inspect.</param>
 /// <param name="encoding">Encoding to test against.</param>
 /// <returns>The result of <c>IsFromEncoding</c> with <c>IsBigEndian</c> as the endianness check.</returns>
 private bool IsBigEndianEncoding(EncodingDetectionContext context, EncodingType encoding)
 {
     bool matches = IsFromEncoding(context, encoding, IsBigEndian);
     return matches;
 }
Exemple #7
0
 /// <summary>
 /// Checks the context against <c>EncodingType.Utf16BE</c> through the big-endian encoding test.
 /// </summary>
 /// <param name="context">Detection context to inspect.</param>
 /// <returns>The result of <c>IsBigEndianEncoding</c> for <c>EncodingType.Utf16BE</c>.</returns>
 private bool IsUtf16BE(EncodingDetectionContext context)
 {
     bool matches = IsBigEndianEncoding(context, EncodingType.Utf16BE);
     return matches;
 }
Exemple #8
0
 /// <summary>
 /// Checks the context against <c>EncodingType.Utf16LE</c> through the little-endian encoding test.
 /// </summary>
 /// <param name="context">Detection context to inspect.</param>
 /// <returns>The result of <c>IsLittleEndianEncoding</c> for <c>EncodingType.Utf16LE</c>.</returns>
 private bool IsUtf16LE(EncodingDetectionContext context)
 {
     bool matches = IsLittleEndianEncoding(context, EncodingType.Utf16LE);
     return matches;
 }