Exemple #1
0
        /// <summary>
        /// Heuristically decides whether the file at <paramref name="fileName"/> looks like
        /// plain text rather than binary data or a file dominated by numbers/time codes.
        /// </summary>
        /// <param name="fileName">Path of the file to inspect.</param>
        /// <returns>
        /// <c>false</c> when the file is shorter than 20 or longer than 5,000,000 bytes, contains a
        /// control character other than tab, CR or LF, or has too many number patterns;
        /// otherwise <c>true</c> when the letter/digit ratios look like text.
        /// </returns>
        public static bool IsPlainText(string fileName)
        {
            // Characters that count as "text" in addition to letters.
            const string textPunctuation = " -,.!?[]()\r\n";

            long byteCount = new FileInfo(fileName).Length;
            if (byteCount < 20 || byteCount > 5000000)
            {
                return false; // too short or too large to be plain text
            }

            var content = ReadAllTextShared(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName));

            int digits    = 0;
            int textChars = 0;

            foreach (char c in content)
            {
                // Order matters: CR/LF are control characters but must be counted as text first.
                if (char.IsLetter(c) || textPunctuation.IndexOf(c) >= 0)
                {
                    textChars++;
                    continue;
                }

                if (char.IsControl(c) && c != '\t')
                {
                    return false; // binary found
                }

                if (CharUtils.IsDigit(c))
                {
                    digits++;
                }
            }

            int total = content.Length;
            if (total < 100)
            {
                // Short content: use fixed limits instead of ratios.
                return digits < 5 && textChars > 20;
            }

            // Number pairs joined by a separator, e.g. "12:34" or "1,5".
            if (Regex.Matches(content, @"\d+[.:,; -]\d+").Count > 30)
            {
                return false; // looks like time codes
            }

            // Runs of 3 to 8 digits.
            int digitRuns = Regex.Matches(content, @"\d{3,8}").Count;
            if (digitRuns > 30 || (total < 1000 && digitRuns > 10))
            {
                return false; // looks like time codes
            }

            // Stretches of at least 150 consecutive non-digit characters.
            if (Regex.Matches(content, @"[^\d]{150,100000}").Count > 10)
            {
                return true; // looks like text
            }

            double maxDigits    = total * 0.015 + 25;
            double minTextChars = total * 0.8;

            return digits < maxDigits && textChars > minTextChars;
        }
Exemple #2
0
        /// <summary>
        /// Reads <paramref name="fileName"/> line by line and loads it with the first entry of
        /// <c>SubtitleFormat.AllSubtitleFormats</c> whose <c>IsMine</c> check accepts the lines.
        /// </summary>
        /// <param name="fileName">Path of the subtitle file; also stored in <c>FileName</c>.</param>
        /// <param name="encoding">
        /// Receives the reader's <c>CurrentEncoding</c> (sampled before the first read), or UTF-8
        /// when the file could not be opened.
        /// </param>
        /// <param name="useThisEncoding">
        /// Encoding to open the file with, or <c>null</c> to use
        /// <c>LanguageAutoDetect.GetEncodingFromFile</c>.
        /// </param>
        /// <param name="batchMode">Value assigned to the matching format's <c>BatchMode</c>.</param>
        /// <returns>
        /// The format that loaded the file; <c>null</c> when the file could not be opened or when
        /// an explicit encoding was given and no format matched. When no encoding was given and no
        /// format matched, the result of a retry with <c>Encoding.Unicode</c> is returned.
        /// </returns>
        public SubtitleFormat LoadSubtitle(string fileName, out Encoding encoding, Encoding useThisEncoding, bool batchMode)
        {
            FileName = fileName;

            _paragraphs = new List<Paragraph>();

            var lines = new List<string>();
            StreamReader sr;

            if (useThisEncoding != null)
            {
                try
                {
                    sr = new StreamReader(fileName, useThisEncoding);
                }
                catch (Exception exception)
                {
                    System.Diagnostics.Debug.WriteLine(exception.Message);
                    encoding = Encoding.UTF8;
                    return null;
                }
            }
            else
            {
                try
                {
                    sr = new StreamReader(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName), true);
                }
                catch
                {
                    // Best-effort fallback: reopen with shared read/write access and the
                    // StreamReader default encoding.
                    try
                    {
                        Stream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                        sr = new StreamReader(fs);
                    }
                    catch (Exception exception)
                    {
                        System.Diagnostics.Debug.WriteLine(exception.Message);
                        encoding = Encoding.UTF8;
                        return null;
                    }
                }
            }

            // Dispose the reader even when reading throws, so the file handle is never leaked.
            using (sr)
            {
                // Sampled before the first read. StreamReader may switch encodings once byte order
                // mark detection runs on the first read, so this can differ from the encoding
                // actually used for decoding.
                // NOTE(review): confirm the pre-read value is the one callers expect.
                encoding = sr.CurrentEncoding;
                while (!sr.EndOfStream)
                {
                    lines.Add(sr.ReadLine());
                }
            }

            foreach (SubtitleFormat subtitleFormat in SubtitleFormat.AllSubtitleFormats)
            {
                if (subtitleFormat.IsMine(lines, fileName))
                {
                    Header = null;
                    subtitleFormat.BatchMode = batchMode;
                    subtitleFormat.LoadSubtitle(this, lines, fileName);
                    _format = subtitleFormat;
                    _wasLoadedWithFrameNumbers = _format.IsFrameBased;
                    if (_wasLoadedWithFrameNumbers)
                    {
                        CalculateTimeCodesFromFrameNumbers(Configuration.Settings.General.CurrentFrameRate);
                    }
                    return subtitleFormat;
                }
            }

            if (useThisEncoding == null)
            {
                // No format matched with the detected encoding: retry once as UTF-16.
                // NOTE(review): this resolves to a three-argument overload that is not visible
                // here; verify it does not silently drop batchMode.
                return LoadSubtitle(fileName, out encoding, Encoding.Unicode);
            }

            return null;
        }
        /// <summary>
        /// Reads the whole of <paramref name="fileName"/>, splits it into lines and hands it to the
        /// first entry of <c>SubtitleFormat.AllSubtitleFormats</c> whose <c>IsMine</c> check accepts
        /// the lines. Formats whose <c>Extension</c> equals the file's extension (and whose name does
        /// not start with "Unknown") are tried before all others.
        /// </summary>
        /// <param name="fileName">Path of the subtitle file; also stored in <c>FileName</c>.</param>
        /// <param name="encoding">
        /// Receives the reader's <c>CurrentEncoding</c> (sampled before the first read), or UTF-8
        /// when the file could not be opened.
        /// </param>
        /// <param name="useThisEncoding">
        /// Encoding to open the file with, or <c>null</c> to use
        /// <c>LanguageAutoDetect.GetEncodingFromFile</c>.
        /// </param>
        /// <param name="batchMode">Forwarded to <c>FinalizeFormat</c>.</param>
        /// <param name="sourceFrameRate">Forwarded to <c>FinalizeFormat</c>.</param>
        /// <param name="loadSubtitle">Forwarded to <c>FinalizeFormat</c>.</param>
        /// <returns>
        /// The result of <c>FinalizeFormat</c> for the first matching format; <c>null</c> when the
        /// file could not be opened or when an explicit encoding was given and no format matched.
        /// When no encoding was given and no format matched, the result of a retry with
        /// <c>Encoding.Unicode</c> is returned.
        /// </returns>
        public SubtitleFormat LoadSubtitle(string fileName, out Encoding encoding, Encoding useThisEncoding, bool batchMode, double? sourceFrameRate = null, bool loadSubtitle = true)
        {
            FileName   = fileName;
            Paragraphs = new List<Paragraph>();
            StreamReader sr;

            if (useThisEncoding != null)
            {
                try
                {
                    sr = new StreamReader(fileName, useThisEncoding);
                }
                catch (Exception exception)
                {
                    System.Diagnostics.Debug.WriteLine(exception.Message);
                    encoding = Encoding.UTF8;
                    return null;
                }
            }
            else
            {
                try
                {
                    sr = new StreamReader(fileName, LanguageAutoDetect.GetEncodingFromFile(fileName), true);
                }
                catch
                {
                    // Best-effort fallback: reopen with shared read/write access and the
                    // StreamReader default encoding.
                    try
                    {
                        var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                        sr = new StreamReader(fs);
                    }
                    catch (Exception exception)
                    {
                        System.Diagnostics.Debug.WriteLine(exception.Message);
                        encoding = Encoding.UTF8;
                        return null;
                    }
                }
            }

            string content;

            // Dispose the reader even when reading throws, so the file handle is never leaked.
            using (sr)
            {
                // Sampled before the first read. StreamReader may switch encodings once byte order
                // mark detection runs on the first read, so this can differ from the encoding
                // actually used for decoding.
                // NOTE(review): confirm the pre-read value is the one callers expect.
                encoding = sr.CurrentEncoding;
                content  = sr.ReadToEnd();
            }

            // The file is already closed here; splitting and format detection work on the text only.
            var lines = content.SplitToLines();

            var ext = Path.GetExtension(fileName).ToLowerInvariant();

            // First pass: formats registered for this file extension, excluding "Unknown..." ones.
            foreach (var subtitleFormat in SubtitleFormat.AllSubtitleFormats.Where(p => p.Extension == ext && !p.Name.StartsWith("Unknown", StringComparison.Ordinal)))
            {
                if (subtitleFormat.IsMine(lines, fileName))
                {
                    return FinalizeFormat(fileName, batchMode, sourceFrameRate, lines, subtitleFormat, loadSubtitle);
                }
            }

            // Second pass: every format skipped by the first pass.
            foreach (var subtitleFormat in SubtitleFormat.AllSubtitleFormats.Where(p => p.Extension != ext || p.Name.StartsWith("Unknown", StringComparison.Ordinal)))
            {
                if (subtitleFormat.IsMine(lines, fileName))
                {
                    return FinalizeFormat(fileName, batchMode, sourceFrameRate, lines, subtitleFormat, loadSubtitle);
                }
            }

            if (useThisEncoding == null)
            {
                // No format matched with the detected encoding: retry once as UTF-16.
                // NOTE(review): this resolves to a three-argument overload that is not visible
                // here; verify it does not silently drop batchMode, sourceFrameRate or loadSubtitle.
                return LoadSubtitle(fileName, out encoding, Encoding.Unicode);
            }

            return null;
        }