/// <summary>
        /// Downloads the subtitle file at <paramref name="vttUrl"/> and splits its text into sentences.
        /// </summary>
        /// <param name="token">Value sent as the <c>authorization</c> request header.</param>
        /// <param name="vttUrl">Address the subtitle content is downloaded from.</param>
        /// <returns>
        /// The sentences found, in order of appearance, after the list has been handed to
        /// <c>RemoveSmallTalks</c>. A sentence ends at a '.' or '?'; text following the last
        /// terminator is not returned. When parsing fails, the error is traced and whatever was
        /// collected up to that point is returned.
        /// </returns>
        /// <remarks>
        /// Download errors are not caught here and propagate to the caller.
        /// </remarks>
        private static List <string> ExtractVttContent(string token, string vttUrl)
        {
            List <string> sentences = new List <string>();

            byte[] content;
            using (var client = new WebClient())
            {
                client.Headers.Add("authorization", token);
                content = client.DownloadData(vttUrl);
            }

            SubtitlesParser.Classes.Parsers.SubParser parser = new SubtitlesParser.Classes.Parsers.SubParser();

            // Parse directly from memory. Going through a fixed "file.txt" in the application
            // directory made concurrent calls overwrite each other's data and required write
            // access to the install folder.
            using (var stream = new MemoryStream(content))
            {
                try
                {
                    // Keep the placeholder name that used to be given to the temporary file so the
                    // parser receives the same extension hint as before.
                    // NOTE(review): assumes format detection only looks at the file extension - confirm.
                    var mostLikelyFormat = parser.GetMostLikelyFormat("file.txt");
                    var items            = parser.ParseStream(stream, Encoding.UTF8, mostLikelyFormat);

                    // The very first sentence starts with a single space, as it always has.
                    var sentence = new StringBuilder(" ");
                    foreach (var item in items)
                    {
                        foreach (var line in item.Lines)
                        {
                            foreach (char character in line)
                            {
                                sentence.Append(character);

                                if (character == '.' || character == '?')
                                {
                                    sentences.Add(sentence.ToString());
                                    sentence.Clear();
                                }
                            }
                            // Separate consecutive subtitle lines with a space.
                            sentence.Append(' ');
                        }
                    }
                    // Remove small talk from the transcript sentences.
                    RemoveSmallTalks(sentences);
                }
                catch (Exception ex)
                {
                    // Best effort: keep returning what was collected, but do not hide the failure.
                    System.Diagnostics.Trace.TraceError("Extracting sentences from {0} failed: {1}", vttUrl, ex);
                }
            }
            return(sentences);
        }
        /// <summary>
        /// Scans every subtitle file in <paramref name="SubtilesFolder"/> for the first entry of
        /// <paramref name="vocab"/> and records a <c>Sample</c> for each subtitle line that contains it.
        /// </summary>
        /// <param name="vocab">Words to look for. Only <c>vocab[0]</c> is searched.</param>
        /// <param name="SubtilesFolder">Folder whose files are parsed as subtitles.</param>
        /// <returns>
        /// One sample (start time, end time, file name) per matching line; empty when the folder has
        /// no files or <paramref name="vocab"/> is null or empty. Files are processed in parallel, so
        /// the order of the samples is not deterministic.
        /// </returns>
        /// <remarks>
        /// The word match ignores case and punctuation. A file that fails to parse is reported on the
        /// console and skipped.
        /// </remarks>
        public static IEnumerable <Sample> SearchVocabularyInSubtitles(string[] vocab, string SubtilesFolder)
        {
            var samplesWithVocab = new List <Sample>();

            String[] files = System.IO.Directory.GetFiles(SubtilesFolder);
            if (files.Length == 0)
            {
                // Nothing to scan; indexing files[0] below would throw.
                return(samplesWithVocab);
            }
            Console.WriteLine($"Files : {files[0]}");

            if (vocab == null || vocab.Length == 0)
            {
                // No word to look for, so no file can produce a match.
                return(samplesWithVocab);
            }

            string searchedWord = vocab[0];

            // List<T> is not safe for concurrent writes; every Add from the parallel loop goes through this lock.
            var gate = new object();

            Parallel.ForEach(files, (currentFile) => {
                String fileName = System.IO.Path.GetFullPath(currentFile);
                var parser      = new SubtitlesParser.Classes.Parsers.SubParser();

                using (var fileStream = File.OpenRead(fileName))
                {
                    try
                    {
                        var mostLikelyFormat = parser.GetMostLikelyFormat(fileName);
                        var items            = parser.ParseStream(fileStream, Encoding.UTF8, mostLikelyFormat);
                        foreach (var sequence in items)
                        {
                            foreach (var sentence in sequence.Lines)
                            {
                                // Strip punctuation, then split into words.
                                var words = Regex.Replace(sentence, @"[^\w\s]", "").Split(' ');

                                // Look for the vocabulary word in the sequence, ignoring case.
                                if (words.Contains(searchedWord, StringComparer.OrdinalIgnoreCase))
                                {
                                    // NOTE(review): the single-argument TimeSpan constructor takes ticks;
                                    // presumably StartTime/EndTime are milliseconds, in which case
                                    // TimeSpan.FromMilliseconds is what is meant - confirm with callers
                                    // before changing the values they receive.
                                    var sample = new Sample
                                    {
                                        StartTime   = new TimeSpan(sequence.StartTime),
                                        EndTime     = new TimeSpan(sequence.EndTime),
                                        SubFileName = fileName
                                    };

                                    lock (gate)
                                    {
                                        samplesWithVocab.Add(sample);
                                    }

                                    // Echo the whole sequence; each line is followed by a space.
                                    Console.WriteLine(string.Join(" ", sequence.Lines) + " ");
                                }
                            }
                        }
                    }catch (Exception ex) {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", fileName, ex);
                    }
                }
            });

            return(samplesWithVocab);
        }
示例#3
0
        /// <summary>
        /// Parses every file returned by <c>BrowseTestSubtitlesFiles</c> and prints, per file, either
        /// a success line with the item count and the percentage of items whose start or end time is
        /// not positive, or the failure with its exception. Waits for a line of console input before
        /// returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var subParser = new SubtitlesParser.Classes.Parsers.SubParser();

            foreach (var path in BrowseTestSubtitlesFiles())
            {
                var name = Path.GetFileName(path);
                using (var input = File.OpenRead(path))
                {
                    try
                    {
                        var format = subParser.GetMostLikelyFormat(name);
                        var parsed = subParser.ParseStream(input, Encoding.UTF8, format);

                        // An empty result is reported through the same failure path as a parser error.
                        if (!parsed.Any())
                        {
                            throw new ArgumentException("Not items found!");
                        }

                        // Items with a non-positive start or end time are counted as corrupted.
                        var corruptedCount   = parsed.Count(entry => entry.StartTime <= 0 || entry.EndTime <= 0);
                        var corruptedPercent = (corruptedCount * 100) / parsed.Count;

                        Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items - {2}% corrupted)",
                                          name, parsed.Count, corruptedPercent);
                        Console.WriteLine("----------------");
                    }
                    catch (Exception failure)
                    {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", name, failure);
                    }
                }
                Console.WriteLine("----------------------");
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
示例#4
0
        /// <summary>
        /// Runs the subtitle parser over each file from <c>BrowseTestSubtitlesFiles</c> and writes a
        /// report to the console: item count and share of items with a non-positive start or end
        /// time on success, the exception on failure. Blocks on console input at the end.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var subtitleParser = new SubtitlesParser.Classes.Parsers.SubParser();
            var candidates     = BrowseTestSubtitlesFiles();

            foreach (var candidate in candidates)
            {
                var shortName = Path.GetFileName(candidate);
                using (var source = File.OpenRead(candidate))
                {
                    try
                    {
                        var guessedFormat = subtitleParser.GetMostLikelyFormat(shortName);
                        var cues          = subtitleParser.ParseStream(source, Encoding.UTF8, guessedFormat);
                        if (cues.Any())
                        {
                            var total = cues.Count;
                            // A cue is considered intact only when both of its times are positive.
                            var suspicious = cues.Count(cue => !(cue.StartTime > 0 && cue.EndTime > 0));
                            var percentage = (suspicious * 100) / total;

                            Console.WriteLine("Parsing of file {0}: SUCCESS ({1} items - {2}% corrupted)",
                                shortName, total, percentage);
                            Console.WriteLine("----------------");
                        }
                        else
                        {
                            // Routed to the catch below so an empty file is reported as a failure.
                            throw new ArgumentException("Not items found!");
                        }
                    }
                    catch (Exception error)
                    {
                        Console.WriteLine("Parsing of file {0}: FAILURE\n{1}", shortName, error);
                    }
                }
                Console.WriteLine("----------------------");
            }

            // Wait for Enter so the output stays visible.
            Console.ReadLine();
        }