Ejemplo n.º 1
0
        /// <summary>
        /// Prints the text of a document to the console one page at a time,
        /// waiting for a key press between pages.
        /// </summary>
        /// <param name="fileName">Document to read; also printed as the header of every page.</param>
        /// <param name="formatted">
        /// When <c>true</c> the factory is asked for a formatted text extractor,
        /// otherwise for a plain text extractor.
        /// </param>
        public ExtractText(string fileName, bool formatted)
        {
            //ExStart:ExtractText
            // Every page also prints a header row, a blank row and the prompt, and the
            // cursor ends up on one more row. Reserve those rows so that the first lines
            // of a page are not scrolled out of the window before they can be read.
            const int reservedRows = 4;
            int linesPerPage = Math.Max(1, Console.WindowHeight - reservedRows);

            ExtractorFactory factory = new ExtractorFactory();

            TextExtractor extractor;
            if (formatted)
            {
                extractor = factory.CreateFormattedTextExtractor(fileName);
            }
            else
            {
                extractor = factory.CreateTextExtractor(fileName);
            }

            // The factory hands back null when it has no extractor for the document.
            if (extractor == null)
            {
                Console.WriteLine("The document's format is not supported");
                return;
            }

            using (extractor)
            {
                string line;
                do
                {
                    Console.Clear();
                    Console.WriteLine("{0}", fileName);

                    // Print at most one page; a null line marks the end of the document.
                    line = null;
                    for (int printed = 0; printed < linesPerPage; printed++)
                    {
                        line = extractor.ExtractLine();
                        if (line == null)
                        {
                            break;
                        }

                        Console.WriteLine(line);
                    }

                    Console.WriteLine();
                    Console.WriteLine("Press Esc to exit or any other key to move to the next page");
                }
                // Only wait for a key while there is more text to show.
                while (line != null && Console.ReadKey().Key != ConsoleKey.Escape);
            }
            //ExEnd:ExtractText
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Counts how often each word occurs in a document and prints the ten most
        /// frequent words to the console.
        /// </summary>
        /// <param name="fileName">Document to analyse.</param>
        /// <param name="maxWordLength">
        /// Length threshold: despite the name, only words strictly LONGER than this
        /// value are counted.
        /// </param>
        public WordStatistic(string fileName, int maxWordLength)
        {
            //ExStart:WordStatistic
            const int topCount = 10;

            ExtractorFactory         factory   = new ExtractorFactory();
            Dictionary <string, int> statistic = new Dictionary <string, int>();

            TextExtractor extractor = factory.CreateTextExtractor(fileName);

            // The factory hands back null when it has no extractor for the document.
            if (extractor == null)
            {
                Console.WriteLine("The document's format is not supported");
                return;
            }

            // Allocated once instead of once per extracted line.
            char[] separators = { ' ', ',', ';', '.' };

            using (extractor)
            {
                string line;
                while ((line = extractor.ExtractLine()) != null)
                {
                    foreach (string token in line.Split(separators))
                    {
                        string word = token.Trim().ToLower();

                        // Consecutive separators produce empty tokens; they are never words,
                        // even when a negative threshold would otherwise let them through.
                        if (word.Length == 0 || word.Length <= maxWordLength)
                        {
                            continue;
                        }

                        // A missing key leaves occurrences at 0, so one lookup covers
                        // both the first and the repeated occurrence.
                        int occurrences;
                        statistic.TryGetValue(word, out occurrences);
                        statistic[word] = occurrences + 1;
                    }
                }
            }

            Console.WriteLine("Top words:");

            // Repeatedly pick the most frequent remaining word and remove it.
            for (int i = 0; i < topCount; i++)
            {
                int    count  = -1;
                string maxKey = null;
                foreach (KeyValuePair <string, int> entry in statistic)
                {
                    if (entry.Value > count)
                    {
                        count  = entry.Value;
                        maxKey = entry.Key;
                    }
                }

                // Fewer distinct words than requested.
                if (maxKey == null)
                {
                    break;
                }

                Console.WriteLine("{0}: {1}", maxKey, count);
                statistic.Remove(maxKey);
            }
            //ExEnd:WordStatistic
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Extracts the text of an uploaded file line by line and returns the lines as JSON.
        /// </summary>
        /// <param name="fileName">
        /// Name of a file inside the uploads folder. Only the file-name component is
        /// used, so directory segments such as ".." cannot move the lookup elsewhere.
        /// </param>
        /// <param name="password">
        /// Optional password. When supplied, ".one" files are opened with
        /// <c>NoteTextExtractor</c> and every other file with <c>WordsTextExtractor</c>.
        /// </param>
        /// <returns>
        /// JSON array with the extracted lines. On failure the array holds a message
        /// instead (the exception message, "Invalid password" or
        /// "The document's format is not supported").
        /// </returns>
        public ActionResult ExtractText([FromBody] string fileName, string password = null)
        {
            //ExStart:ExtractText
            ExtractorFactory factory       = new ExtractorFactory();
            // fileName comes from the request body: keep only its file-name part.
            string           path          = Server.MapPath("../App_Data//Uploads//" + Path.GetFileName(fileName));
            string           ext           = Path.GetExtension(path);
            List <string>    extractedText = new List <string>();

            // Extensions are compared ignoring case so that ".ONE" / ".ZIP" are recognised too.
            bool isOneNote = string.Equals(ext, ".one", StringComparison.OrdinalIgnoreCase);
            bool isZip     = string.Equals(ext, ".zip", StringComparison.OrdinalIgnoreCase);

            try
            {
                if (!string.IsNullOrWhiteSpace(password))
                {
                    // Password-protected file
                    LoadOptions loadOptions = new LoadOptions();
                    loadOptions.Password = password;

                    if (isOneNote)
                    {
                        using (var extractor = new NoteTextExtractor(path, loadOptions))
                        {
                            AppendExtractedLines(extractor.ExtractLine, extractedText);
                        }
                    }
                    else
                    {
                        // Disposed here; the extractor was previously left open.
                        using (var protectedDocument = new WordsTextExtractor(path, loadOptions))
                        {
                            AppendExtractedLines(protectedDocument.ExtractLine, extractedText);
                        }
                    }
                }
                else if (isZip)
                {
                    // Archive: extract the text of every entry in turn.
                    using (var container = new ZipContainer(path))
                    {
                        for (int i = 0; i < container.Entities.Count; i++)
                        {
                            using (var entryStream = container.Entities[i].OpenStream())
                            using (TextExtractor entryExtractor = factory.CreateTextExtractor(entryStream))
                            {
                                // Skip entries the factory has no extractor for instead of
                                // aborting the whole archive with a NullReferenceException.
                                if (entryExtractor != null)
                                {
                                    AppendExtractedLines(entryExtractor.ExtractLine, extractedText);
                                }
                            }
                        }
                    }
                }
                else
                {
                    TextExtractor extractor = factory.CreateTextExtractor(path);
                    if (extractor == null)
                    {
                        extractedText.Add("The document's format is not supported");
                    }
                    else
                    {
                        using (extractor)
                        {
                            try
                            {
                                AppendExtractedLines(extractor.ExtractLine, extractedText);
                            }
                            catch (Exception)
                            {
                                // Any read failure on a ".one" file opened without a password
                                // is reported as a bad password. Reading stops at the first
                                // failure, so it can no longer loop on a failing extractor.
                                if (!isOneNote)
                                {
                                    // Reported by the outer handler rather than swallowed.
                                    throw;
                                }

                                extractedText.Add("Invalid password");
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return(Json(extractedText, JsonRequestBehavior.AllowGet));
        }

        /// <summary>
        /// Calls <paramref name="readLine"/> until it returns null and appends every
        /// returned line to <paramref name="target"/>.
        /// </summary>
        /// <param name="readLine">Reader that returns the next line, or null when there are no more.</param>
        /// <param name="target">List that receives the lines.</param>
        private static void AppendExtractedLines(Func <string> readLine, List <string> target)
        {
            string line;
            while ((line = readLine()) != null)
            {
                target.Add(line);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Counts how often each word longer than five characters occurs in an
        /// uploaded file and returns the ten most frequent words as JSON.
        /// </summary>
        /// <param name="fileName">
        /// Name of a file inside the uploads folder. Only the file-name component is
        /// used, so directory segments such as ".." cannot move the lookup elsewhere.
        /// </param>
        /// <returns>
        /// JSON array starting with "Top words:" followed by "word : count" entries,
        /// or holding a message when the format is unsupported or an exception occurs.
        /// </returns>
        public ActionResult CountStatistics([FromBody] string fileName)
        {
            // Words must be strictly longer than this to be counted. The former
            // argument-parsing loop tried to parse the mapped file path as an integer,
            // which can never succeed, so it always produced this value.
            const int wordLengthThreshold = 5;
            const int topCount            = 10;

            List <string> extractedText = new List <string>();
            // fileName comes from the request body: keep only its file-name part.
            string        filePath      = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                ExtractorFactory         factory   = new ExtractorFactory();
                Dictionary <string, int> statistic = new Dictionary <string, int>();

                TextExtractor extractor = factory.CreateTextExtractor(filePath);
                if (extractor == null)
                {
                    // Stop here: continuing would dereference the null extractor and
                    // append a NullReferenceException message after this one.
                    extractedText.Add("The document's format is not supported");
                    return(Json(extractedText, JsonRequestBehavior.AllowGet));
                }

                // Allocated once instead of once per extracted line.
                char[] separators = { ' ', ',', ';', '.' };

                using (extractor)
                {
                    string line;
                    while ((line = extractor.ExtractLine()) != null)
                    {
                        foreach (string token in line.Split(separators))
                        {
                            string word = token.Trim().ToLower();
                            if (word.Length <= wordLengthThreshold)
                            {
                                continue;
                            }

                            // A missing key leaves occurrences at 0, so one lookup covers
                            // both the first and the repeated occurrence.
                            int occurrences;
                            statistic.TryGetValue(word, out occurrences);
                            statistic[word] = occurrences + 1;
                        }
                    }
                }

                extractedText.Add("Top words:");

                // Repeatedly pick the most frequent remaining word and remove it.
                for (int i = 0; i < topCount; i++)
                {
                    int    count  = -1;
                    string maxKey = null;
                    foreach (KeyValuePair <string, int> entry in statistic)
                    {
                        if (entry.Value > count)
                        {
                            count  = entry.Value;
                            maxKey = entry.Key;
                        }
                    }

                    // Fewer distinct words than requested.
                    if (maxKey == null)
                    {
                        break;
                    }

                    extractedText.Add(maxKey + " : " + count);
                    statistic.Remove(maxKey);
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return(Json(extractedText, JsonRequestBehavior.AllowGet));
        }