/// <summary>
/// Shows the text of a document on the console one screenful at a time.
/// </summary>
/// <param name="fileName">Path of the document handed to the extractor factory; also printed as the page header.</param>
/// <param name="formatted">
/// When true the extractor comes from <c>CreateFormattedTextExtractor</c>,
/// otherwise from <c>CreateTextExtractor</c>.
/// </param>
// NOTE(review): the signature carries no return type as given, while the body only uses a bare
// "return;" - confirm the intended modifiers against the enclosing class.
public ExtractText(string fileName, bool formatted)
{
    //ExStart:ExtractText
    int pageSize = Console.WindowHeight;
    ExtractorFactory extractorFactory = new ExtractorFactory();

    TextExtractor reader;
    if (formatted)
    {
        reader = extractorFactory.CreateFormattedTextExtractor(fileName);
    }
    else
    {
        reader = extractorFactory.CreateTextExtractor(fileName);
    }

    // The factory hands back null when it has no extractor for the document.
    if (reader == null)
    {
        Console.WriteLine("The document's format is not supported");
        return;
    }

    using (reader)
    {
        while (true)
        {
            Console.Clear();
            Console.WriteLine("{0}", fileName);

            // Print one page. At least one line is always requested, and the counter
            // also advances for the final null read, exactly like the paging contract expects.
            bool endOfText;
            int consumed = 0;
            while (true)
            {
                string text = reader.ExtractLine();
                consumed++;
                endOfText = text == null;
                if (endOfText)
                {
                    break;
                }

                Console.WriteLine(text);
                if (consumed >= pageSize)
                {
                    break;
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press Esc to exit or any other key to move to the next page");

            // A key press is only awaited while there is still text left to show.
            if (endOfText)
            {
                break;
            }

            if (Console.ReadKey().Key == ConsoleKey.Escape)
            {
                break;
            }
        }
    }
    //ExEnd:ExtractText
}
/// <summary>
/// Counts the words of a document and prints up to ten of the most frequent ones to the console.
/// </summary>
/// <param name="fileName">Path of the document handed to the extractor factory.</param>
/// <param name="maxWordLength">
/// Length threshold: only words strictly longer than this value are counted.
/// </param>
// NOTE(review): the signature carries no return type as given, while the body only uses a bare
// "return;" - confirm the intended modifiers against the enclosing class.
public WordStatistic(string fileName, int maxWordLength)
{
    //ExStart:WordStatistic
    ExtractorFactory extractorFactory = new ExtractorFactory();
    Dictionary<string, int> frequencies = new Dictionary<string, int>();
    TextExtractor reader = extractorFactory.CreateTextExtractor(fileName);

    // The factory hands back null when it has no extractor for the document.
    if (reader == null)
    {
        Console.WriteLine("The document's format is not supported");
        return;
    }

    using (reader)
    {
        // ExtractLine yields null once the document is exhausted.
        for (string text = reader.ExtractLine(); text != null; text = reader.ExtractLine())
        {
            foreach (string token in text.Split(' ', ',', ';', '.'))
            {
                string normalized = token.Trim().ToLower();
                if (normalized.Length <= maxWordLength)
                {
                    continue;
                }

                // A missing key leaves "seen" at zero, so the first occurrence is stored as 1.
                int seen;
                frequencies.TryGetValue(normalized, out seen);
                frequencies[normalized] = seen + 1;
            }
        }
    }

    Console.WriteLine("Top words:");

    // Repeatedly pick the current maximum and remove it; stops early when the dictionary runs dry.
    int reported = 0;
    while (reported < 10)
    {
        string bestWord = null;
        int bestCount = -1;
        foreach (KeyValuePair<string, int> entry in frequencies)
        {
            if (entry.Value > bestCount)
            {
                bestCount = entry.Value;
                bestWord = entry.Key;
            }
        }

        if (bestWord == null)
        {
            break;
        }

        Console.WriteLine("{0}: {1}", bestWord, bestCount);
        frequencies.Remove(bestWord);
        reported++;
    }
    //ExEnd:WordStatistic
}
/// <summary>
/// Extracts the text of an uploaded file line by line and returns the lines as a JSON array.
/// </summary>
/// <param name="fileName">
/// Name of a file inside the "App_Data/Uploads" folder. Only the file-name part is used;
/// any directory components sent by the client are discarded.
/// </param>
/// <param name="password">
/// Optional password. When supplied, ".one" files are opened with <c>NoteTextExtractor</c>
/// and every other file with <c>WordsTextExtractor</c>, both using the password.
/// </param>
/// <returns>
/// A JSON array with the extracted lines. Failures do not throw: the corresponding message
/// ("Invalid password" for an unprotected open of a ".one" file, otherwise the exception
/// message or a validation message) is appended to the array instead.
/// </returns>
public ActionResult ExtractText([FromBody] string fileName, string password = null)
{
    //ExStart:ExtractText
    List<string> extractedText = new List<string>();

    try
    {
        // fileName comes from the request body: keep only its file-name part so that
        // "..\" segments cannot make the mapped path leave the uploads folder.
        string safeName = Path.GetFileName(fileName);
        if (string.IsNullOrWhiteSpace(safeName))
        {
            extractedText.Add("File name is not specified");
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }

        string path = Server.MapPath("../App_Data//Uploads//" + safeName);
        string ext = Path.GetExtension(path);

        // Compare extensions without regard to case so "NOTES.ONE" is treated like "notes.one".
        bool isOneNote = string.Equals(ext, ".one", StringComparison.OrdinalIgnoreCase);
        bool isZip = string.Equals(ext, ".zip", StringComparison.OrdinalIgnoreCase);

        string line;

        if (!string.IsNullOrWhiteSpace(password))
        {
            // Password-protected file.
            LoadOptions loadOptions = new LoadOptions();
            loadOptions.Password = password;

            if (isOneNote)
            {
                using (NoteTextExtractor extractor = new NoteTextExtractor(path, loadOptions))
                {
                    while ((line = extractor.ExtractLine()) != null)
                    {
                        extractedText.Add(line);
                    }
                }
            }
            else
            {
                // Disposed here as well; previously this extractor was left open.
                using (WordsTextExtractor extractor = new WordsTextExtractor(path, loadOptions))
                {
                    while ((line = extractor.ExtractLine()) != null)
                    {
                        extractedText.Add(line);
                    }
                }
            }
        }
        else if (isZip)
        {
            // Archive: extract the text of every entry in turn.
            ExtractorFactory factory = new ExtractorFactory();
            using (var container = new ZipContainer(path))
            {
                for (int i = 0; i < container.Entities.Count; i++)
                {
                    using (TextExtractor extractor = factory.CreateTextExtractor(container.Entities[i].OpenStream()))
                    {
                        // The factory returns null for content it cannot handle; skip such
                        // entries instead of aborting the whole archive.
                        if (extractor == null)
                        {
                            continue;
                        }

                        while ((line = extractor.ExtractLine()) != null)
                        {
                            extractedText.Add(line);
                        }
                    }
                }
            }
        }
        else
        {
            ExtractorFactory factory = new ExtractorFactory();
            using (TextExtractor extractor = factory.CreateTextExtractor(path))
            {
                if (extractor == null)
                {
                    extractedText.Add("The document's format is not supported");
                }
                else
                {
                    try
                    {
                        while ((line = extractor.ExtractLine()) != null)
                        {
                            extractedText.Add(line);
                        }
                    }
                    catch (Exception readError)
                    {
                        // Reading stops at the first failure. The old loop kept the previous
                        // line after a failure and could re-add it (or this message) forever.
                        extractedText.Add(isOneNote ? "Invalid password" : readError.Message);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Counts the words of an uploaded file and returns up to ten of the most frequent ones as a JSON array.
/// </summary>
/// <param name="fileName">
/// Name of a file inside the "App_Data/Uploads" folder. Only the file-name part is used;
/// any directory components sent by the client are discarded.
/// </param>
/// <returns>
/// A JSON array that starts with "Top words:" followed by "word : count" entries.
/// Failures do not throw: a validation message, "The document's format is not supported",
/// or the exception message is returned in the array instead.
/// </returns>
public ActionResult CountStatistics([FromBody] string fileName)
{
    // Only words strictly longer than this are counted. The previous code derived the value
    // by parsing the mapped file path as an integer, which always fell back to 5.
    const int maxWordLength = 5;
    const int topWordCount = 10;

    List<string> extractedText = new List<string>();

    try
    {
        // fileName comes from the request body: keep only its file-name part so that
        // "..\" segments cannot make the mapped path leave the uploads folder.
        string safeName = System.IO.Path.GetFileName(fileName);
        if (string.IsNullOrWhiteSpace(safeName))
        {
            extractedText.Add("File name is not specified");
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }

        string filePath = Server.MapPath("../App_Data//Uploads//" + safeName);

        ExtractorFactory factory = new ExtractorFactory();
        Dictionary<string, int> statistic = new Dictionary<string, int>();

        using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
        {
            if (extractor == null)
            {
                // Stop here: continuing would dereference the null extractor and append a
                // second, misleading null-reference message.
                extractedText.Add("The document's format is not supported");
                return Json(extractedText, JsonRequestBehavior.AllowGet);
            }

            string line;
            while ((line = extractor.ExtractLine()) != null)
            {
                string[] words = line.Split(' ', ',', ';', '.');
                foreach (string w in words)
                {
                    string word = w.Trim().ToLower();
                    if (word.Length > maxWordLength)
                    {
                        // A missing key leaves "seen" at zero, so the first occurrence is stored as 1.
                        int seen;
                        statistic.TryGetValue(word, out seen);
                        statistic[word] = seen + 1;
                    }
                }
            }
        }

        extractedText.Add("Top words:");

        // Repeatedly take the current maximum and remove it; ends early when no words remain.
        for (int i = 0; i < topWordCount; i++)
        {
            int count = -1;
            string maxKey = null;
            foreach (KeyValuePair<string, int> entry in statistic)
            {
                if (entry.Value > count)
                {
                    count = entry.Value;
                    maxKey = entry.Key;
                }
            }

            if (maxKey == null)
            {
                break;
            }

            extractedText.Add(maxKey + " : " + count);
            statistic.Remove(maxKey);
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}