/// <summary>
/// Extracts highlights from an uploaded file with <c>WordsTextExtractor</c> and
/// returns them as a JSON array of strings.
/// </summary>
/// <param name="fileName">
/// Name of the file to read; it is resolved under <c>../App_Data/Uploads</c> via
/// <c>Server.MapPath</c>. Only the final path segment is used.
/// </param>
/// <returns>
/// JSON array containing the extracted highlights, or the single entry
/// "File Format not supported" when extraction throws.
/// </returns>
public ActionResult ExtractHighlight([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();

    // fileName comes from the request: keep only its last segment so that input
    // such as "..\..\web.config" cannot point outside the uploads folder.
    string filePath = Server.MapPath("../App_Data//Uploads//" + Path.GetFileName(fileName));

    try
    {
        using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
        {
            // Two fixed-length highlight requests: one to the Left, one to the Right.
            IList<string> highlights = extractor.ExtractHighlights(
                HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Left, 15, 10),
                HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Right, 20, 10));

            extractedText.AddRange(highlights);
        }
    }
    catch (Exception)
    {
        // Every failure is reported to the client with this one fixed message.
        extractedText.Add("File Format not supported");
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Searches an uploaded file for a keyword with <c>WordsTextExtractor.Search</c>
/// and returns every hit, with its surrounding text, as a JSON array of strings.
/// </summary>
/// <param name="fileName">
/// Name of the file to search; it is resolved under <c>../App_Data/Uploads</c> via
/// <c>Server.MapPath</c>. Only the final path segment is used.
/// </param>
/// <param name="keyWord">The single keyword passed to the search.</param>
/// <returns>
/// JSON array with three entries per hit ("Text at Left", "Found Text",
/// "Text at Right"); an empty array when nothing is found; or the exception
/// message when the search throws.
/// </returns>
// NOTE(review): both parameters are marked [FromBody] - confirm the model binder
// in use actually populates both of them.
public ActionResult SearchText([FromBody] string fileName, [FromBody] string keyWord)
{
    List<string> extractedText = new List<string>();

    // fileName comes from the request: keep only its last segment so that input
    // such as "..\..\web.config" cannot point outside the uploads folder.
    string filePath = Server.MapPath("../App_Data//Uploads//" + Path.GetFileName(fileName));

    try
    {
        //ExStart:SearchTextInDocuments
        using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
        {
            ListSearchHandler handler = new ListSearchHandler();
            extractor.Search(
                new SearchOptions(SearchHighlightOptions.CreateFixedLengthOptions(10)),
                handler,
                null,
                new string[] { keyWord });

            var hits = handler.List;
            if (hits.Count == 0)
            {
                // Console output is not part of the HTTP response; the client
                // simply receives an empty array in this case.
                Console.WriteLine("Not found");
            }

            for (int i = 0; i < hits.Count; i++)
            {
                extractedText.Add("Text at Left: " + hits[i].LeftText);
                extractedText.Add("Found Text: " + hits[i].FoundText);
                extractedText.Add("Text at Right: " + hits[i].RightText);
            }
        }
        //ExEnd:SearchTextInDocuments
    }
    catch (Exception ex)
    {
        // Failures are surfaced to the client as the raw exception message.
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Creates the extractor whose extension list contains <paramref name="fileExt"/>.
/// </summary>
/// <param name="inFilePath">Path handed to the chosen extractor's constructor.</param>
/// <param name="fileExt">
/// Extension to look up; it is compared with <c>==</c> against each entry of the
/// extension lists, so the match is exact.
/// </param>
/// <returns>
/// A Words, Cells, Slides or Pdf text extractor (checked in that order), or
/// <c>null</c> when no list contains the extension.
/// </returns>
private IDocumentContentExtractor GetContentExtractor(string inFilePath, string fileExt)
{
    // Extensions listed in WordTypesForImage.
    if (Array.Exists(WordTypesForImage, known => known == fileExt))
    {
        return new WordsTextExtractor(inFilePath);
    }

    // Extensions listed in ExcelTypesForImage.
    if (Array.Exists(ExcelTypesForImage, known => known == fileExt))
    {
        return new CellsTextExtractor(inFilePath);
    }

    // Extensions listed in SlidesTypesForImage.
    if (Array.Exists(SlidesTypesForImage, known => known == fileExt))
    {
        return new SlidesTextExtractor(inFilePath);
    }

    // Extensions listed in PdfTypesForImage.
    if (Array.Exists(PdfTypesForImage, known => known == fileExt))
    {
        return new PdfTextExtractor(inFilePath);
    }

    // No list matched.
    return null;
}
/// <summary>
/// Extracts the text of an uploaded file line by line and returns the lines as a
/// JSON array of strings.
/// </summary>
/// <param name="fileName">
/// Name of the file to read; it is resolved under <c>../App_Data/Uploads</c> via
/// <c>Server.MapPath</c>. Only the final path segment is used.
/// </param>
/// <param name="password">
/// Optional password. When it is not blank the file is opened with
/// <c>LoadOptions.Password</c> set: ".one" files through <c>NoteTextExtractor</c>,
/// everything else through <c>WordsTextExtractor</c>.
/// </param>
/// <returns>
/// JSON array of the extracted lines. When extraction throws, the lines read so
/// far are followed by the exception message, except in the plain (no password,
/// non-zip) branch: there a failure adds "Invalid password" for ".one" files and
/// adds nothing for other types.
/// </returns>
public ActionResult ExtractText([FromBody] string fileName, string password = null)
{
    //ExStart:ExtractText
    ExtractorFactory factory = new ExtractorFactory();

    // fileName comes from the request: keep only its last segment so that input
    // such as "..\..\web.config" cannot point outside the uploads folder.
    string path = Server.MapPath("../App_Data//Uploads//" + Path.GetFileName(fileName));
    string ext = Path.GetExtension(path);
    List<string> extractedText = new List<string>();

    try
    {
        if (!string.IsNullOrWhiteSpace(password))
        {
            // Password-protected file.
            LoadOptions loadOptions = new LoadOptions();
            loadOptions.Password = password;

            if (ext == ".one")
            {
                using (var extractor = new NoteTextExtractor(path, loadOptions))
                {
                    DrainLines(() => extractor.ExtractLine(), extractedText);
                }
            }
            else
            {
                // Disposed via using so the extractor is released on every path.
                using (WordsTextExtractor protectedDocument = new WordsTextExtractor(path, loadOptions))
                {
                    DrainLines(() => protectedDocument.ExtractLine(), extractedText);
                }
            }
        }
        else if (ext == ".zip")
        {
            // Zip archive: extract the text of every entry in turn.
            using (var container = new ZipContainer(path))
            {
                for (int i = 0; i < container.Entities.Count; i++)
                {
                    using (TextExtractor extractor = factory.CreateTextExtractor(container.Entities[i].OpenStream()))
                    {
                        DrainLines(() => extractor.ExtractLine(), extractedText);
                    }
                }
            }
        }
        else
        {
            // Disposed via using so the extractor is released on every path.
            using (TextExtractor extractor = factory.CreateTextExtractor(path))
            {
                try
                {
                    DrainLines(() => extractor.ExtractLine(), extractedText);
                }
                catch (Exception)
                {
                    // Stop at the first failure. Retrying ExtractLine after an
                    // exception while a previously read line is still held can
                    // loop forever, so no retry is attempted.
                    if (ext == ".one")
                    {
                        extractedText.Add("Invalid password");
                    }
                    // Other file types stay best-effort: the lines read so far
                    // are returned without an error entry.
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Failures are surfaced to the client as the raw exception message.
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}

/// <summary>
/// Calls <paramref name="readLine"/> until it returns <c>null</c>, appending every
/// non-null line to <paramref name="sink"/>.
/// </summary>
private static void DrainLines(Func<string> readLine, ICollection<string> sink)
{
    string line;
    while ((line = readLine()) != null)
    {
        sink.Add(line);
    }
}