/// <summary>
/// Extracts text from an in-memory document, trying each extractor registered
/// for the given extension until one yields non-null text.
/// </summary>
/// <param name="extensionName">Extension used to look up extractors; must not be null or whitespace.</param>
/// <param name="data">Raw document bytes; must not be null or empty.</param>
/// <returns>
/// A result whose <c>Data</c> holds the extracted text on success. On any failure
/// (invalid input, no extractor, no extractor produced text, unexpected exception)
/// <c>Status</c> is <c>OperateStatus.Failure</c> and <c>Description</c> explains why.
/// </returns>
public OperateResult<string> Extract(string extensionName, byte[] data)
{
    OperateResult<string> operateResult = new OperateResult<string>();
    try
    {
        if (extensionName.IsNullOrWhiteSpace())
        {
            operateResult.Status = OperateStatus.Failure;
            operateResult.Description = "extension不能为空";
            return operateResult;
        }

        if (data == null || data.Length == 0)
        {
            operateResult.Status = OperateStatus.Failure;
            operateResult.Description = "fileData不能为空";
            return operateResult;
        }

        ICollection<DocumentExtractor> extractors = ExtractorFactory.GetExtractors(extensionName);
        if (extractors == null || extractors.Count == 0)
        {
            operateResult.Status = OperateStatus.Failure;
            operateResult.Description = "没有对应的处理程序";
            return operateResult;
        }

        foreach (DocumentExtractor extractor in extractors)
        {
            try
            {
                ExtractedResult extractedResult = extractor.Extract(extensionName, data, ExtractOption.Text);
                if (extractedResult != null && extractedResult.Text != null)
                {
                    // First extractor that produces text wins.
                    operateResult.Data = extractedResult.Text;
                    return operateResult;
                }
            }
            catch (Exception exception)
            {
                // A failing extractor must not prevent the remaining ones from being tried.
                LoggerWrapper.Logger.Warn("抽取时发生错误", exception);
            }
        }

        // Every extractor either threw or returned no text.
        operateResult.Status = OperateStatus.Failure;
        operateResult.Description = "抽取出错";
    }
    catch (Exception exception)
    {
        // Previously only the description was set here, leaving the status at its
        // default value so callers could not detect the failure from Status alone.
        operateResult.Status = OperateStatus.Failure;
        operateResult.Description = "抽取出错";
        LoggerWrapper.Logger.Error("ExtractText", exception);
    }

    return operateResult;
}
/// <summary>
/// Downloads the core info archive addressed by <c>_baseUrl</c> + <c>_infoUrl</c>
/// and extracts its contents into <c>_infoDirectory</c>.
/// </summary>
/// <remarks>
/// Returns silently (after logging) when the directory cannot be created, the url
/// cannot be built, the download is empty, or any exception occurs.
/// </remarks>
protected void UpdateCoreInfos()
{
    if (!TryCreateDirectory(_infoDirectory))
    {
        return;
    }

    Uri infoUri;
    bool haveUrl = TryCreateAbsoluteUrl(_baseUrl, _infoUrl, out infoUri);
    if (!haveUrl)
    {
        ServiceRegistration.Get<ILogger>().Error("CoreHandler: Unable to create absolute core info url from settings, base url: '{0}', info url: '{1}'", _baseUrl, _infoUrl);
        return;
    }

    try
    {
        string address = infoUri.AbsoluteUri;

        // Blocks the calling thread until the download task completes.
        byte[] payload = _downloader.DownloadDataAsync(address).Result;
        bool isEmpty = payload == null || payload.Length == 0;
        if (isEmpty)
        {
            ServiceRegistration.Get<ILogger>().Error("CoreInfoHandler: Failed to download core infos from '{0}', response was null or empty", address);
            return;
        }

        using (Stream archiveStream = new MemoryStream(payload))
        {
            using (IExtractor archive = ExtractorFactory.Create(address, archiveStream))
            {
                archive.ExtractAll(_infoDirectory);
            }
        }
    }
    catch (Exception failure)
    {
        ServiceRegistration.Get<ILogger>().Error("CoreInfoHandler: Exception updating core infos", failure);
    }
}
/// <summary>
/// Extracts highlights from an uploaded Words document and returns them as a JSON list.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <returns>JSON array of highlight strings, or a single "File Format not supported" entry on any exception.</returns>
public ActionResult ExtractHighlight([FromBody] string fileName)
{
    ExtractorFactory factory = new ExtractorFactory();
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
        {
            HighlightOptions leftOptions = HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Left, 15, 10);
            HighlightOptions rightOptions = HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Right, 20, 10);

            IList<string> highlights = extractor.ExtractHighlights(leftOptions, rightOptions);
            foreach (string highlight in highlights)
            {
                extractedText.Add(highlight);
            }
        }
    }
    catch (Exception)
    {
        // Every failure is reported to the client with the same message.
        extractedText.Add("File Format not supported");
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Lists every entity of an OST container on the console: name, path, media type,
/// e-mail subject/sender/receiver, followed by the entity's extracted text.
/// </summary>
/// <param name="fileName">Name passed to <c>Common.getFilePath</c> to resolve the OST file.</param>
public static void ExtractFromOstContainer(string fileName)
{
    //ExStart:ExtractFromOstContainer
    ExtractorFactory factory = new ExtractorFactory();

    // Resolve the OST file's path.
    string filePath = Common.getFilePath(fileName);

    using (var container = new PersonalStorageContainer(filePath))
    {
        int entityCount = container.Entities.Count;
        for (int index = 0; index < entityCount; index++)
        {
            var entity = container.Entities[index];

            Console.WriteLine(entity.Name);
            Console.WriteLine(entity.Path.ToString());
            Console.WriteLine(entity.MediaType);
            Console.WriteLine(entity[PersonalStorageContainer.EmailSubject]);
            Console.WriteLine(entity[PersonalStorageContainer.EmailSender]);
            Console.WriteLine(entity[PersonalStorageContainer.EmailReceiver]);

            using (TextExtractor extractor = factory.CreateTextExtractor(entity.OpenStream()))
            {
                Console.WriteLine("Content:");

                // The factory yields null when it cannot handle the entity's format.
                string content = extractor == null
                    ? "The document format is not supported"
                    : extractor.ExtractAll();
                Console.WriteLine(content);
            }
        }
    }
    //ExEnd:ExtractFromOstContainer
}
/// <summary>
/// Extracts an uploaded Words document line by line, formatted as Markdown,
/// and returns the lines as a JSON list.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <returns>JSON array of extracted lines; on an exception its message is appended to the list.</returns>
public ActionResult ExtractTextWithMarkDown([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        // Dispose the extractor so the document is released after the request.
        // The former "extractor == null" check was removed: it ran after the object
        // had already been dereferenced and "new" never returns null.
        using (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(filePath))
        {
            extractor.DocumentFormatter = new MarkdownDocumentFormatter();

            // ExtractLine returns null once the document is exhausted.
            string line;
            while ((line = extractor.ExtractLine()) != null)
            {
                extractedText.Add(line);
            }
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
//protected override void FinishedNotify()
//{
//}

/// <summary>
/// Copies every file in <c>_filesInfo</c> into <c>_destinationPath</c>, appending a
/// timestamp to the file name (<c>name_timestamp.ext</c>).
/// </summary>
/// <remarks>
/// The timestamp comes from <c>extractor.Extract(img, 0x9003)</c> (presumably the EXIF
/// DateTimeOriginal tag — confirm against the extractor). When that is blank the file's
/// creation time is used. Files that cannot be processed are skipped and reported on
/// standard error instead of being silently ignored.
/// </remarks>
protected override void SpecificProcessing()
{
    IExtractor<Image> extractor = ExtractorFactory.CreateExtractor("datetime");

    foreach (var item in _filesInfo)
    {
        try
        {
            using (Image img = Image.FromFile(item))
            {
                // Strip NULs and replace characters that are not valid in file names.
                string creationTime = extractor.Extract(img, 0x9003)
                    .Replace("\0", string.Empty)
                    .Replace('/', '_')
                    .Replace(':', '_');

                if (string.IsNullOrWhiteSpace(creationTime))
                {
                    creationTime = File.GetCreationTime(item).ToString().Replace(':', '_').Replace('/', '_');
                }

                string targetName = Path.GetFileNameWithoutExtension(item) + "_" + creationTime + Path.GetExtension(item);
                File.Copy(item, Path.Combine(_destinationPath.FullName, targetName), true);
            }
        }
        catch (OutOfMemoryException ex)
        {
            // Image.FromFile throws OutOfMemoryException for files that are not valid images.
            Console.Error.WriteLine("Skipped '{0}': {1}", item, ex.Message);
        }
        catch (FileNotFoundException ex)
        {
            Console.Error.WriteLine("Skipped '{0}': {1}", item, ex.Message);
        }
        catch (ArgumentException ex)
        {
            Console.Error.WriteLine("Skipped '{0}': {1}", item, ex.Message);
        }
    }

    Console.WriteLine();
    Console.WriteLine("RENAME DATETIME FINISHED!!!!");
    Console.WriteLine();
}
/// <summary>
/// Extracts the metadata of an uploaded document and returns it as a JSON list
/// of <c>key = value</c> strings.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <returns>
/// JSON array of metadata entries; a single "The document format is not supported" entry
/// when the factory returns no metadata; or the exception message on failure.
/// </returns>
public ActionResult ExtractMetadata([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();
    ExtractorFactory factory = new ExtractorFactory();
    string path = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        MetadataCollection metadata = factory.ExtractMetadata(path);
        if (metadata == null)
        {
            extractedText.Add("The document format is not supported");
        }
        else
        {
            // Only enumerate when metadata exists. Previously the null case fell through
            // and appended a NullReferenceException message to the response as well.
            foreach (string key in metadata.Keys)
            {
                extractedText.Add(string.Format("{0} = {1}", key, metadata[key]));
            }
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Extracts one file from a pack archive and copies its raw content to <paramref name="outStream"/>.
/// </summary>
/// <param name="outStream">Destination stream; it is disposed when the method finishes.</param>
/// <param name="packArchiveFileName">Archive path, also used to select the file extractor.</param>
/// <param name="file">Entry to extract; only its <c>FileName</c> is used.</param>
static async Task GetRawStream(Stream outStream, string packArchiveFileName, Models.File file)
{
    using (outStream)
    {
        var extractor = ExtractorFactory.GetFileExtractor(packArchiveFileName);
        using (var stream = await extractor.ExtractFile(packArchiveFileName, file.FileName))
        {
            // Copy asynchronously: a blocking CopyTo inside an async method ties up the
            // thread for the whole transfer.
            await stream.CopyToAsync(outStream);
        }
    }
}
/// <summary>
/// Stamps each image in <c>_filesInfo</c> with a label in its top-right corner and
/// saves the result under the same file name in <c>_destinationPath</c>.
/// </summary>
/// <remarks>
/// The label text is whatever <c>extractor.Extract(img, 0x9003)</c> returns. The output
/// format follows the source file's extension (jpg/jpeg/png/gif, case-insensitive) and
/// defaults to BMP for anything else.
/// </remarks>
protected override void SpecificProcessing()
{
    IExtractor<Image> extractor = ExtractorFactory.CreateExtractor("datetime");

    foreach (var item in _filesInfo)
    {
        // Read-only access is sufficient and also works for read-only source files.
        using (FileStream readfs = new FileStream(item, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (Image img = Image.FromStream(readfs))
        {
            using (Graphics grph = Graphics.FromImage(img))
            using (Font font = new Font(new FontFamily("Arial"), (float)(0.015 * img.Height)))
            using (SolidBrush sbrush = new SolidBrush(Color.Black))
            {
                // TODO: verify the minimum image size and adapt the label font size and area.
                // Rectangle in the top-right corner of the image.
                RectangleF rect = new RectangleF(
                    new PointF((int)(img.Width - 385), (int)(img.Height * 0.001)),
                    new SizeF(385, (int)(img.Height * .02)));

                // White background because the label is drawn in black.
                grph.FillRectangle(Brushes.White, rect);

                string res = extractor.Extract(img, 0x9003);
                grph.DrawString(res, font, sbrush, rect);
            }

            // The format must be chosen from the extension. The previous code switched on
            // the whole file name, so no case ever matched and every image was saved as BMP.
            ImageFormat imgFormat;
            switch (Path.GetExtension(item).ToLowerInvariant())
            {
                case ".jpg":
                case ".jpeg":
                    imgFormat = ImageFormat.Jpeg;
                    break;
                case ".png":
                    imgFormat = ImageFormat.Png;
                    break;
                case ".gif":
                    imgFormat = ImageFormat.Gif;
                    break;
                default:
                    imgFormat = ImageFormat.Bmp;
                    break;
            }

            string targetPath = Path.Combine(_destinationPath.FullName, Path.GetFileName(item));

            // FileMode.Create truncates an existing file. OpenOrCreate would leave stale
            // trailing bytes when the new image is smaller than the old one.
            using (FileStream writefs = new FileStream(targetPath, FileMode.Create))
            {
                img.Save(writefs, imgFormat);
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("ADD LABEL FINISHED!!!!");
    Console.WriteLine();
}
/// <summary>
/// Opens the named sample file and returns the extractor the factory creates for it,
/// cast to <c>XSSFExcelExtractor</c>.
/// </summary>
/// <param name="sampleName">Sample file name passed to <c>HSSFTestDataSamples.OpenSampleFileStream</c>.</param>
/// <exception cref="RuntimeException">Wraps any exception raised while opening, creating or casting.</exception>
protected XSSFExcelExtractor GetExtractor(String sampleName)
{
    // Turn off the event-extractor preference both globally and for the current thread.
    ExtractorFactory.SetAllThreadsPreferEventExtractors(false);
    ExtractorFactory.SetThreadPrefersEventExtractors(false);

    try
    {
        var sample = HSSFTestDataSamples.OpenSampleFileStream(sampleName);
        var created = ExtractorFactory.CreateExtractor(sample);
        return (XSSFExcelExtractor)created;
    }
    catch (Exception cause)
    {
        throw new RuntimeException(cause);
    }
}
/// <summary>
/// Runs price extraction for every stored item concurrently and sends an
/// <c>UpdatePriceCommand</c> for each item whose extractor reports a change.
/// </summary>
/// <param name="cancellationToken">
/// Checked before each item is started and passed on to the extractor and mediator.
/// </param>
/// <remarks>
/// A failure on a single item is logged and does not stop the others. An
/// <see cref="OperationCanceledException"/> escaping the batch is logged and re-raised
/// only when cancellation was actually requested on <paramref name="cancellationToken"/>.
/// </remarks>
private async Task BeginExtraction(CancellationToken cancellationToken)
{
    try
    {
        using var scope = _serviceProvider.CreateScope();
        var repository = scope.ServiceProvider.GetRequiredService<IItemRepository>();
        var mediator = scope.ServiceProvider.GetRequiredService<IMediator>();

        // Collected from concurrently running tasks, hence a thread-safe bag.
        var commandList = new ConcurrentBag<UpdatePriceCommand>();
        var items = await repository.ListAll();

        var tasks = items.Select(async x =>
        {
            try
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return;
                }

                var extractor = ExtractorFactory.Create(x.Url);
                if (await extractor.ExtractValues(x, cancellationToken))
                {
                    _logger.LogInformation($"Price changed for {x.Name} to {extractor.InCashValue}");
                    commandList.Add(new UpdatePriceCommand(x.Id, extractor.InCashValue, extractor.NormalValue, extractor.FullValue, extractor.IsAvailable));
                }
            }
            catch (Exception e)
            {
                // The exception must be the first argument. Passed after the message it
                // is treated as a format argument and the stack trace is never logged.
                _logger.LogError(e, "Error calling url for product {Name}.", x.Name);
            }
        });

        await Task.WhenAll(tasks);

        foreach (var cmd in commandList)
        {
            await mediator.Send(cmd, cancellationToken);
        }
    }
    catch (OperationCanceledException e)
    {
        _logger.LogInformation("Cancelling " + e.Message);
        if (cancellationToken.IsCancellationRequested)
        {
            _logger.LogInformation("Cancelling per user request.");
            cancellationToken.ThrowIfCancellationRequested();
        }
    }
}
/// <summary>
/// Converts one file from a pack archive and copies the converted content to <paramref name="outStream"/>.
/// </summary>
/// <param name="outStream">Destination stream; it is disposed when the method finishes.</param>
/// <param name="packArchiveFileName">Archive path, also used to select the file extractor.</param>
/// <param name="file">Entry to extract; only its <c>FileName</c> is used.</param>
/// <param name="converter">Converter that produces the output stream.</param>
/// <param name="convertInfo">
/// Conversion settings. Its <c>ExtractFile</c> callback is overwritten here so the
/// converter can pull the source file out of the archive into a destination it chooses.
/// </param>
static async Task GetStream(Stream outStream, string packArchiveFileName, Models.File file, Converter converter, ConvertInfo convertInfo)
{
    using (outStream)
    {
        convertInfo.ExtractFile = async destFile =>
        {
            var extractor = ExtractorFactory.GetFileExtractor(packArchiveFileName);
            await extractor.ExtractFile(packArchiveFileName, file.FileName, destFile);
        };

        using (var stream = await converter.Convert(convertInfo))
        {
            // Copy asynchronously: a blocking CopyTo inside an async method ties up the
            // thread for the whole transfer.
            await stream.CopyToAsync(outStream);
        }
    }
}
/// <summary>
/// Shows the usage of the CreateMetadataExtractor method (supported in version 17.03 or greater):
/// prints every metadata entry of the file as <c>key = value</c>.
/// </summary>
/// <param name="fileName">Name passed to <c>Common.GetFilePath</c> to resolve the document.</param>
public static void CreateMetadataExtractorMethodUsage(string fileName)
{
    //ExStart:CreateMetadataExtractorMethodUsage
    // Resolve the file's actual path.
    String filePath = Common.GetFilePath(fileName);

    var factory = new ExtractorFactory();
    var extractor = factory.CreateMetadataExtractor(filePath);

    // Guard against an unsupported format instead of failing with a NullReferenceException.
    // NOTE(review): assumes the factory signals "unsupported" with null, as the other
    // Create* samples check for — confirm against the library documentation.
    var metadata = extractor == null ? null : extractor.ExtractMetadata(filePath);
    if (metadata == null)
    {
        Console.WriteLine("The document format is not supported");
        return;
    }

    foreach (string key in metadata.Keys)
    {
        Console.WriteLine(string.Format("{0} = {1}", key, metadata[key]));
    }
    //ExEnd:CreateMetadataExtractorMethodUsage
}
/// <summary>
/// Extracts content from the file at <paramref name="path"/>, using the first extractor
/// registered for the file's extension that returns a non-null result.
/// </summary>
/// <param name="path">Path of the document; its extension selects the extractors.</param>
/// <param name="options">Options merged into a single value via <c>CombineOptions</c>.</param>
/// <returns>
/// A result carrying the extracted data, or <c>Status = Failure</c> with a description when
/// there is no extractor for the extension, the file does not exist, an extractor throws
/// (the description then contains the exception message and stack trace), or no extractor
/// returns data.
/// </returns>
public OperateResult<ExtractedResult> Extract(string path, ExtractOption[] options)
{
    OperateResult<ExtractedResult> operateResult = new OperateResult<ExtractedResult>();
    ExtractOption extractOption = CombineOptions(options);
    string extension = Path.GetExtension(path);

    ICollection<DocumentExtractor> extractors = ExtractorFactory.GetExtractors(extension);
    if (extractors == null || extractors.Count == 0)
    {
        operateResult.Status = OperateStatus.Failure;
        operateResult.Description = "没有对应的处理程序";
        return operateResult;
    }

    if (!File.Exists(path))
    {
        operateResult.Status = OperateStatus.Failure;
        operateResult.Description = string.Concat("不存在该文件:", path);
        return operateResult;
    }

    byte[] numArray = File.ReadAllBytes(path);
    try
    {
        foreach (DocumentExtractor extractor in extractors)
        {
            operateResult.Data = extractor.Extract(extension, numArray, extractOption);
            if (operateResult.Data != null)
            {
                return operateResult;
            }
        }
    }
    catch (Exception exception)
    {
        operateResult.Status = OperateStatus.Failure;
        operateResult.Description = string.Concat("抽取出错:", exception.Message, Environment.NewLine, exception.StackTrace);
        LoggerWrapper.Logger.Error("ExtractText", exception);

        // Return here so the detailed description survives. Previously the generic
        // "no result" branch below always overwrote it.
        return operateResult;
    }

    // No extractor produced data.
    operateResult.Status = OperateStatus.Failure;
    operateResult.Description = "抽取出错";
    return operateResult;
}
/// <summary>
/// Pages through the text of a document on the console, one screenful at a time.
/// </summary>
/// <param name="fileName">Document to read; also printed as the header of each page.</param>
/// <param name="formatted">True to use the formatted text extractor, false for the plain one.</param>
public ExtractText(string fileName, bool formatted)
{
    //ExStart:ExtractText
    int pageSize = Console.WindowHeight;
    ExtractorFactory factory = new ExtractorFactory();

    TextExtractor extractor;
    if (formatted)
    {
        extractor = factory.CreateFormattedTextExtractor(fileName);
    }
    else
    {
        extractor = factory.CreateTextExtractor(fileName);
    }

    if (extractor == null)
    {
        Console.WriteLine("The document's format is not supported");
        return;
    }

    // Disposes the extractor on every exit path, including exceptions.
    using (extractor)
    {
        string current = null;
        do
        {
            Console.Clear();
            Console.WriteLine("{0}", fileName);

            // Print lines until the page is full or the document ends (null line).
            int printed = 0;
            do
            {
                current = extractor.ExtractLine();
                printed++;
                if (current != null)
                {
                    Console.WriteLine(current);
                }
            }
            while (current != null && printed < pageSize);

            Console.WriteLine();
            Console.WriteLine("Press Esc to exit or any other key to move to the next page");
        }
        while (current != null && Console.ReadKey().Key != ConsoleKey.Escape);
    }
    //ExEnd:ExtractText
}
/// <summary>
/// Extracts the archive at <paramref name="path"/> into its own directory and deletes
/// the archive afterwards.
/// </summary>
/// <param name="path">Path of the downloaded file.</param>
/// <returns>
/// True when the file is not an archive (nothing to do) or extraction succeeded;
/// false when extraction failed, in which case the file is left in place.
/// </returns>
protected bool ExtractCore(string path)
{
    using (IExtractor archive = ExtractorFactory.Create(path))
    {
        if (!archive.IsArchive())
        {
            return true;
        }

        string targetDirectory = Path.GetDirectoryName(path);
        if (!archive.ExtractAll(targetDirectory))
        {
            return false;
        }
    }

    // The extractor has been disposed at this point, so the archive can be removed.
    TryDeleteFile(path);
    return true;
}
/// <summary>
/// Detects the text encoding of an uploaded file and returns its string form as JSON.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <returns>
/// JSON array with the detected encoding, or a single "File Format not supported"
/// entry on any exception.
/// </returns>
public ActionResult ExtractDocumentEndocing([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        // The detector is constructed with code page 1251 as its encoding argument.
        EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

        // Dispose the stream: the handle used to stay open after every request.
        using (Stream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            extractedText.Add(detector.Detect(stream).ToString());
        }
    }
    catch (Exception)
    {
        extractedText.Add("File Format not supported");
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Demonstrates attaching a NotificationReceiver both to an ExtractorFactory and,
/// through LoadOptions, to a concrete extractor; then prints the file's extracted text.
/// </summary>
/// <param name="fileName">Name passed to <c>Common.GetFilePath</c> to resolve the document.</param>
public static void LoggerWithExtractorFactory(string fileName)
{
    //ExStart:LoggerWithExtractorFactory
    // Resolve the file's actual path.
    String filePath = Common.GetFilePath(fileName);

    // Receiver handed to the factory (fourth constructor argument).
    var receiverForFactory = new NotificationReceiver();
    var factory = new ExtractorFactory(null, null, null, receiverForFactory);

    // Receiver handed to a single extractor via its load options.
    var receiver = new NotificationReceiver();
    LoadOptions loadOptions = new LoadOptions
    {
        NotificationReceiver = receiver
    };

    using (var extractor = new CellsTextExtractor(filePath, loadOptions))
    {
        string text = extractor.ExtractAll();
        Console.WriteLine(text);
    }
    //ExEnd:LoggerWithExtractorFactory
}
/// <summary>
/// Extracts one row/column selection from the first sheet of an uploaded spreadsheet
/// and returns it as JSON.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <param name="rowIndex">Row index passed to <c>CellsSheetInfo.ExtractRow</c>.</param>
/// <param name="columnIndex">Column identifier passed to <c>CellsSheetInfo.ExtractRow</c>.</param>
/// <returns>JSON array with the extracted text, or the exception message on failure.</returns>
public ActionResult ExtractRowAndColumn([FromBody] string fileName, [FromBody] int rowIndex, [FromBody] string columnIndex)
{
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        // Dispose the extractor so the workbook is released after the request.
        using (CellsTextExtractor extractor = new CellsTextExtractor(filePath))
        {
            int sheetIndex = 0;
            CellsSheetInfo sheetInfo = extractor.GetSheetInfo(sheetIndex);
            extractedText.Add(sheetInfo.ExtractRow(rowIndex, columnIndex));
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Click handler for the extract button: validates that an output path was entered and
/// runs the string extraction for the Windows, Android and iOS input files.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void extractButton_Click(object sender, System.EventArgs e)
{
    // Open question from the original author: load an extractor for each input file —
    // use a factory?
    bool outputMissing = String.IsNullOrEmpty(outFilePath.Text);
    if (outputMissing)
    {
        MessageBox.Show("Please enter an output file path!");
        return;
    }

    try
    {
        ExtractorFactory stringExtractor = new ExtractorFactory(
            inFileWindows.Text,
            inFileAndroid.Text,
            inFileiOS.Text,
            outFilePath.Text);
        stringExtractor.ExtractThemStrings();
    }
    catch (Exception failure)
    {
        // Surface any failure to the user instead of crashing the form.
        MessageBox.Show(failure.Message);
    }
}
/// <summary>
/// Prints every metadata entry of a document as <c>key = value</c>, using
/// <c>ExtractorFactory.ExtractMetadata</c>.
/// </summary>
/// <param name="fileName">Name passed to <c>Common.getFilePath</c> to resolve the document.</param>
public static void UsingExtractorFactory(string fileName)
{
    //ExStart:UsingExtractorFactory
    // Resolve the file's actual path.
    String filePath = Common.getFilePath(fileName);
    ExtractorFactory factory = new ExtractorFactory();

    // ExtractMetadata methods in the ExtractorFactory class are marked as Obsolete from
    // version 17.03 onwards (use the Extractor class instead).
    MetadataCollection metadata = factory.ExtractMetadata(filePath);
    if (metadata == null)
    {
        Console.WriteLine("The document format is not supported");

        // Stop here. Previously execution continued and crashed on metadata.Keys.
        return;
    }

    foreach (string key in metadata.Keys)
    {
        Console.WriteLine(string.Format("{0} = {1}", key, metadata[key]));
    }
    //ExEnd:UsingExtractorFactory
}
/// <summary>
/// Extracts an uploaded Words document line by line as plain text with ASCII table
/// frames, and returns the lines as a JSON list.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <returns>JSON array of extracted lines; on an exception its message is appended to the list.</returns>
public ActionResult ExtractTableWithFormat([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        // Dispose the extractor so the document is released after the request.
        // The former "extractor == null" check was removed: it ran after the object
        // had already been dereferenced and "new" never returns null.
        using (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(filePath))
        {
            PlainTableFrame frame = new PlainTableFrame(
                PlainTableFrameAngle.ASCII,
                PlainTableFrameEdge.ASCII,
                PlainTableFrameIntersection.ASCII,
                new PlainTableFrameConfig(true, true, true, false));
            extractor.DocumentFormatter = new PlainDocumentFormatter(frame);

            // ExtractLine returns null once the document is exhausted.
            string line;
            while ((line = extractor.ExtractLine()) != null)
            {
                extractedText.Add(line);
            }
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Copies every file in <c>_filesInfo</c> into a sub-folder of <c>_destinationPath</c>
/// named after a year.
/// </summary>
/// <remarks>
/// The year is parsed from the date part of <c>extractor.Extract(img, 0x9003)</c>
/// (text before the first space, with ':' replaced by '/'). When that cannot be parsed
/// the file's creation time is used. Copy failures are reported on standard error and
/// do not stop the remaining files.
/// </remarks>
protected override void SpecificProcessing()
{
    IExtractor<Image> extractor = ExtractorFactory.CreateExtractor("datetime");

    foreach (var item in _filesInfo)
    {
        DateTime dtextr;

        // Open read-only and close the image before copying. The previous code kept the
        // source open with read/write access while File.Copy ran, which can make the
        // copy fail with a sharing violation.
        using (FileStream fs = new FileStream(item, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (Image img = Image.FromStream(fs))
        {
            string data = extractor.Extract(img, 0x9003).Split(' ')[0].Replace(':', '/');

            // Parse with the invariant culture so the result does not depend on the
            // machine's regional settings or calendar.
            bool parsed = DateTime.TryParse(
                data,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out dtextr);

            if (!parsed || dtextr == DateTime.MinValue)
            {
                dtextr = File.GetCreationTime(item);
            }
        }

        string yearDirectory = Path.Combine(_destinationPath.FullName, dtextr.Year.ToString());

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(yearDirectory);

        try
        {
            File.Copy(item, Path.Combine(yearDirectory, Path.GetFileName(item)), true);
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and continue with the next file.
            Console.Error.WriteLine("Could not copy '{0}': {1}", item, ex.Message);
        }
    }

    Console.WriteLine();
    Console.WriteLine("SORT BY YEARS FINISHED!!!!");
    Console.WriteLine();
}
/// <summary>
/// Demonstrates ContainerEnumerator, which enumerates all entities of a group of
/// containers, printing the extracted text of each entity.
/// </summary>
/// <remarks>
/// The factory, media-type detector and container are left as null placeholders in this sample.
/// </remarks>
public static void EnumeratingAllEntities()
{
    //ExStart:EnumeratingAllEntities
    IContainerFactory containerFactory = null;
    MediaTypeDetector containerMediaTypeDetector = null;
    Container container = null;

    ExtractorFactory readerFactory = new ExtractorFactory();
    var entities = new ContainerEnumerator(containerFactory, containerMediaTypeDetector, container);

    while (entities.MoveNext())
    {
        using (var entityStream = entities.Current.OpenStream())
        using (var extractor = readerFactory.CreateTextExtractor(entityStream))
        {
            // A null extractor means the factory does not support the entity's format.
            if (extractor == null)
            {
                Console.WriteLine("document isn't supported");
            }
            else
            {
                Console.WriteLine(extractor.ExtractAll());
            }
        }
    }
    //ExEnd:EnumeratingAllEntities
}
/// <summary>
/// Reads the GPS latitude and longitude EXIF tags of every file in <c>_filesInfo</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the values read are currently discarded and neither the extractor nor
/// the loaded image is used — this looks unfinished; confirm the intended behaviour.
/// </remarks>
protected override void SpecificProcessing()
{
    IExtractor<Image> extractor = ExtractorFactory.CreateExtractor("coordinates");

    foreach (var item in _filesInfo)
    {
        using (Image img = Image.FromFile(item))
        // Dispose the reader so one is not left open per processed file.
        using (ExifLib.ExifReader reader = new ExifLib.ExifReader(item))
        {
            // Alternative kept from the original author:
            //var data = extractor.Extract(img, 0).Split(' ');
            double lat;
            reader.GetTagValue(ExifLib.ExifTags.GPSLatitude, out lat);

            double lon;
            reader.GetTagValue(ExifLib.ExifTags.GPSLongitude, out lon);
            //double alt = Double.Parse( data[0]);
            //double lon = Double.Parse(data[1]);
        }
    }
}
/// <summary>
/// Extracts a single entry from the archive behind <paramref name="accessor"/> and
/// raises the extraction-completed event with the outcome.
/// </summary>
/// <param name="accessor">Accessor for the archive on the local file system.</param>
/// <param name="selectedItem">Name of the archive entry to extract.</param>
protected void DoExtract(ILocalFsResourceAccessor accessor, string selectedItem)
{
    string archiveName = accessor.CanonicalLocalResourcePath.LastPathSegment.Path;
    string targetPath = GetExtractionPath(archiveName, selectedItem);
    Logger.Debug("GoodMergeExtractor: Extracting '{0}' from '{1}' to '{2}'", selectedItem, archiveName, targetPath);

    bool succeeded;
    using (IExtractor archive = ExtractorFactory.Create(accessor.LocalFileSystemPath))
    {
        archive.ExtractionProgress += OnExtractionProgress;
        succeeded = archive.ExtractArchiveFile(selectedItem, targetPath);
    }

    if (succeeded == false)
    {
        // A failed extraction sometimes leaves an empty file behind; remove it.
        DeleteExtractedFile(targetPath);
    }

    _extractionThread = null;

    var completedArgs = new ExtractionCompletedEventArgs(selectedItem, targetPath, succeeded);
    OnExtractionCompleted(completedArgs);
}
/// <summary>
/// Parses <paramref name="text"/> with an extractor built from <paramref name="rules"/>
/// and compares the result against the expected extractions.
/// </summary>
/// <param name="text">Input text to parse.</param>
/// <param name="rules">Rule definitions used to create the extractor.</param>
/// <param name="etalon">Expected extractions.</param>
/// <param name="settings">Extractor settings; a default instance is used when null.</param>
/// <param name="extensions">Optional extensions forwarded to the factory.</param>
/// <param name="morph">Optional morphological analyzer forwarded to the factory.</param>
/// <param name="rulesToExtract">Names of the rules to extract, forwarded to <c>Parse</c>.</param>
public static void Check(string text, string rules, IEnumerable<ExtractionDic> etalon, ExtractorSettings settings = null, IEnumerable<IExtension> extensions = null, IMorphAnalizer morph = null, params string[] rulesToExtract)
{
    var effectiveSettings = settings ?? new ExtractorSettings();

    var extractor = ExtractorFactory.Create(rules, effectiveSettings, extensions: extensions, morph: morph);
    var parsed = extractor.Parse(text, rulesToExtract);

    // Materialize both sequences, expected first, before handing them to the comparison.
    var expected = etalon.ToArray();
    var actual = parsed.ToArray();
    _check(expected, actual);
}
/// <summary>
/// Searches an uploaded Words document for a keyword and returns each hit with its
/// surrounding text as a JSON list.
/// </summary>
/// <param name="fileName">
/// Name of the file under <c>App_Data/Uploads</c>. It is concatenated into the path as-is.
/// NOTE(review): the value comes from the request body — confirm it is validated upstream.
/// </param>
/// <param name="keyWord">Text to search for.</param>
/// <returns>
/// JSON array with three entries per hit (left context, found text, right context);
/// a single "Not found" entry when there are no hits; or the exception message on failure.
/// </returns>
public ActionResult SearchText([FromBody] string fileName, [FromBody] string keyWord)
{
    List<string> extractedText = new List<string>();
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        //ExStart:SearchTextInDocuments
        using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
        {
            ListSearchHandler handler = new ListSearchHandler();
            extractor.Search(new SearchOptions(SearchHighlightOptions.CreateFixedLengthOptions(10)), handler, null, new string[] { keyWord });

            if (handler.List.Count == 0)
            {
                // Return the message to the caller. It used to be written to the
                // console, which a web client never sees.
                extractedText.Add("Not found");
            }
            else
            {
                for (int i = 0; i < handler.List.Count; i++)
                {
                    extractedText.Add("Text at Left: " + handler.List[i].LeftText);
                    extractedText.Add("Found Text: " + handler.List[i].FoundText);
                    extractedText.Add("Text at Right: " + handler.List[i].RightText);
                }
            }
        }
        //ExEnd:SearchTextInDocuments
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Reads every file in a ZIP folder and prints its extracted text.
/// </summary>
/// <param name="folderName">Name of the zipped folder.</param>
public static void ReadConcreteFile(string folderName)
{
    //ExStart:ReadConcreteFile
    // Resolve the ZIP folder's path.
    string folderPath = Common.getFilePath(folderName);
    ExtractorFactory extractorFactory = new ExtractorFactory();

    // Initialize the ZIP container.
    using (var container = new ZipContainer(folderPath))
    {
        // Loop through all the entities in the folder.
        for (int i = 0; i < container.Entities.Count; i++)
        {
            // Dispose the entity stream as well as the extractor created on top of it.
            using (var stream = container.Entities[i].OpenStream())
            using (TextExtractor extractor = extractorFactory.CreateTextExtractor(stream))
            {
                // The factory returns null for formats it cannot handle; report that
                // instead of failing with a NullReferenceException.
                Console.WriteLine(extractor != null ? extractor.ExtractAll() : "The document format is not supported");
            }
        }
    }
    //ExEnd:ReadConcreteFile
}
/// <summary>
/// Prints the extracted text of the <c>META-INF\container.xml</c> entity of a ZIP container.
/// </summary>
/// <param name="folderName">Name of the zipped folder.</param>
/// <exception cref="GroupDocsTextException">Thrown when the entity is not present in the container.</exception>
public static void RetrieveEntity(string folderName)
{
    //ExStart:RetrieveEntity_17.12
    // Resolve the ZIP folder's path.
    string folderPath = Common.GetFilePath(folderName);
    ExtractorFactory extractorFactory = new ExtractorFactory();

    // Initialize the ZIP container.
    using (var container = new ZipContainer(folderPath))
    {
        Container.Entity containerEntry = container.GetEntity("META-INF\\container.xml");
        if (containerEntry == null)
        {
            // The entity isn't in the container.
            throw new GroupDocsTextException("File not found");
        }

        // A using statement tolerates a null resource, so the extractor is cleaned up
        // only when the factory actually created one.
        using (TextExtractor extractor = extractorFactory.CreateTextExtractor(containerEntry.OpenStream()))
        {
            string output;
            if (extractor == null)
            {
                output = "Document type isn't supported";
            }
            else
            {
                output = extractor.ExtractAll();
            }

            Console.WriteLine(output);
        }
    }
    //ExEnd:RetrieveEntity_17.12
}