Example #1
0
        /// <summary>
        /// Extracts text from an in-memory document by trying every extractor registered for
        /// the given extension until one of them yields a non-null text.
        /// </summary>
        /// <param name="extensionName">Extension used to look up the candidate extractors.</param>
        /// <param name="data">Raw document content; must not be null or empty.</param>
        /// <returns>
        /// A result whose <c>Data</c> holds the extracted text on success. On any failure
        /// (invalid arguments, no extractor, no extractor produced text, unexpected exception)
        /// <c>Status</c> is set to <c>OperateStatus.Failure</c> and <c>Description</c> says why.
        /// </returns>
        public OperateResult <string> Extract(string extensionName, byte[] data)
        {
            OperateResult <string> operateResult = new OperateResult <string>();

            try
            {
                if (extensionName.IsNullOrWhiteSpace())
                {
                    operateResult.Status      = OperateStatus.Failure;
                    operateResult.Description = "extension不能为空";
                    return operateResult;
                }

                if (data == null || data.Length == 0)
                {
                    operateResult.Status      = OperateStatus.Failure;
                    operateResult.Description = "fileData不能为空";
                    return operateResult;
                }

                ICollection <DocumentExtractor> extractors = ExtractorFactory.GetExtractors(extensionName);
                if (extractors == null || extractors.Count == 0)
                {
                    operateResult.Status      = OperateStatus.Failure;
                    operateResult.Description = "没有对应的处理程序";
                    return operateResult;
                }

                foreach (DocumentExtractor extractor in extractors)
                {
                    try
                    {
                        ExtractedResult extractedResult = extractor.Extract(extensionName, data, ExtractOption.Text);
                        if (extractedResult != null && extractedResult.Text != null)
                        {
                            // First extractor that produces text wins.
                            operateResult.Data = extractedResult.Text;
                            return operateResult;
                        }
                    }
                    catch (Exception exception)
                    {
                        // A failing extractor is not fatal: log it and fall through to the next one.
                        LoggerWrapper.Logger.Warn("抽取时发生错误", exception);
                    }
                }

                // Every extractor either failed or returned no text.
                operateResult.Status      = OperateStatus.Failure;
                operateResult.Description = "抽取出错";
            }
            catch (Exception exception)
            {
                // Mark the result as failed explicitly; previously only the description was set,
                // so callers checking Status could not detect this error path.
                operateResult.Status      = OperateStatus.Failure;
                operateResult.Description = "抽取出错";
                LoggerWrapper.Logger.Error("ExtractText", exception);
            }
            return operateResult;
        }
Example #2
0
        /// <summary>
        /// Downloads the core info package from the URL built out of <c>_baseUrl</c> and
        /// <c>_infoUrl</c> and extracts all of it into <c>_infoDirectory</c>.
        /// Every failure is logged and swallowed; nothing is thrown to the caller.
        /// </summary>
        protected void UpdateCoreInfos()
        {
            // Nothing to do when the target directory cannot be created.
            if (!TryCreateDirectory(_infoDirectory))
            {
                return;
            }

            Uri uri;
            if (!TryCreateAbsoluteUrl(_baseUrl, _infoUrl, out uri))
            {
                ServiceRegistration.Get<ILogger>().Error("CoreHandler: Unable to create absolute core info url from settings, base url: '{0}', info url: '{1}'", _baseUrl, _infoUrl);
                return;
            }

            try
            {
                // Blocks on the asynchronous download because this method is synchronous.
                byte[] payload = _downloader.DownloadDataAsync(uri.AbsoluteUri).Result;

                if (payload != null && payload.Length > 0)
                {
                    using (Stream payloadStream = new MemoryStream(payload))
                    {
                        using (IExtractor archive = ExtractorFactory.Create(uri.AbsoluteUri, payloadStream))
                        {
                            archive.ExtractAll(_infoDirectory);
                        }
                    }
                }
                else
                {
                    ServiceRegistration.Get<ILogger>().Error("CoreInfoHandler: Failed to download core infos from '{0}', response was null or empty", uri.AbsoluteUri);
                }
            }
            catch (Exception error)
            {
                ServiceRegistration.Get<ILogger>().Error("CoreInfoHandler: Exception updating core infos", error);
            }
        }
Example #3
0
        /// <summary>
        /// Extracts highlights from an uploaded file with a <c>WordsTextExtractor</c> and
        /// returns them as a JSON array of strings.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <returns>
        /// JSON list of highlights, or a single "File Format not supported" entry when the
        /// extraction throws.
        /// </returns>
        public ActionResult ExtractHighlight([FromBody] string fileName)
        {
            List<string> extractedText = new List<string>();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string filePath = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
                {
                    IList<string> highlights = extractor.ExtractHighlights(
                        HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Left, 15, 10),
                        HighlightOptions.CreateFixedLengthOptions(HighlightDirection.Right, 20, 10));

                    extractedText.AddRange(highlights);
                }
            }
            catch (Exception)
            {
                // Any extraction failure is reported to the client as an unsupported format.
                extractedText.Add("File Format not supported");
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
        /// <summary>
        /// Opens the given file as a <c>PersonalStorageContainer</c> and prints, for every
        /// entity in it, its name, path, media type, e-mail subject/sender/receiver and the
        /// text content (or a notice when no text extractor is available for it).
        /// </summary>
        /// <param name="fileName">Name that <c>Common.getFilePath</c> resolves to the container file.</param>
        public static void ExtractFromOstContainer(string fileName)
        {
            //ExStart:ExtractFromOstContainer
            ExtractorFactory extractorFactory = new ExtractorFactory();
            // Resolve the container file's path.
            string containerPath = Common.getFilePath(fileName);

            using (var storage = new PersonalStorageContainer(containerPath))
            {
                int index = 0;
                while (index < storage.Entities.Count)
                {
                    var entity = storage.Entities[index];

                    Console.WriteLine(entity.Name);
                    Console.WriteLine(entity.Path.ToString());
                    Console.WriteLine(entity.MediaType);
                    Console.WriteLine(entity[PersonalStorageContainer.EmailSubject]);
                    Console.WriteLine(entity[PersonalStorageContainer.EmailSender]);
                    Console.WriteLine(entity[PersonalStorageContainer.EmailReceiver]);

                    using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(entity.OpenStream()))
                    {
                        Console.WriteLine("Content:");
                        // The factory returns null when it has no extractor for the entity.
                        var content = textExtractor == null ? "The document format is not supported" : textExtractor.ExtractAll();
                        Console.WriteLine(content);
                    }

                    index++;
                }
            }
            //ExEnd:ExtractFromOstContainer
        }
Example #5
0
        /// <summary>
        /// Reads an uploaded file line by line through a <c>WordsFormattedTextExtractor</c>
        /// configured with a <c>MarkdownDocumentFormatter</c> and returns the lines as JSON.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <returns>
        /// JSON list of extracted lines; when extraction throws, the exception message is
        /// appended to whatever was collected so far.
        /// </returns>
        public ActionResult ExtractTextWithMarkDown([FromBody] string fileName)
        {
            List<string> extractedText = new List<string>();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string filePath = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                // Dispose the extractor deterministically so the file is released after the request.
                using (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(filePath))
                {
                    extractor.DocumentFormatter = new MarkdownDocumentFormatter();

                    // ExtractLine signals the end of the document with null.
                    string line;
                    while ((line = extractor.ExtractLine()) != null)
                    {
                        extractedText.Add(line);
                    }
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
Example #6
0
        //protected override void FinishedNotify()
        //{

        //}

        /// <summary>
        /// Copies every file in <c>_filesInfo</c> to <c>_destinationPath</c>, appending a date
        /// stamp to the file name. The stamp is the value the "datetime" extractor returns for
        /// tag 0x9003, or the file's creation time when that value is blank.
        /// Files that cannot be processed are reported on the error stream and skipped.
        /// </summary>
        protected override void SpecificProcessing()
        {
            IExtractor <Image> extractor = ExtractorFactory.CreateExtractor("datetime");

            foreach (var item in _filesInfo)
            {
                try
                {
                    using (Image img = Image.FromFile(item))
                    {
                        // Strip NULs and replace characters that are not allowed in file names.
                        string creationTime = extractor.Extract(img, 0x9003)
                                              .Replace("\0", string.Empty)
                                              .Replace('/', '_')
                                              .Replace(':', '_');

                        string stamp = string.IsNullOrWhiteSpace(creationTime)
                            ? File.GetCreationTime(item).ToString().Replace(':', '_').Replace('/', '_')
                            : creationTime;

                        string targetName = Path.GetFileNameWithoutExtension(item) + "_" + stamp + Path.GetExtension(item);

                        File.Copy(item, Path.Combine(_destinationPath.FullName, targetName), true);
                    }
                }
                catch (Exception ex) when (ex is OutOfMemoryException || ex is FileNotFoundException || ex is ArgumentException)
                {
                    // Same exception types as before are tolerated, but the failure is now
                    // reported instead of being silently discarded.
                    Console.Error.WriteLine("Skipped '{0}': {1}", item, ex.Message);
                }
            }
            Console.WriteLine();
            Console.WriteLine("RENAME DATETIME FINISHED!!!!");
            Console.WriteLine();
        }
Example #7
0
        /// <summary>
        /// Extracts the metadata of an uploaded file and returns it as a JSON list of
        /// "key = value" strings.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <returns>
        /// JSON list of metadata entries; a single "not supported" entry when the factory
        /// returns no metadata; or the exception message when extraction throws.
        /// </returns>
        public ActionResult ExtractMetadata([FromBody] string fileName)
        {
            List<string>     extractedText = new List<string>();
            ExtractorFactory factory       = new ExtractorFactory();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string path = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                MetadataCollection metadata = factory.ExtractMetadata(path);
                if (metadata == null)
                {
                    extractedText.Add("The document format is not supported");
                }
                else
                {
                    // Only enumerate when metadata exists; previously a null result fell
                    // through to this loop and produced a NullReferenceException message.
                    foreach (string key in metadata.Keys)
                    {
                        extractedText.Add(string.Format("{0} = {1}", key, metadata[key]));
                    }
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
 /// <summary>
 /// Extracts <paramref name="file"/> from the given pack archive and copies its content to
 /// <paramref name="outStream"/>. The output stream is disposed when the copy finishes or fails.
 /// </summary>
 /// <param name="outStream">Destination stream; owned and disposed by this method.</param>
 /// <param name="packArchiveFileName">Archive used both to pick the extractor and as the extraction source.</param>
 /// <param name="file">Entry whose <c>FileName</c> is extracted.</param>
 static async Task GetRawStream(Stream outStream, string packArchiveFileName, Models.File file)
 {
     using (outStream)
     {
         var extractor = ExtractorFactory.GetFileExtractor(packArchiveFileName);
         using (var stream = await extractor.ExtractFile(packArchiveFileName, file.FileName))
         {
             // Copy asynchronously so the calling thread is not blocked on stream I/O.
             await stream.CopyToAsync(outStream);
         }
     }
 }
        /// <summary>
        /// For every file in <c>_filesInfo</c>, draws the value returned by the "datetime"
        /// extractor for tag 0x9003 as a black label on a white rectangle in the top-right
        /// corner of the image, then saves the result under the same file name in
        /// <c>_destinationPath</c>, using the image format that matches the file extension
        /// (BMP when the extension is not recognised).
        /// </summary>
        protected override void SpecificProcessing()
        {
            IExtractor <Image> extractor = ExtractorFactory.CreateExtractor("datetime");

            foreach (var item in _filesInfo)
            {
                using (FileStream readfs = new FileStream(item, FileMode.Open)) // source image
                using (Image img = Image.FromStream(readfs))
                {
                    using (Graphics grph = Graphics.FromImage(img))
                    using (Font font = new Font(new FontFamily("Arial"), (float)(0.015 * img.Height)))
                    using (SolidBrush sbrush = new SolidBrush(Color.Black))
                    {
                        // TODO: validate the minimum image size and adapt label font size and area.
                        // Label area: 385 px wide strip in the top-right corner of the image.
                        RectangleF rect = new RectangleF(
                            new PointF((int)(img.Width - 385), (int)(img.Height * 0.001)),
                            new SizeF(385, (int)(img.Height * .02)));

                        // White background so the black label stays readable.
                        grph.FillRectangle(Brushes.White, rect);

                        string res = extractor.Extract(img, 0x9003);
                        grph.DrawString(res, font, sbrush, rect);
                    }

                    // Choose the output format from the file EXTENSION. The previous code
                    // compared the whole file name, so no case ever matched and every image
                    // was written as BMP. The comparison is also case-insensitive now.
                    ImageFormat imgFormat;
                    switch (Path.GetExtension(item).ToLowerInvariant())
                    {
                    case ".jpg":
                    case ".jpeg":
                        imgFormat = ImageFormat.Jpeg;
                        break;

                    case ".png":
                        imgFormat = ImageFormat.Png;
                        break;

                    case ".gif":
                        imgFormat = ImageFormat.Gif;
                        break;

                    default:
                        imgFormat = ImageFormat.Bmp;
                        break;
                    }

                    string destination = Path.Combine(_destinationPath.FullName, Path.GetFileName(item));

                    // FileMode.Create truncates an existing file; OpenOrCreate would leave
                    // stale trailing bytes when the new image is smaller than the old one.
                    using (FileStream writefs = new FileStream(destination, FileMode.Create))
                    {
                        img.Save(writefs, imgFormat);
                    }
                }
            }
            Console.WriteLine();
            Console.WriteLine("ADD LABEL FINISHED!!!!");
            Console.WriteLine();
        }
 /// <summary>
 /// Turns off the event-extractor preference (for all threads and for this thread) and
 /// creates an <c>XSSFExcelExtractor</c> for the named sample file.
 /// </summary>
 /// <param name="sampleName">Sample name passed to <c>HSSFTestDataSamples.OpenSampleFileStream</c>.</param>
 /// <returns>The extractor created by <c>ExtractorFactory</c>, cast to <c>XSSFExcelExtractor</c>.</returns>
 /// <exception cref="RuntimeException">Wraps any exception raised while opening the sample or creating the extractor.</exception>
 protected XSSFExcelExtractor GetExtractor(String sampleName)
 {
     ExtractorFactory.SetAllThreadsPreferEventExtractors(false);
     ExtractorFactory.SetThreadPrefersEventExtractors(false);

     XSSFExcelExtractor created;
     try
     {
         var sample = HSSFTestDataSamples.OpenSampleFileStream(sampleName);
         created = (XSSFExcelExtractor)ExtractorFactory.CreateExtractor(sample);
     }
     catch (Exception cause)
     {
         throw new RuntimeException(cause);
     }
     return created;
 }
Example #11
0
        /// <summary>
        /// Loads all items from the repository, runs a price extractor for each item's URL
        /// concurrently, and afterwards sends one <c>UpdatePriceCommand</c> per item whose
        /// extractor reported a change.
        /// </summary>
        /// <param name="cancellationToken">
        /// Checked before each extraction starts and passed on to the extractor and the mediator.
        /// </param>
        /// <exception cref="OperationCanceledException">
        /// Rethrown when an <see cref="OperationCanceledException"/> is caught while
        /// <paramref name="cancellationToken"/> is in the cancelled state.
        /// </exception>
        private async Task BeginExtraction(CancellationToken cancellationToken)
        {
            try
            {
                using var scope = _serviceProvider.CreateScope();
                var repository = scope.ServiceProvider.GetRequiredService <IItemRepository>();
                var mediator   = scope.ServiceProvider.GetRequiredService <IMediator>();

                // Filled concurrently by the per-item tasks below.
                var commandList = new ConcurrentBag <UpdatePriceCommand>();

                var items = await repository.ListAll();

                var tasks = items.Select(async(x) =>
                {
                    try
                    {
                        if (!cancellationToken.IsCancellationRequested)
                        {
                            var extractor = ExtractorFactory.Create(x.Url);
                            if (await extractor.ExtractValues(x, cancellationToken))
                            {
                                _logger.LogInformation($"Price changed for {x.Name} to {extractor.InCashValue}");
                                commandList.Add(new UpdatePriceCommand(x.Id, extractor.InCashValue, extractor.NormalValue, extractor.FullValue, extractor.IsAvailable));
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        // The exception must be the FIRST argument. Passed after the message it
                        // is treated as a format argument and, with no placeholder to fill,
                        // the exception details never reach the log.
                        _logger.LogError(e, "Error calling url for product {ProductName}.", x.Name);
                    }
                });

                await Task.WhenAll(tasks);

                // Commands are sent one at a time once every extraction has finished.
                foreach (var cmd in commandList)
                {
                    await mediator.Send(cmd, cancellationToken);
                }
            }
            catch (OperationCanceledException e)
            {
                _logger.LogInformation("Cancelling " + e.Message);

                if (cancellationToken.IsCancellationRequested)
                {
                    _logger.LogInformation("Cancelling per user request.");
                    cancellationToken.ThrowIfCancellationRequested();
                }
            }
        }
 /// <summary>
 /// Runs <paramref name="converter"/> for <paramref name="convertInfo"/> and copies the
 /// converted content to <paramref name="outStream"/>. Before converting,
 /// <c>convertInfo.ExtractFile</c> is set to a callback that extracts <paramref name="file"/>
 /// from the pack archive into the destination it is given. The output stream is disposed
 /// when the copy finishes or fails.
 /// </summary>
 /// <param name="outStream">Destination stream; owned and disposed by this method.</param>
 /// <param name="packArchiveFileName">Archive used both to pick the extractor and as the extraction source.</param>
 /// <param name="file">Entry whose <c>FileName</c> is extracted.</param>
 /// <param name="converter">Converter that produces the stream to copy.</param>
 /// <param name="convertInfo">Conversion settings; its <c>ExtractFile</c> callback is overwritten.</param>
 static async Task GetStream(Stream outStream, string packArchiveFileName, Models.File file, Converter converter, ConvertInfo convertInfo)
 {
     using (outStream)
     {
         convertInfo.ExtractFile = async destFile =>
         {
             var extractor = ExtractorFactory.GetFileExtractor(packArchiveFileName);
             await extractor.ExtractFile(packArchiveFileName, file.FileName, destFile);
         };
         using (var stream = await converter.Convert(convertInfo))
         {
             // Copy asynchronously so the calling thread is not blocked on stream I/O.
             await stream.CopyToAsync(outStream);
         }
     }
 }
Example #13
0
        /// <summary>
        /// Demonstrates <c>ExtractorFactory.CreateMetadataExtractor</c> (supported in version
        /// 17.03 or greater): creates a metadata extractor for the file and prints every
        /// metadata entry as "key = value".
        /// </summary>
        /// <param name="fileName">Name that <c>Common.GetFilePath</c> resolves to the actual file path.</param>
        public static void CreateMetadataExtractorMethodUsage(string fileName)
        {
            //ExStart:CreateMetadataExtractorMethodUsage
            // Resolve the file's actual path.
            String resolvedPath = Common.GetFilePath(fileName);

            var extractorFactory  = new ExtractorFactory();
            var metadataExtractor = extractorFactory.CreateMetadataExtractor(resolvedPath);
            var entries           = metadataExtractor.ExtractMetadata(resolvedPath);

            foreach (string name in entries.Keys)
            {
                Console.WriteLine("{0} = {1}", name, entries[name]);
            }
            //ExEnd:CreateMetadataExtractorMethodUsage
        }
        /// <summary>
        /// Extracts content from the file at <paramref name="path"/> by trying every extractor
        /// registered for the file's extension until one of them returns a non-null result.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="options">Extraction options; merged into one value by <c>CombineOptions</c>.</param>
        /// <returns>
        /// A result whose <c>Data</c> holds the extraction output on success. Otherwise
        /// <c>Status</c> is <c>OperateStatus.Failure</c> and <c>Description</c> names the cause:
        /// no extractor for the extension, missing file, an exception (with message and stack
        /// trace), or no extractor producing a result.
        /// </returns>
        public OperateResult <ExtractedResult> Extract(string path, ExtractOption[] options)
        {
            OperateResult <ExtractedResult> operateResult = new OperateResult <ExtractedResult>();
            ExtractOption extractOption = CombineOptions(options);
            string        extension     = Path.GetExtension(path);
            ICollection <DocumentExtractor> extractors = ExtractorFactory.GetExtractors(extension);

            if (extractors == null || extractors.Count == 0)
            {
                operateResult.Status      = OperateStatus.Failure;
                operateResult.Description = "没有对应的处理程序";
                return operateResult;
            }

            if (!File.Exists(path))
            {
                operateResult.Status      = OperateStatus.Failure;
                operateResult.Description = string.Concat("不存在该文件:", path);
                return operateResult;
            }

            byte[] numArray = File.ReadAllBytes(path);
            try
            {
                foreach (DocumentExtractor extractor in extractors)
                {
                    operateResult.Data = extractor.Extract(extension, numArray, extractOption);
                    if (operateResult.Data != null)
                    {
                        // First extractor that produces a result wins.
                        return operateResult;
                    }
                }
            }
            catch (Exception exception)
            {
                operateResult.Status      = OperateStatus.Failure;
                operateResult.Description = string.Concat("抽取出错:", exception.Message, Environment.NewLine, exception.StackTrace);
                LoggerWrapper.Logger.Error("ExtractText", exception);

                // Return right away: falling through used to replace this detailed
                // description with the generic one below.
                return operateResult;
            }

            // Every extractor ran without throwing, but none returned a result.
            operateResult.Status      = OperateStatus.Failure;
            operateResult.Description = "抽取出错";
            return operateResult;
        }
Example #15
0
        /// <summary>
        /// Prints the text of a document to the console one page at a time, where a page is
        /// as many lines as the console window is high. After each page the user can press
        /// Esc to stop or any other key to continue.
        /// </summary>
        /// <param name="fileName">Document passed to the extractor factory.</param>
        /// <param name="formatted">
        /// <c>true</c> to use the formatted text extractor, <c>false</c> for the plain one.
        /// </param>
        public ExtractText(string fileName, bool formatted)
        {
            //ExStart:ExtractText
            int pageSize = Console.WindowHeight;
            ExtractorFactory extractorFactory = new ExtractorFactory();

            TextExtractor extractor;
            if (formatted)
            {
                extractor = extractorFactory.CreateFormattedTextExtractor(fileName);
            }
            else
            {
                extractor = extractorFactory.CreateTextExtractor(fileName);
            }

            // The factory returns null when it has no extractor for the document.
            if (extractor == null)
            {
                Console.WriteLine("The document's format is not supported");
                return;
            }

            // Guarantees Dispose() even when reading or console I/O throws.
            using (extractor)
            {
                string current = null;
                do
                {
                    Console.Clear();
                    Console.WriteLine(fileName);

                    int shown = 0;
                    while (true)
                    {
                        current = extractor.ExtractLine();
                        shown++;

                        // null marks the end of the document.
                        if (current == null)
                        {
                            break;
                        }

                        Console.WriteLine(current);

                        if (shown >= pageSize)
                        {
                            break;
                        }
                    }

                    Console.WriteLine();
                    Console.WriteLine("Press Esc to exit or any other key to move to the next page");
                }while (current != null && Console.ReadKey().Key != ConsoleKey.Escape);
            }
            //ExEnd:ExtractText
        }
Example #16
0
        /// <summary>
        /// Extracts the file at <paramref name="path"/> into its own directory when the
        /// extractor reports it as an archive, and tries to delete the archive afterwards
        /// if the extraction succeeded.
        /// </summary>
        /// <param name="path">Path of the file to examine.</param>
        /// <returns>
        /// <c>true</c> when the file is not an archive or was extracted successfully;
        /// otherwise the (false) result of the extraction.
        /// </returns>
        protected bool ExtractCore(string path)
        {
            bool succeeded;

            using (IExtractor archive = ExtractorFactory.Create(path))
            {
                if (archive.IsArchive())
                {
                    succeeded = archive.ExtractAll(Path.GetDirectoryName(path));
                }
                else
                {
                    // Nothing to extract: treated as success and the file is kept.
                    return true;
                }
            }

            // Delete only after the extractor has been disposed.
            if (succeeded)
            {
                TryDeleteFile(path);
            }
            return succeeded;
        }
Example #17
0
        /// <summary>
        /// Detects the encoding of an uploaded file with an <c>EncodingDetector</c> (created
        /// with code page 1251) and returns its string form as JSON.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <returns>
        /// JSON list with the detected encoding, or a single "File Format not supported"
        /// entry when detection throws.
        /// </returns>
        public ActionResult ExtractDocumentEndocing([FromBody] string fileName)
        {
            List<string> extractedText = new List<string>();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string filePath = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

                // Dispose the stream so the file handle is released after the request.
                using (Stream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    extractedText.Add(detector.Detect(stream).ToString());
                }
            }
            catch (Exception)
            {
                // Any failure is reported to the client as an unsupported format.
                extractedText.Add("File Format not supported");
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
Example #18
0
            /// <summary>
            /// Demonstrates wiring <c>NotificationReceiver</c> instances: one is handed to an
            /// <c>ExtractorFactory</c>, another to the <c>LoadOptions</c> of a
            /// <c>CellsTextExtractor</c>, whose full text is then printed to the console.
            /// </summary>
            /// <param name="fileName">Name that <c>Common.GetFilePath</c> resolves to the actual file path.</param>
            public static void LoggerWithExtractorFactory(string fileName)
            {
                //ExStart:LoggerWithExtractorFactory
                // Resolve the file's actual path.
                String resolvedPath = Common.GetFilePath(fileName);

                // Receiver attached to a factory (the factory itself is not used further here).
                var factoryReceiver = new NotificationReceiver();
                var factory         = new ExtractorFactory(null, null, null, factoryReceiver);

                // Receiver attached to the extractor through its load options.
                var extractorReceiver = new NotificationReceiver();
                LoadOptions options   = new LoadOptions
                {
                    NotificationReceiver = extractorReceiver
                };

                using (var cellsExtractor = new CellsTextExtractor(resolvedPath, options))
                {
                    Console.WriteLine(cellsExtractor.ExtractAll());
                }
                //ExEnd:LoggerWithExtractorFactory
            }
        /// <summary>
        /// Extracts one row from the first sheet (index 0) of an uploaded file through a
        /// <c>CellsTextExtractor</c> and returns it as JSON.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <param name="rowIndex">Row index passed to <c>CellsSheetInfo.ExtractRow</c>.</param>
        /// <param name="columnIndex">Column argument passed to <c>CellsSheetInfo.ExtractRow</c>.</param>
        /// <returns>JSON list with the extracted row, or the exception message when extraction throws.</returns>
        public ActionResult ExtractRowAndColumn([FromBody] string fileName, [FromBody] int rowIndex, [FromBody] string columnIndex)
        {
            List<string> extractedText = new List<string>();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string filePath = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                // Dispose the extractor deterministically so the file is released after the request.
                using (CellsTextExtractor extractor = new CellsTextExtractor(filePath))
                {
                    int            sheetIndex = 0;
                    CellsSheetInfo sheetInfo  = extractor.GetSheetInfo(sheetIndex);
                    extractedText.Add(sheetInfo.ExtractRow(rowIndex, columnIndex));
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
Example #20
0
 /// <summary>
 /// Click handler of the extract button: builds an <c>ExtractorFactory</c> from the three
 /// input file boxes and the output path box and runs the extraction. Asks for an output
 /// path when none is entered, and shows the message of any exception raised.
 /// </summary>
 private void extractButton_Click(object sender, System.EventArgs e)
 {
     if (!String.IsNullOrEmpty(outFilePath.Text))
     {
         try
         {
             // One factory covers all input files (Windows, Android, iOS).
             new ExtractorFactory(inFileWindows.Text, inFileAndroid.Text, inFileiOS.Text, outFilePath.Text)
                 .ExtractThemStrings();
         }
         catch (Exception failure)
         {
             MessageBox.Show(failure.Message);
         }
     }
     else
     {
         MessageBox.Show("Please enter an output file path!");
     }
 }
        /// <summary>
        /// Demonstrates metadata extraction through <c>ExtractorFactory.ExtractMetadata</c>:
        /// prints every metadata entry as "key = value", or a notice when the factory returns
        /// no metadata for the document.
        /// </summary>
        /// <param name="fileName">Name that <c>Common.getFilePath</c> resolves to the actual file path.</param>
        public static void UsingExtractorFactory(string fileName)
        {
            //ExStart:UsingExtractorFactory
            //get file actual path
            String           filePath = Common.getFilePath(fileName);
            ExtractorFactory factory  = new ExtractorFactory();
            //ExtractMetadata methods in ExtractorFactory class are marked as Obsolete from version 17.03 onwards(use Extractor class instead).
            MetadataCollection metadata = factory.ExtractMetadata(filePath);

            if (metadata == null)
            {
                Console.WriteLine("The document format is not supported");
                // Stop here: continuing would dereference the null collection below.
                return;
            }

            foreach (string key in metadata.Keys)
            {
                Console.WriteLine(string.Format("{0} = {1}", key, metadata[key]));
            }
            //ExEnd:UsingExtractorFactory
        }
Example #22
0
        /// <summary>
        /// Reads an uploaded file line by line through a <c>WordsFormattedTextExtractor</c>
        /// configured with a <c>PlainDocumentFormatter</c> that uses an ASCII table frame, and
        /// returns the lines as JSON.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder.</param>
        /// <returns>
        /// JSON list of extracted lines; when extraction throws, the exception message is
        /// appended to whatever was collected so far.
        /// </returns>
        public ActionResult ExtractTableWithFormat([FromBody] string fileName)
        {
            List<string> extractedText = new List<string>();

            // fileName comes from the request body: keep only its file-name part so that
            // directory segments such as "..\" cannot escape the uploads folder.
            string filePath = Server.MapPath("../App_Data//Uploads//" + System.IO.Path.GetFileName(fileName));

            try
            {
                // Dispose the extractor deterministically so the file is released after the request.
                using (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(filePath))
                {
                    PlainTableFrame frame = new PlainTableFrame(
                        PlainTableFrameAngle.ASCII,
                        PlainTableFrameEdge.ASCII,
                        PlainTableFrameIntersection.ASCII,
                        new PlainTableFrameConfig(true, true, true, false));
                    extractor.DocumentFormatter = new PlainDocumentFormatter(frame);

                    // ExtractLine signals the end of the document with null.
                    string line;
                    while ((line = extractor.ExtractLine()) != null)
                    {
                        extractedText.Add(line);
                    }
                }
            }
            catch (Exception ex)
            {
                extractedText.Add(ex.Message);
            }
            return Json(extractedText, JsonRequestBehavior.AllowGet);
        }
        /// <summary>
        /// Copies every file in <c>_filesInfo</c> into a sub-folder of <c>_destinationPath</c>
        /// named after a year. The year comes from the date part of the value the "datetime"
        /// extractor returns for tag 0x9003, or from the file's creation time when that value
        /// cannot be parsed. Copy failures are reported on the error stream and skipped.
        /// </summary>
        protected override void SpecificProcessing()
        {
            IExtractor <Image> extractor = ExtractorFactory.CreateExtractor("datetime");

            foreach (var item in _filesInfo)
            {
                DateTime taken;

                // Read the date first and release the image and stream before copying, so the
                // copy does not compete with this method's own open handle on the source file.
                using (FileStream fs = new FileStream(item, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (Image img = Image.FromStream(fs))
                {
                    // Keep only the date part (text before the first space) and turn
                    // "yyyy:MM:dd" into "yyyy/MM/dd" so it can be parsed as a date.
                    string data = extractor.Extract(img, 0x9003).Split(' ')[0].Replace(':', '/');

                    if (!DateTime.TryParse(data, out taken) || taken == DateTime.MinValue)
                    {
                        taken = File.GetCreationTime(item);
                    }
                }

                string yearDirectory = Path.Combine(_destinationPath.FullName, taken.Year.ToString());

                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(yearDirectory);

                try
                {
                    File.Copy(item, Path.Combine(yearDirectory, Path.GetFileName(item)), true);
                }
                catch (Exception ex)
                {
                    // Still best-effort, but the failure is now reported instead of discarded.
                    Console.Error.WriteLine("Failed to copy '{0}': {1}", item, ex.Message);
                }
            }
            Console.WriteLine();
            Console.WriteLine("SORT BY YEARS FINISHED!!!!");
            Console.WriteLine();
        }
Example #24
0
        /// <summary>
        /// Demonstrates <c>ContainerEnumerator</c>, which enumerates all entities of a group
        /// of containers: each entity's stream is opened and its text (or a notice when no
        /// extractor is available) is printed to the console.
        /// </summary>
        public static void EnumeratingAllEntities()
        {
            //ExStart:EnumeratingAllEntities
            // Placeholders: the sample passes null for all three constructor arguments.
            IContainerFactory factoryOfContainers = null;
            MediaTypeDetector mediaTypeDetector   = null;
            Container         rootContainer       = null;

            ExtractorFactory extractorFactory = new ExtractorFactory();
            var entities = new ContainerEnumerator(factoryOfContainers, mediaTypeDetector, rootContainer);

            while (entities.MoveNext())
            {
                using (var entityStream = entities.Current.OpenStream())
                using (var textExtractor = extractorFactory.CreateTextExtractor(entityStream))
                {
                    Console.WriteLine(textExtractor != null ? textExtractor.ExtractAll() : "document isn't supported");
                }
            }
            //ExEnd:EnumeratingAllEntities
        }
Example #25
0
        /// <summary>
        /// Loads every file in <c>_filesInfo</c> as an image and reads its GPS latitude and
        /// longitude tags with <c>ExifLib.ExifReader</c>. The values read are not used yet.
        /// </summary>
        protected override void SpecificProcessing()
        {
            IExtractor <Image> extractor = ExtractorFactory.CreateExtractor("coordinates");

            foreach (var item in _filesInfo)
            {
                using (Image img = Image.FromFile(item))
                // Dispose the reader too, so the file it opens is released for each item.
                using (ExifLib.ExifReader reader = new ExifLib.ExifReader(item))
                {
                    //var data = extractor.Extract(img, 0).Split(' ');

                    // NOTE(review): EXIF GPS coordinates are normally stored as
                    // degree/minute/second triples - confirm that reading them into a
                    // scalar double is what is intended here.
                    double lat;
                    reader.GetTagValue(ExifLib.ExifTags.GPSLatitude, out lat);
                    double lon;
                    reader.GetTagValue(ExifLib.ExifTags.GPSLongitude, out lon);

                    //double alt = Double.Parse( data[0]);
                    //double lon = Double.Parse(data[1]);
                }
            }
        }
Example #26
0
        /// <summary>
        /// Extracts <paramref name="selectedItem"/> from the archive behind
        /// <paramref name="accessor"/> to the path given by <c>GetExtractionPath</c>, removes
        /// the output file when extraction fails, clears <c>_extractionThread</c> and raises
        /// the extraction-completed notification with the outcome.
        /// </summary>
        /// <param name="accessor">Accessor of the archive on the local file system.</param>
        /// <param name="selectedItem">Archive entry to extract.</param>
        protected void DoExtract(ILocalFsResourceAccessor accessor, string selectedItem)
        {
            string archiveName = accessor.CanonicalLocalResourcePath.LastPathSegment.Path;
            string targetPath  = GetExtractionPath(archiveName, selectedItem);

            Logger.Debug("GoodMergeExtractor: Extracting '{0}' from '{1}' to '{2}'", selectedItem, archiveName, targetPath);

            bool succeeded;
            using (IExtractor archive = ExtractorFactory.Create(accessor.LocalFileSystemPath))
            {
                archive.ExtractionProgress += OnExtractionProgress;
                succeeded = archive.ExtractArchiveFile(selectedItem, targetPath);
            }

            if (succeeded == false)
            {
                // A failed extraction sometimes leaves an empty file behind.
                DeleteExtractedFile(targetPath);
            }

            _extractionThread = null;

            var completedArgs = new ExtractionCompletedEventArgs(selectedItem, targetPath, succeeded);
            OnExtractionCompleted(completedArgs);
        }
Example #27
0
        /// <summary>
        /// Creates an extractor from <paramref name="rules"/>, parses <paramref name="text"/>
        /// with it and passes the expected and actual results to <c>_check</c>.
        /// </summary>
        /// <param name="text">Text to parse.</param>
        /// <param name="rules">Rules the extractor is created from.</param>
        /// <param name="etalon">Expected extraction results.</param>
        /// <param name="settings">Extractor settings; a default instance is used when null.</param>
        /// <param name="extensions">Optional extensions forwarded to the factory.</param>
        /// <param name="morph">Optional morphological analyzer forwarded to the factory.</param>
        /// <param name="rulesToExtract">Rule names forwarded to the parse call.</param>
        public static void Check(string text,
                                 string rules,
                                 IEnumerable <ExtractionDic> etalon,
                                 ExtractorSettings settings          = null,
                                 IEnumerable <IExtension> extensions = null,
                                 IMorphAnalizer morph = null,
                                 params string[] rulesToExtract)
        {
            // Fall back to default settings when the caller supplied none.
            settings = settings ?? new ExtractorSettings();

            var parsed = ExtractorFactory
                         .Create(rules, settings, extensions: extensions, morph: morph)
                         .Parse(text, rulesToExtract);

            var expected = etalon.ToArray();
            var actual   = parsed.ToArray();
            _check(expected, actual);
        }
Example #28
0
        /// <summary>
        /// Searches an uploaded Words document for a keyword and returns every hit together
        /// with the text to its left and right.
        /// </summary>
        /// <param name="fileName">Name of a file inside the uploads folder; any directory part is ignored.</param>
        /// <param name="keyWord">Text to search for.</param>
        /// <returns>
        /// JSON list with three entries (left text, found text, right text) per hit, or the
        /// exception message when the search fails.
        /// </returns>
        public ActionResult SearchText([FromBody] string fileName, [FromBody] string keyWord)
        {
            List <string> extractedText = new List <string>();

            try
            {
                //ExStart:SearchTextInDocuments
                // fileName comes from the request: keep only its last path segment so values
                // such as "..\..\web.config" cannot escape the uploads folder.
                string safeFileName = System.IO.Path.GetFileName(fileName);

                //get file actual path
                string filePath = Server.MapPath("../App_Data//Uploads//" + safeFileName);

                using (WordsTextExtractor extractor = new WordsTextExtractor(filePath))
                {
                    ListSearchHandler handler = new ListSearchHandler();
                    extractor.Search(new SearchOptions(SearchHighlightOptions.CreateFixedLengthOptions(10)), handler, null, new string[] { keyWord });

                    if (handler.List.Count == 0)
                    {
                        Console.WriteLine("Not found");
                    }
                    else
                    {
                        for (int i = 0; i < handler.List.Count; i++)
                        {
                            var hit = handler.List[i];

                            extractedText.Add("Text at Left: " + hit.LeftText);

                            extractedText.Add("Found Text: " + hit.FoundText);

                            extractedText.Add("Text at Right: " + hit.RightText);
                        }
                    }
                }
                //ExEnd:SearchTextInDocuments
            }
            catch (Exception ex)
            {
                // Path resolution and search failures are both reported to the caller as text.
                extractedText.Add(ex.Message);
            }
            return(Json(extractedText, JsonRequestBehavior.AllowGet));
        }
        /// <summary>
        /// Reads every entity of a ZIP folder and prints its extracted text to the console.
        /// Entities for which no text extractor can be created are reported instead of
        /// causing a failure.
        /// </summary>
        /// <param name="folderName">Name of the zipped folder</param>
        public static void ReadConcreteFile(string folderName)
        {
            //ExStart:ReadConcreteFile
            //get ZIP folder's path
            string           folderPath       = Common.getFilePath(folderName);
            ExtractorFactory extractorFactory = new ExtractorFactory();

            //initialize ZIP container
            using (var container = new ZipContainer(folderPath))
            {
                //loop through all the entities in the folder
                for (int i = 0; i < container.Entities.Count; i++)
                {
                    // Dispose the entity stream together with the extractor so no stream
                    // is left open per entity.
                    using (var stream = container.Entities[i].OpenStream())
                    using (TextExtractor extractor = extractorFactory.CreateTextExtractor(stream))
                    {
                        // The factory yields null for unsupported documents; guard against it
                        // instead of dereferencing a null extractor.
                        Console.WriteLine(extractor == null ? "Document type isn't supported" : extractor.ExtractAll());
                    }
                }
            }
            //ExEnd:ReadConcreteFile
        }
Example #30
0
        /// <summary>
        /// Extracts text from the "META-INF\container.xml" entity of a ZIP container and
        /// prints it to the console.
        /// </summary>
        /// <param name="folderName">Name of the zipped folder</param>
        /// <exception cref="GroupDocsTextException">The entity is not present in the container.</exception>
        public static void RetrieveEntity(string folderName)
        {
            //ExStart:RetrieveEntity_17.12
            //get ZIP folder's path
            string folderPath = Common.GetFilePath(folderName);

            ExtractorFactory extractorFactory = new ExtractorFactory();

            //initialize ZIP container
            using (var container = new ZipContainer(folderPath))
            {
                Container.Entity containerEntry = container.GetEntity("META-INF\\container.xml");
                // If the entity isn't found
                if (containerEntry == null)
                {
                    throw new GroupDocsTextException("File not found");
                }

                // Stacked using statements release both the entity stream and the extractor,
                // even when extraction throws; a null extractor is skipped by using.
                using (var stream = containerEntry.OpenStream())
                using (TextExtractor extractor = extractorFactory.CreateTextExtractor(stream))
                {
                    // Extract a text (if the document type is supported)
                    Console.WriteLine(extractor == null ? "Document type isn't supported" : extractor.ExtractAll());
                }
            }

            //ExEnd:RetrieveEntity_17.12
        }