Example #1
0
        /// <summary>
        /// Provides the means to extract the text to be indexed from the file specified
        /// </summary>
        /// <param name="file">
        /// The file to read. Its extension must match one of the entries in
        /// <c>SupportedExtensions</c>; the match ignores case.
        /// </param>
        /// <returns>The text produced by the media parser for the file.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when the extension of <paramref name="file"/> is not listed in <c>SupportedExtensions</c>.
        /// </exception>
        /// <remarks>
        /// As a side effect the metadata reported by the parser is stored in <c>_extractedMetaFromTika</c>.
        /// </remarks>
        protected virtual string ExtractTextFromFile(FileInfo file)
        {
            var extension = file.Extension;

            // File extensions are identifiers, not linguistic text: compare them ordinally and
            // case-insensitively instead of upper-casing with the current culture, which can
            // mis-compare under some cultures (e.g. Turkish dotted/dotless "i").
            var isSupported = SupportedExtensions.Any(
                x => string.Equals(x, extension, StringComparison.OrdinalIgnoreCase));

            if (!isSupported)
            {
                throw new NotSupportedException("The file with the extension specified is not supported");
            }

            var mediaParser = new MediaParser();

            // Handed to the parser as its error callback; it raises an indexing error event
            // (with -1 as the item id) rather than throwing.
            Action<Exception> onError = e => OnIndexingError(new IndexingErrorEventArgs("Could not read media item", -1, e));

            return mediaParser.ParseMediaText(file.FullName, onError, out _extractedMetaFromTika);
        }
Example #2
0
        /// <summary>
        /// Buffers the given stream into memory and, when it yields at least one byte, passes the
        /// bytes to a <c>MediaParser</c> to obtain the extracted text and metadata.
        /// </summary>
        /// <param name="stream">The stream to read; its remaining content is copied into a byte array.</param>
        /// <returns>
        /// An <c>ExtractionResult</c>. <c>ExtractedText</c> and <c>MetaData</c> are assigned only when
        /// the stream produced data; otherwise the freshly constructed result is returned as-is.
        /// </returns>
        private ExtractionResult ExtractContentFromStream(Stream stream)
        {
            var extractionResult = new ExtractionResult();

            byte[] data;
            using (var ms = new MemoryStream())
            {
                stream.CopyTo(ms);
                data = ms.ToArray();
            }

            // ToArray never returns null, so an empty buffer is the only "nothing to parse" case.
            if (data.Length == 0)
            {
                return extractionResult;
            }

            // Handed to the parser as its error callback; it raises an indexing error event
            // (with -1 as the item id) rather than throwing.
            Action<Exception> onError = e => OnIndexingError(new IndexingErrorEventArgs("Could not read media item", -1, e));

            // Assigned by the parser through the out parameter, so no initial dictionary is needed.
            Dictionary<string, string> metaData;

            var mediaParser = new MediaParser();
            extractionResult.ExtractedText = mediaParser.ParseMediaText(data, onError, out metaData);
            extractionResult.MetaData = metaData;

            return extractionResult;
        }