DocumentIdentifier C# (CSharp) примеры использования

Пример #1

0

Показать файл

Файл: RTStroke.cs Проект: lamost710/videoconference

 /// <summary>
 /// Produces a one-line textual description of this stroke message, formatted
 /// with the current culture.
 /// </summary>
 /// <returns>
 /// A string listing the document, page and stroke identifiers, the finished
 /// flag and the length of the ink data.
 /// </returns>
 public override string ToString()
 {
     const string template =
         "RTStrokeAdd [ DocumentIdentifier: {0}, PageIdentifier: {1}, StrokeIdentifier: {2}, StrokeFinished: {3}, InkData Size: {4} ]";

     // Evaluate the parts in the same left-to-right order as the placeholders.
     string documentText = DocumentIdentifier.ToString();
     string pageText     = PageIdentifier.ToString();
     string strokeText   = StrokeIdentifier.ToString();
     string finishedText = StrokeFinished.ToString();

     return string.Format(CultureInfo.CurrentCulture, template,
                          documentText, pageText, strokeText, finishedText, InkData.Length);
 }

Пример #2

0

Показать файл

Файл: InMemoryStorage.cs Проект: Amichai/Prax

 /// <summary>
 /// Looks up the stored document with the given ID and hands back a clone of it.
 /// </summary>
 /// <param name="id">Identifier of the document to fetch.</param>
 /// <returns>A clone of the stored document, or null when no document matches.</returns>
 public Document GetDocument(DocumentIdentifier id)
 {
     InMemoryDocument match;

     // Only the lookup runs under the lock; cloning happens after it is released.
     lock (list)
     {
         match = FindDoc(id);
     }

     if (match == null)
     {
         return null;
     }

     return match.Clone();
 }

Пример #3

0

Показать файл

Файл: AzureStorageClient.cs Проект: Amichai/Prax

        /// <summary>
        /// Resolves the blob named after the given ID and wraps it in a <c>BlobDocument</c>.
        /// </summary>
        /// <param name="id">Identifier whose <c>FileName()</c> is used as the blob name.</param>
        /// <returns>
        /// A new <c>BlobDocument</c> for the blob, or null when fetching the blob's
        /// attributes throws a <c>StorageClientException</c>.
        /// </returns>
        public Document GetDocument(DocumentIdentifier id)
        {
            var blobName = id.FileName();
            var blob = container.GetBlobReference(blobName);

            try
            {
                blob.FetchAttributes();
            }
            catch (StorageClientException)
            {
                // If the blob was deleted
                return null;
            }

            return new BlobDocument(blob);
        }

Пример #4

0

Показать файл

Файл: RTStroke.cs Проект: cucacutexice/AIGA

 /// <summary>
 /// Produces a one-line textual description of this stroke message.
 /// </summary>
 /// <returns>
 /// A string listing the document, page and stroke identifiers, the finished
 /// flag and the length of the ink data.
 /// </returns>
 public override string ToString()
 {
     // Pieces are listed in output order; each value is evaluated in the same
     // sequence as the text it follows.
     string[] pieces =
     {
         "RTStrokeAdd ",
         "{ DocumentIdentifier: ", DocumentIdentifier.ToString(),
         ", PageIdentifier: ", PageIdentifier.ToString(),
         ", StrokeIdentifier: ", StrokeIdentifier.ToString(),
         ", StrokeFinished: ", StrokeFinished.ToString(),
         ", InkData Size: ", InkData.Length.ToString(),
         " }"
     };

     return string.Concat(pieces);
 }

Пример #5

0

Показать файл

Файл: AzureStorageClient.cs Проект: Amichai/Prax

        /// <summary>
        /// Deletes the blob behind the given document together with the blob of every
        /// alternate stream the document lists.
        /// </summary>
        /// <param name="id">Identifier of the document to delete.</param>
        /// <remarks>
        /// When <c>GetDocument</c> returns null for the ID the method does nothing,
        /// so deleting a document that no longer exists is a no-op.
        /// </remarks>
        public void DeleteDocument(DocumentIdentifier id)
        {
            var doc = (BlobDocument)GetDocument(id);

            // GetDocument returns null when the blob is already gone; without this
            // guard the loop below would fail with a NullReferenceException.
            if (doc == null) return;

            // Remove the alternate-stream blobs first, then the main blob.
            foreach (var alternateStream in doc.AlternateStreamNames) {
                doc.CreateAlternateBlob(alternateStream).Delete();
            }

            doc.Blob.Delete();
        }

Пример #6

0

Показать файл

        /// <summary>
        /// Opens the file at <c>Path</c>, runs format identification on it and passes
        /// the identification result to <c>WriteObject</c>.
        /// </summary>
        protected override void ProcessRecord()
        {
            IdResult identification;

            // The file stream is only needed for the identification call itself.
            using (var input = File.OpenRead(Path))
            {
                identification = DocumentIdentifier.Identify(input, Path);
            }

            WriteObject(identification);
        }

Пример #7

0

Показать файл

Файл: AzureStorageClient.cs Проект: Amichai/Prax

        /// <summary>
        /// Creates a new blob-backed document for the user and uploads the supplied stream into it.
        /// </summary>
        /// <param name="userId">Owner of the new document; combined with a fresh GUID to form the identifier.</param>
        /// <param name="name">Name given to the new document.</param>
        /// <param name="mimeType">Value assigned to the blob's content type.</param>
        /// <param name="document">Stream whose content is uploaded to the blob.</param>
        /// <param name="length">Not read by this method.</param>
        /// <returns>The <c>DocumentId</c> of the newly created identifier.</returns>
        public Guid UploadDocument(Guid userId, string name, string mimeType, Stream document, long length)
        {
            var identifier = new DocumentIdentifier(userId, Guid.NewGuid());
            var emptyBlob = CreateBlob(identifier);
            var uploaded = new BlobDocument(identifier, name, emptyBlob);

            // Metadata is pushed before the content type is set and the content is uploaded.
            uploaded.UpdateMetadata();
            uploaded.Blob.Properties.ContentType = mimeType;
            uploaded.Blob.UploadFromStream(document);

            return identifier.DocumentId;
        }

Пример #8

0

Показать файл

Файл: DocumentProvenance.cs Проект: tonybetts-student/clinical-document-library-dotnet

        /// <summary>
        /// Validates this CDA document context and appends any findings to <paramref name="messages"/>.
        /// </summary>
        /// <param name="path">The path to this object as a string</param>
        /// <param name="messages">The validation messages; this method may add to the list</param>
        public void Validate(string path, List <ValidationMessage> messages)
        {
            var builder = new ValidationBuilder(path, messages);

            // DocumentType is checked as a required argument.
            builder.ArgumentRequiredCheck("DocumentType", DocumentType);

            // DocumentIdentifier is only validated when one has been supplied.
            if (DocumentIdentifier != null)
            {
                DocumentIdentifier.Validate(builder.Path + "DocumentIdentifier", messages);
            }

            // Author is checked as required; its own validation runs only when the check succeeds.
            var authorCheckPassed = builder.ArgumentRequiredCheck("Author", Author);
            if (authorCheckPassed)
            {
                Author.Validate(builder.Path + "Author", messages);
            }
        }

Пример #9

0

Показать файл

Файл: MainWindow.xaml.cs Проект: dotfurther/OpenDiscoverSDK

        /// <summary>
        /// Constructor. Hooks the dispatcher's unhandled-exception event, initializes the UI,
        /// fills the classification combo box and the supported-format count, loads the embedded
        /// directions text, and builds the window title from the referenced OpenDiscoverSDK
        /// assembly version and the entry assembly's target framework name.
        /// </summary>
        public MainWindow()
        {
            Application.Current.DispatcherUnhandledException += Current_DispatcherUnhandledException;

            InitializeComponent();

            Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

            _classificationNames = Enum.GetNames(typeof(IdClassification));
            Array.Sort(_classificationNames);

            _docClassComboBox.ItemsSource = _classificationNames;
            _allFileFormats = DocumentIdentifier.SupportedFormats();
            _totalFileFormatsTextBlock.Text = _allFileFormats.Count.ToString();

            _parallelForOptions.MaxDegreeOfParallelism = 8;
            Loaded += MainWindow_Loaded;

            var assembly = Assembly.GetExecutingAssembly();

            // The directions text is read from a resource embedded in this assembly.
            using (var textStreamReader = new StreamReader(assembly.GetManifestResourceStream("DocumentIdentifierExample.Directions.txt")))
            {
                _identificationDirectionsTextBox.Text = textStreamReader.ReadToEnd();
            }

            //
            // Set application title with Open Discover SDK Version and Framework Version:
            //
            var openDiscoverSDKVersion = "unknown";

            // Reuse the assembly obtained above instead of querying it a second time.
            foreach (var name in assembly.GetReferencedAssemblies())
            {
                // Assembly names are identifiers, not linguistic text, so compare ordinally
                // rather than with the current culture's rules.
                if (name.Name.StartsWith("OpenDiscoverSDK", StringComparison.Ordinal))
                {
                    openDiscoverSDKVersion = name.Version.ToString();
                    break;
                }
            }

            var frameworkVer = Assembly.GetEntryAssembly()?.GetCustomAttribute <TargetFrameworkAttribute>()?.FrameworkName;

            Title = string.Format("{0}          OpenDiscoverSDK Version = {1}         Framework Version = {2}", Title, openDiscoverSDKVersion,
                                  frameworkVer ?? "Unknown");
        }

Пример #10

0

Показать файл

Файл: ThreadedDocumentExecutor.cs Проект: Amichai/Prax

        /// <summary>
        /// Processes the document with the given ID: marks it as scanning, runs the recognizer over an
        /// in-memory copy of its content while a progress reporter is active, uploads one converted
        /// stream per result converter, and finally marks the document as scanned.
        /// </summary>
        /// <param name="id">Identifier of the document to process.</param>
        /// <remarks>
        /// Returns without further work whenever the storage client returns null for the document.
        /// Exceptions thrown inside the try block are caught; if the document can still be loaded, its
        /// state is set to <c>Error</c> and the exception text is uploaded under the name "Error".
        /// </remarks>
        public void Execute(DocumentIdentifier id)
        {
            try {
                using (var reporter = new AsyncProgressReporter(StorageClient, id)) {
                    reporter.StartReporter();

                    var document = StorageClient.GetDocument(id);
                    if (document == null) return;
                    document.State = DocumentState.Scanning;
                    document.ScanProgress = 0;
                    StorageClient.UpdateDocument(document);

                    // Copy the whole document into memory before handing it to the recognizer.
                    // NOTE(review): after CopyTo the MemoryStream's position is at its end and it is not
                    // rewound here - confirm that Recognizer.Recognize seeks to the start itself.
                    var stream = new MemoryStream();
                    using (var source = document.OpenRead())
                        source.CopyTo(stream);
                    // ToList() forces the recognition to complete before the reporter is stopped.
                    var results = new ReadOnlyCollection<RecognizedSegment>(
                        Recognizer.Recognize(stream, reporter).ToList()
                    );
                    reporter.StopReporter();

                    document = StorageClient.GetDocument(document.Id);	//Refresh properties before saving them (eg, if it was renamed)
                    if (document == null) return;

                    // Each converter's output is uploaded under the name of its output format.
                    // NOTE(review): the stream returned by OpenRead() below and the converted stream are not
                    // disposed in this method - confirm whether Convert/UploadStream take ownership.
                    foreach (var converter in ResultConverters) {
                        var convertedStream = converter.Convert(document.OpenRead(), results);
                        document.UploadStream(converter.OutputFormat.ToString(), convertedStream, convertedStream.Length);
                    }

                    document.ScanProgress = 100;
                    document.State = DocumentState.Scanned;
                    StorageClient.UpdateDocument(document);
                }
            } catch (Exception ex) {
                // Reload the document so the error is recorded on its current stored version.
                var document = StorageClient.GetDocument(id);
                if (document == null) return;

                document.State = DocumentState.Error;
                document.UploadString("Error", ex.ToString());
                StorageClient.UpdateDocument(document);
            }
        }

Пример #11

0

Показать файл

Файл: SimpleDocumentExecutor.cs Проект: Amichai/Prax

        /// <summary>
        /// Processes the document with the given ID: marks it as scanning, runs the recognizer over an
        /// in-memory copy of its content, uploads one converted stream per result converter, and finally
        /// marks the document as scanned.
        /// </summary>
        /// <param name="id">Identifier of the document to process.</param>
        /// <remarks>
        /// Returns without further work whenever the storage client returns null for the document.
        /// Exceptions thrown inside the try block are caught; if the document can still be loaded, its
        /// state is set to <c>Error</c> and the exception text is uploaded under the name "Error".
        /// </remarks>
        public void Execute(DocumentIdentifier id)
        {
            var document = StorageClient.GetDocument(id);
            if (document == null)
            {
                return;
            }

            try
            {
                document.State = DocumentState.Scanning;
                document.ScanProgress = 0;
                StorageClient.UpdateDocument(document);

                // Buffer the whole document in memory for the recognizer.
                var buffer = new MemoryStream();
                using (var source = document.OpenRead())
                {
                    source.CopyTo(buffer);
                }

                // ToList() forces recognition to finish before the document is reloaded.
                var segments = Recognizer.Recognize(buffer, new StorageProgressReporter(this, id)).ToList();
                var results = new ReadOnlyCollection<RecognizedSegment>(segments);

                // Reload the document before saving converter output and final state.
                document = StorageClient.GetDocument(document.Id);
                if (document == null)
                {
                    return;
                }

                foreach (var converter in ResultConverters)
                {
                    var converted = converter.Convert(document.OpenRead(), results);
                    document.UploadStream(converter.OutputFormat.ToString(), converted, converted.Length);
                }

                document.ScanProgress = 100;
                document.State = DocumentState.Scanned;
                StorageClient.UpdateDocument(document);
            }
            catch (Exception ex)
            {
                document = StorageClient.GetDocument(id);
                if (document == null)
                {
                    return;
                }

                document.State = DocumentState.Error;
                document.UploadString("Error", ex.ToString());
                StorageClient.UpdateDocument(document);
            }
        }

Пример #12

0

Показать файл

Файл: InMemoryStorage.cs Проект: Amichai/Prax

 ///<summary>Looks up the stored document whose Id equals the given ID.</summary>
 ///<remarks>Must be called inside lock(list).</remarks>
 ///<returns>The matching document, or the default value (null) when none matches.</returns>
 InMemoryDocument FindDoc(DocumentIdentifier id)
 {
     var match = list.SingleOrDefault(candidate => candidate.Id == id);
     return match;
 }

Пример #13

0

Показать файл

Файл: AzureStorageClient.cs Проект: Amichai/Prax

 ///<summary>Initializes a new BlobDocument backed by the given blob.</summary>
 ///<param name="id">Identifier passed to the base constructor.</param>
 ///<param name="name">Name assigned to the document.</param>
 ///<param name="emptyBlob">Blob stored as this document's Blob.</param>
 internal BlobDocument(DocumentIdentifier id, string name, CloudBlob emptyBlob)
     : base(id)
 {
     // Blob and Name are assigned before SetInitialValues runs.
     Blob = emptyBlob;
     Name = name;

     SetInitialValues();
 }

Пример #14

0

Показать файл

Файл: MainWindow.xaml.cs Проект: dotfurther/OpenDiscoverSDK

        //
        // Threading:
        //
        #region private void WorkerThread()
        /// <summary>
        /// Enumerates every file under <c>_rootPath</c> (recursively), identifies each file's format in
        /// parallel, enqueues one result per file in <c>_resultsQueue</c>, periodically updates the
        /// progress bar and status text, and records timing statistics when the loop finishes.
        /// </summary>
        /// <remarks>
        /// <c>WorkerCompleted</c> is always called from the finally block. An exception escaping the
        /// outer try block is shown in a message box via the dispatcher.
        /// </remarks>
        private void WorkerThread()
        {
            try
            {
                var allFiles       = Directory.GetFiles(_rootPath, "*", SearchOption.AllDirectories);
                var totalFiles     = allFiles.Length;
                var totalStopWatch = Stopwatch.StartNew();
                // Used as a set of distinct format IDs: only the keys and the final Count are read.
                var uniqueIdSet    = new ConcurrentDictionary <Id, int>();

                // Show and reset the progress bar on the dispatcher before work begins.
                Dispatcher.Invoke((Action) delegate
                {
                    try
                    {
                        _progressBar.Visibility = Visibility.Visible;
                        _progressBar.Value      = 0;
                    }
                    catch { } // Exceptions from the UI update are swallowed.
                });

                // NOTE(review): this single Stopwatch is read and restarted from the parallel loop body
                // below without synchronization - confirm that is acceptable for progress throttling.
                var lastUpdateStopWatch = Stopwatch.StartNew();

                //
                // Parallelize the file format identification:
                //
                Parallel.ForEach(allFiles, _parallelForOptions, file =>
                {
                    try
                    {
                        IdResult docFormat = null;

                        // WARNING: This example does not support long path names (> 255 chars) - .NET solutions for getting valid FileStreams for 'long file paths'
                        // can be found on the internet - also .NET 4.6.2 supports long file paths (web search for how to enable)
                        // Note: Minimum recommended buffer size of 16kb for file identification
                        using (var stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 16384))
                        {
                            docFormat = DocumentIdentifier.Identify(stream, file);
                        }

                        // The value 1 is a placeholder; only the key matters.
                        uniqueIdSet[docFormat.ID] = 1;

                        var docIdResult = new DocumentIdResult(file, docFormat);

                        if (docFormat.ID == Id.Unknown)
                        {
                            Interlocked.Increment(ref _totalUnknownFiles);
                        }

                        _resultsQueue.Enqueue(docIdResult);

                        // The queue count also includes results enqueued by the error path below.
                        var numFilesIdentified = _resultsQueue.Count;
                        var percentComplete    = 100.0 * numFilesIdentified / totalFiles;

                        // Update progress bar and status every 250 [ms]:
                        if (lastUpdateStopWatch.ElapsedMilliseconds >= 250)
                        {
                            lastUpdateStopWatch.Restart();
                            Dispatcher.BeginInvoke((Action) delegate
                            {
                                try
                                {
                                    _progressBar.Value  = percentComplete;
                                    _statusTextBox.Text = string.Format("  {0} files out of {1} identified...", numFilesIdentified, totalFiles);
                                }
                                catch { } // Exceptions from the UI update are swallowed.
                            });
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one file is counted and recorded as an 'unknown' result carrying
                        // the error message, so the remaining files are still processed.
                        Interlocked.Increment(ref _totalExceptions);

                        var docIDResult          = new DocumentIdResult(file, DocumentIdentifier.UnknownResult);
                        docIDResult.HasError     = true;
                        docIDResult.ErrorMessage = ex.Message;

                        _resultsQueue.Enqueue(docIDResult);
                    }
                });


                totalStopWatch.Stop(); //This time is going to include file I/O along with ID time
                _totalFileIdTimeMs  = totalStopWatch.Elapsed.TotalMilliseconds;
                _totalUniqueFileIds = uniqueIdSet.Count;
                // Math.Max(1, ...) avoids dividing by zero when no results were queued.
                _avgTimeToIdFile    = _totalFileIdTimeMs / Math.Max(1, _resultsQueue.Count);
            }
            catch (Exception ex)
            {
                Dispatcher.Invoke((Action) delegate
                {
                    MessageBox.Show(this, ex.Message, "Exception caught on thread 'WorkerThread' method");
                });
            }
            finally
            {
                WorkerCompleted();
            }
        }

Пример #15

0

Показать файл

Файл: SimpleDocumentExecutor.cs Проект: Amichai/Prax

 /// <summary>
 /// Records the given document ID in <c>canceledDocuments</c>.
 /// </summary>
 /// <param name="id">Identifier of the document to mark as canceled.</param>
 public void CancelProcessing(DocumentIdentifier id)
 {
     // The result of TryAdd is ignored, as in any repeated request for the same ID.
     const bool canceled = true;
     canceledDocuments.TryAdd(id, canceled);
 }

Пример #16

0

Показать файл

        /// <summary>
        /// Extracts all mail store items to root output path using either (1) the mail store folder structure (see remarks for warning)
        /// or (2) with a safer folder structure that outputs folders into root directory that contain at most 1000 message objects each
        /// and are named "1000", "2000", etc.
        /// </summary>
        /// <remarks>
        /// **WARNING**: Outputting mail store folder structure does not check for illegal path characters or for long file paths
        /// (i.e., paths greater than MAX_PATH), both which could cause an exception. It is left to user to write production level
        /// code to check for and replace illegal file system characters in mail store folder names and to also ensure that their
        /// application can handle paths greater than MAX_PATH.
        /// On completion, <c>TotalElapsedTimeMs</c> and <c>TotalItemsExtracted</c> are set from the elapsed time and the
        /// number of messages written.
        /// </remarks>
        /// <param name="rootOutputPath">Root folder path to extract archive items. Created if it does not exist.</param>
        /// <param name="saveWithMailStoreFolderStructure">
        /// If true, recreates the container folder hierarchy of the mail store by appending container relative paths to 'rootOutputPath'
        /// and creating these directories and extracting the emails associated with the container relative paths to these directories.
        /// NOTE: This method does not check for long file paths (paths greater than MAX_PATH)
        /// </param>
        public void ExtractItemsToDirectory(string rootOutputPath, bool saveWithMailStoreFolderStructure = false)
        {
            var totalEmailMessagesWritten = 0;
            var stopwatch = Stopwatch.StartNew();

            if (!Directory.Exists(rootOutputPath))
            {
                Directory.CreateDirectory(rootOutputPath);
            }

            // Numbered sub-folder used by the flat layout; the first one is "1000".
            var subFolder     = 1000;
            var subFolderName = subFolder.ToString();
            var path          = Path.Combine(rootOutputPath, subFolderName);

            if (!saveWithMailStoreFolderStructure)
            {
                if (!Directory.Exists(path))
                {
                    Directory.CreateDirectory(path);
                }
            }
            else if (_mailStoreContent.Root != null && _mailStoreContent.Root.SubFolders.Count > 0)
            {
                // We need to create the mail store "Root" directory 1st, all sub-folders and emails in mail store will have at least "Root" as part of their paths:
                var mailStoreRootPath = Path.Combine(rootOutputPath, _mailStoreContent.Root.DisplayName);
                if (!Directory.Exists(mailStoreRootPath))
                {
                    Directory.CreateDirectory(mailStoreRootPath);
                }

                FileSystemHelper.CreateContainerFolderDirectoryHierarchy(rootOutputPath, _mailStoreContent.Root.SubFolders);
            }


            ChildDocument childDocMsg;

            // Pull messages one at a time until the extractor returns null.
            while ((childDocMsg = _mailStoreExtractor.GetNextMessage()) != null)
            {
                // Extracted child items from archives and mailstores are not automatically identified
                // like they are for document attachments/embedded items - we Id item here but nothing is
                // done with the Id:
                childDocMsg.FormatId = DocumentIdentifier.Identify(childDocMsg.DocumentBytes, null);

                // File name selection: prefer EntryId plus the identified primary extension...
                var emailName = childDocMsg.EntryId;

                if (!string.IsNullOrWhiteSpace(childDocMsg.EntryId))
                {
                    var emailExt = childDocMsg.FormatId.PrimaryExtension;
                    childDocMsg.Name = string.Format("{0}{1}", childDocMsg.EntryId, emailExt == null ? "" : emailExt);
                    emailName        = childDocMsg.Name;
                }

                // ...otherwise fall back to the existing Name, and if that is blank too, synthesize
                // "<index + 1>-<format ID><extension>".
                if (string.IsNullOrWhiteSpace(emailName))
                {
                    emailName = childDocMsg.Name;
                    if (string.IsNullOrWhiteSpace(emailName))
                    {
                        var emailExt = childDocMsg.FormatId.PrimaryExtension;
                        childDocMsg.Name = string.Format("{0}-{1}{2}", childDocMsg.Index + 1, childDocMsg.FormatId.ID.ToString(), emailExt == null ? "" : emailExt);
                        emailName        = childDocMsg.Name;
                    }
                }

                // Counted before the write below, so the count also serves as the 1-based message number.
                ++totalEmailMessagesWritten;

                if (saveWithMailStoreFolderStructure)
                {
                    // A null container path is treated as empty, i.e. the message goes to the root output path.
                    path = Path.Combine(rootOutputPath, childDocMsg.ContainerRelativePath ?? "").Trim();
                    if (!Directory.Exists(path))
                    {
                        Directory.CreateDirectory(path);
                    }
                    File.WriteAllBytes(System.IO.Path.Combine(path, emailName), childDocMsg.DocumentBytes);
                }
                else
                {
                    // Save to folders under "rootOutputPath' names '1000', '2000','3000', etc that each hold 1000 emails until
                    // there are no more email objects to save:
                    File.WriteAllBytes(System.IO.Path.Combine(path, emailName), childDocMsg.DocumentBytes);

                    // Limit 1000 message files per sub-folder:
                    // (the next folder is created right after every 1000th write, whether or not
                    // another message follows)
                    if (totalEmailMessagesWritten % 1000 == 0)
                    {
                        subFolder    += 1000;
                        subFolderName = subFolder.ToString();
                        path          = Path.Combine(rootOutputPath, subFolderName);

                        if (!Directory.Exists(path))
                        {
                            Directory.CreateDirectory(path);
                        }
                    }
                }

                // Drop the reference to the message bytes once they have been written.
                childDocMsg.DocumentBytes = null;
            }

            stopwatch.Stop();
            TotalElapsedTimeMs  = stopwatch.Elapsed.TotalMilliseconds;
            TotalItemsExtracted = totalEmailMessagesWritten;
        }

Пример #17

0

Показать файл

Файл: SimpleDocumentExecutor.cs Проект: Amichai/Prax

 /// <summary>
 /// Creates a progress reporter bound to the given executor and document ID.
 /// </summary>
 /// <param name="executor">Executor stored for later use by this reporter.</param>
 /// <param name="id">Identifier of the document this reporter refers to.</param>
 public StorageProgressReporter(SimpleDocumentExecutor executor, DocumentIdentifier id)
 {
     // The two assignments are independent of each other.
     this.id = id;
     this.executor = executor;
 }

Пример #18

0

Показать файл

Файл: InMemoryStorage.cs Проект: Amichai/Prax

 /// <summary>
 /// Removes every stored document whose Id equals the given ID.
 /// </summary>
 /// <param name="id">Identifier of the document(s) to remove.</param>
 public void DeleteDocument(DocumentIdentifier id)
 {
     // The list is modified only while holding its lock.
     lock (list)
     {
         list.RemoveAll(stored => stored.Id == id);
     }
 }

Пример #19

0

Показать файл

Файл: AzureDocumentExecutor.cs Проект: Amichai/Prax

 /// <summary>
 /// Adds a queue message whose content is the document's file name.
 /// </summary>
 /// <param name="id">Identifier whose <c>FileName()</c> becomes the message content.</param>
 public void Execute(DocumentIdentifier id)
 {
     var message = new CloudQueueMessage(id.FileName());
     queue.AddMessage(message);
 }

Пример #20

0

Показать файл

        /// <summary>
        /// Override. Identifies the format of the file at <c>Path</c>, obtains a content extractor for that
        /// format, extracts content according to the extractor's type, and passes the resulting
        /// <c>DocumentContent</c> to <c>WriteObject</c>.
        /// </summary>
        /// <remarks>
        /// If obtaining the extractor fails, an "Error: ..." string is written instead and the method returns.
        /// If the extractor type matches none of the switch cases, <c>content</c> is still null when it is written.
        /// </remarks>
        protected override void ProcessRecord()
        {
            IdResult        idResult = null;
            DocumentContent content  = null;

            ContentExtractorType extractorType = ContentExtractorType.Document;

            using (var stream = File.OpenRead(Path))
            {
                idResult = DocumentIdentifier.Identify(stream, Path);

                //
                // Extract metadata content from document:
                //
                var settings = new ContentExtractionSettings();
                settings.ExtractionType           = ExtractionType.TextAndMetadata;
                settings.EmbeddedObjectExtraction = EmbeddedExtractionType.EmbeddedDocumentsAndMedia;
                settings.SensitiveItemCheck.Check = true;              // Enable sensitive item checks
                settings.Hashing.HashingType      = HashingType.BinaryAndContentHash;
                settings.LargeDocumentCritera     = 100 * 1024 * 1024; // Define a 'large' file as >= 100MB (this determines when
                                                                       // ContentExtractorType.LargeUnsupported and ContentExtractorType.LargeEncodedText
                                                                       // extractor interfaces are returned).

                //
                // Get Content Extractor for identified file format type:
                //
                var contentExtractorResult = ContentExtractorFactory.GetContentExtractor(stream, idResult, Path, settings);

                if (contentExtractorResult.HasError)
                {
                    WriteObject("Error: " + contentExtractorResult.Error);
                    return;
                }
                else
                {
                    extractorType = contentExtractorResult.ContentExtractor.ContentExtractorType;

                    // Each case casts the extractor to the interface for its type and extracts content.
                    switch (extractorType)
                    {
                    case ContentExtractorType.Archive:
                        #region Archive Extraction...
                    {
                        var archiveExtractor = (IArchiveExtractor)contentExtractorResult.ContentExtractor;

                        if (archiveExtractor.IsSplit)
                        {
                            // Detected that currently selected file is the main split segment for a split archive. Now we will use archive
                            // extractor helper method 'GetSplitSegmentStreamsInOrder' to get the other split archive segments (in proper order)
                            // in the same directory:
                            // NOTE(review): the streams returned through 'splitSegmentStreamsInOrder' are not disposed
                            // in this method - confirm who owns them.
                            Stream[] splitSegmentStreamsInOrder = null;
                            string[] splitSegmentNameInOrder    = null;

                            archiveExtractor.GetSplitSegmentStreamsInOrder(Path, out splitSegmentStreamsInOrder, out splitSegmentNameInOrder);

                            content = archiveExtractor.ExtractContent(splitSegmentStreamsInOrder, splitSegmentNameInOrder, Password);

                            //
                            // We have an archive level password (versus item level passwords):
                            //
                            if (content.Result == ContentResult.WrongPassword)
                            {
                                // wrong password (no action is taken in this example)
                            }
                        }
                        else
                        {
                            content = archiveExtractor.ExtractContent(Password);

                            //
                            // We have an archive level password (versus item level passwords):
                            //
                            if (content.Result == ContentResult.WrongPassword)
                            {
                                // wrong password (no action is taken in this example)
                            }
                        }
                    }
                        #endregion
                        break;

                    case ContentExtractorType.Document:
                        #region Document Extraction...
                    {
                        var docExtractor = ((IDocumentContentExtractor)contentExtractorResult.ContentExtractor);
                        content = docExtractor.ExtractContent();

                        if (content.Result == ContentResult.WrongPassword)
                        {
                            // wrong password (no action is taken in this example)
                        }
                    }
                        #endregion
                        break;

                    case ContentExtractorType.MailStore:
                        #region MailStore Extraction...
                    {
                        var mailStoreExtractor = ((IMailStoreExtractor)contentExtractorResult.ContentExtractor);
                        content = mailStoreExtractor.ExtractContent();
                    }
                        #endregion
                        break;

                    case ContentExtractorType.Database:
                        #region Database Extraction...
                    {
                        // We will only get table/column info (individual table extracted text can be quite large):
                        var databaseExtractor = ((IDatabaseExtractor)contentExtractorResult.ContentExtractor);
                        content = databaseExtractor.ExtractContent(Path);
                    }
                        #endregion
                        break;

                    case ContentExtractorType.DocumentStore:
                        #region DocumentStore Extraction...
                    {
                        var docExtractor = ((IDocumentContentExtractor)contentExtractorResult.ContentExtractor);
                        content = docExtractor.ExtractContent();
                    }
                        #endregion
                        break;

                    case ContentExtractorType.Unsupported:
                        #region Unsupported Type Extraction...
                    {
                        //
                        // Binary-to-text extraction: Note, if property ContentExtractionSettings.BinaryToTextOnUnsupportedTypes is false, then calling
                        //                            IUnsupportedExtractor.ExtractContent will only calculate binary hashes without performing binary-to-text.
                        //                            Binary-to-text is not useful for file formats that do not have any textual content (e.g., compressed archives or encrypted files)
                        //                            It is up to the user to filter these formats out using either file format Id or file format classification.
                        //
                        var docExtractor = ((IUnsupportedExtractor)contentExtractorResult.ContentExtractor);
                        content = docExtractor.ExtractContent();
                    }
                        #endregion
                        break;

                    case ContentExtractorType.LargeUnsupported:
                        #region 'Large' Unsupported Type Extraction...
                    {
                        // Ignore for this example, very 'large' binary-to-text that needs a FileStream could be extracted
                        content              = new DocumentContent(idResult);
                        content.Result       = ContentResult.UnsupportedError;
                        content.ErrorMessage = "Not supported for this example. Users should write output to a file stream when implemented";
                    }
                        #endregion
                        break;

                    case ContentExtractorType.LargeEncodedText:
                        #region 'Large' Encoded Text File Extraction...
                    {
                        // Ignore for this example
                        content              = new DocumentContent(idResult);
                        content.Result       = ContentResult.UnsupportedError;
                        content.ErrorMessage = "Not supported for this example. Users should write output to a file stream when implemented";
                    }
                        #endregion
                        break;
                    }
                }
            }

            // Written after the file stream has been closed; null if no switch case matched.
            WriteObject(content);
        }

Пример #21

0

Показать файл

Файл: ThreadedDocumentExecutor.cs Проект: Amichai/Prax

 /// <summary>
 /// Cancellation entry point for the given document; this implementation has an empty body and does nothing.
 /// </summary>
 /// <param name="id">Identifier of the document whose processing was asked to be canceled (not used here).</param>
 public void CancelProcessing(DocumentIdentifier id)
 {
 }

Пример #22

0

Показать файл

Файл: ThreadedDocumentExecutor.cs Проект: Amichai/Prax

 /// <summary>
 /// Creates a progress reporter bound to the given storage client and document ID.
 /// </summary>
 /// <param name="storage">Storage client stored for later use by this reporter.</param>
 /// <param name="documentId">Identifier of the document this reporter refers to.</param>
 public AsyncProgressReporter(IStorageClient storage, DocumentIdentifier documentId)
 {
     this.storage = storage;
     this.documentId = documentId;
 }

Пример #23

0

Показать файл

Файл: TypedProviderExtensions.cs Проект: dezfowler/JDoc

        /// <summary>
        /// Loads the document with the given ID from the provider and, once the load task
        /// finishes, deserializes its result into a <c>TypedDocument</c> of <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Type passed to <c>Deserialize</c>.</typeparam>
        /// <param name="provider">Provider used to load the document.</param>
        /// <param name="id">Identifier of the document to load.</param>
        /// <returns>A task producing the typed document.</returns>
        public static Task <TypedDocument <T> > LoadObject <T>(this IProvider provider, DocumentIdentifier id)
        {
            var pendingLoad = provider.LoadDocument(id);

            // The continuation reads Result only after the load task has completed.
            return pendingLoad.ContinueWith <TypedDocument <T> >(loaded => Deserialize <T>(loaded.Result));
        }

Пример #24

0

Показать файл

Файл: IStorageClient.cs Проект: Amichai/Prax

 ///<summary>Initializes a Document and stores the supplied ID in its Id member.</summary>
 ///<param name="id">Identifier assigned to the new document.</param>
 protected Document(DocumentIdentifier id)
 {
     this.Id = id;
 }

Пример #25

0

Показать файл

Файл: AzureStorageClient.cs Проект: Amichai/Prax

 /// <summary>
 /// Builds a <c>CloudBlob</c> addressed as "&lt;container name&gt;/&lt;file name of the ID&gt;".
 /// </summary>
 /// <param name="id">Identifier whose <c>FileName()</c> forms the second part of the address.</param>
 /// <returns>A new <c>CloudBlob</c> created with that address and the client.</returns>
 CloudBlob CreateBlob(DocumentIdentifier id)
 {
     var blobAddress = container.Name + "/" + id.FileName();
     return new CloudBlob(blobAddress, client);
 }

Пример #26

0

Показать файл

Файл: GetFileInfoCmdlet.cs Проект: dotfurther/OpenDiscoverSDK

        /// <summary>
        /// Identifies the file at <c>Path</c>, extracts its content with the extractor that matches the
        /// identified format, and writes one formatted text report (format info, metadata, attributes,
        /// database tables, hyperlinks, sensitive items, languages and optionally extracted text) to the pipeline.
        /// </summary>
        /// <remarks>
        /// When no content extractor can be obtained, only a single "Error: ..." line is written and the
        /// method returns without producing the report.
        /// </remarks>
        protected override void ProcessRecord()
        {
            IdResult        idResult   = null;
            DocumentContent content    = null;
            var             strBuilder = new StringBuilder();

            using (var stream = File.OpenRead(Path))
            {
                idResult = DocumentIdentifier.Identify(stream, Path);

                //
                // Content extraction settings:
                //
                var settings = new ContentExtractionSettings();
                settings.ExtractionType           = ExtractionType.TextAndMetadata;
                settings.Hashing.HashingType      = HashingType.BinaryAndContentHash;
                settings.SensitiveItemCheck.Check = true;

                //
                // Get Content Extractor for identified file format type:
                //
                var contentExtractorResult = ContentExtractorFactory.GetContentExtractor(stream, idResult, Path, settings);

                if (contentExtractorResult.HasError)
                {
                    WriteObject("Error: " + contentExtractorResult.Error);
                    return;
                }

                switch (contentExtractorResult.ContentExtractor.ContentExtractorType)
                {
                case ContentExtractorType.Archive:
                {
                    var archiveExtractor = (IArchiveExtractor)contentExtractorResult.ContentExtractor;

                    if (archiveExtractor.IsSplit)
                    {
                        // Detected that currently selected file is the main split segment for a split archive. Now we will use archive
                        // extractor helper method 'GetSplitSegmentStreamsInOrder' to get the other split archive segments (in proper order)
                        // in the same directory:
                        Stream[] splitSegmentStreamsInOrder = null;
                        string[] splitSegmentNameInOrder    = null;

                        try
                        {
                            archiveExtractor.GetSplitSegmentStreamsInOrder(Path, out splitSegmentStreamsInOrder, out splitSegmentNameInOrder);

                            content = archiveExtractor.ExtractContent(splitSegmentStreamsInOrder, splitSegmentNameInOrder, Password);
                        }
                        finally
                        {
                            // The segment streams are handed to this method through 'out' and nothing else here
                            // closes them, so release them once extraction is done (or has thrown). Disposing a
                            // stream twice is harmless should the extractor also close them.
                            if (splitSegmentStreamsInOrder != null)
                            {
                                foreach (var segmentStream in splitSegmentStreamsInOrder)
                                {
                                    if (segmentStream != null)
                                    {
                                        segmentStream.Dispose();
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        content = archiveExtractor.ExtractContent(Password);
                    }

                    //
                    // We have an archive level password (versus item level passwords):
                    //
                    if (content.Result == ContentResult.WrongPassword)
                    {
                        AppendPasswordError(strBuilder, "ERROR:  Archive requires a password");
                    }
                    break;
                }

                case ContentExtractorType.Document:
                {
                    var docExtractor = ((IDocumentContentExtractor)contentExtractorResult.ContentExtractor);

                    // NOTE(review): Password is not passed to ExtractContent here, unlike the archive case - confirm this is intended.
                    content = docExtractor.ExtractContent();

                    // Encrypted document that could not be opened: report why. This cmdlet never prompts for a
                    // password; it only reports whether the supplied one was wrong, none was supplied, or the
                    // format cannot be decrypted at all.
                    if (content.Result == ContentResult.WrongPassword && content.IsEncrypted)
                    {
                        if (docExtractor.SupportsDecryption)
                        {
                            AppendPasswordError(strBuilder, "ERROR:  Document requires a password");
                        }
                        else
                        {
                            strBuilder.AppendLine("ERROR:  Document is encrypted with a password but format is not supported for decryption.");
                        }
                    }
                    break;
                }

                case ContentExtractorType.MailStore:
                {
                    var mailStoreExtractor = ((IMailStoreExtractor)contentExtractorResult.ContentExtractor);
                    content = mailStoreExtractor.ExtractContent();
                    break;
                }

                case ContentExtractorType.Database:
                {
                    // We will only get table/column info (individual table extracted text can be quite large):
                    var databaseExtractor = ((IDatabaseExtractor)contentExtractorResult.ContentExtractor);
                    content = databaseExtractor.ExtractContent(Path);
                    break;
                }

                case ContentExtractorType.DocumentStore:
                {
                    var docStoreExtractor = ((IDocumentContentExtractor)contentExtractorResult.ContentExtractor);
                    content = docStoreExtractor.ExtractContent();
                    break;
                }

                case ContentExtractorType.Unsupported:
                {
                    //
                    // Binary-to-text extraction: Note, if property ContentExtractionSettings.BinaryToTextOnUnsupportedTypes is false, then calling
                    //                            IUnsupportedExtractor.ExtractContent will only calculate binary hashes without performing binary-to-text.
                    //                            Binary-to-text is not useful for file formats that do not have any textual content (e.g., compressed archives or encrypted files)
                    //                            It is up to the user to filter these formats out using either file format Id or file format classification.
                    //
                    var unsupportedExtractor = ((IUnsupportedExtractor)contentExtractorResult.ContentExtractor);
                    content = unsupportedExtractor.ExtractContent();
                    break;
                }

                case ContentExtractorType.LargeUnsupported:
                case ContentExtractorType.LargeEncodedText:
                    // Ignore for this example
                    break;
                }
            }

            strBuilder.AppendLine("File Format:");
            strBuilder.AppendLine("------------");
            strBuilder.AppendLine(string.Format("   ID:               {0}", idResult.ID.ToString()));
            strBuilder.AppendLine(string.Format("   Classification:   {0}", idResult.Classification.ToString()));
            strBuilder.AppendLine(string.Format("   MatchType:        {0}", idResult.MatchType.ToString()));
            strBuilder.AppendLine(string.Format("   Text Encoding ID: {0}", idResult.EncodingID.ToString()));
            strBuilder.AppendLine(string.Format("   IsEncrypted:      {0}", idResult.IsEncrypted.ToString()));
            strBuilder.AppendLine(string.Format("   MediaType:        {0}", idResult.MediaType.ToString()));
            strBuilder.AppendLine(string.Format("   Description:      {0}", idResult.Description.ToString()));

            if (content != null)
            {
                strBuilder.AppendLine();
                strBuilder.AppendLine("File Metadata:");
                strBuilder.AppendLine("---------------");

                foreach (var meta in content.Metadata)
                {
                    string value = FormatPropertyValue(meta.Value.PropertyType, meta.Value);
                    strBuilder.AppendLine(string.Format("   {0,-35} {1}", meta.Key, value));
                }

                strBuilder.AppendLine();
                strBuilder.AppendLine("Custom Metadata:");
                strBuilder.AppendLine("-----------------");

                foreach (var meta in content.CustomMetadata)
                {
                    string value = FormatPropertyValue(meta.Value.PropertyType, meta.Value);
                    strBuilder.AppendLine(string.Format("   {0,-35} {1}", meta.Key, value));
                }

                strBuilder.AppendLine();
                strBuilder.AppendLine("File Attributes:");
                strBuilder.AppendLine("----------------");
                if (content.Attributes.Count > 0)
                {
                    foreach (var attr in content.Attributes)
                    {
                        strBuilder.AppendLine(string.Format("   {0}", attr.ToString()));
                    }
                }
                strBuilder.AppendLine();


                var dbContent = content as DatabaseContent;
                if (dbContent != null)
                {
                    strBuilder.AppendLine("Database Tables:");
                    strBuilder.AppendLine("----------------");
                    if (dbContent.Tables != null && dbContent.Tables.Count > 0)
                    {
                        strBuilder.AppendLine("   [Name]                        [Row Count]         [Num Columns]  [Is User Table] ");

                        foreach (var table in dbContent.Tables)
                        {
                            strBuilder.AppendLine(string.Format("   {0,-30} {1,-20}  {2,-15}  {3}", table.Name, table.RowCount,
                                                                table.Columns != null ? table.Columns.Count.ToString() : "0", table.IsUserTable.ToString()));
                        }
                    }
                    strBuilder.AppendLine();
                }

                strBuilder.AppendLine("File Hyperlinks:");
                strBuilder.AppendLine("----------------");
                if (content.HyperLinks != null && content.HyperLinks.Count > 0)
                {
                    foreach (var link in content.HyperLinks)
                    {
                        strBuilder.AppendLine(string.Format("   {0}", link.Url));
                    }
                }
                strBuilder.AppendLine();

                strBuilder.AppendLine();
                strBuilder.AppendLine("Detected Sensitive Items:");
                strBuilder.AppendLine("-------------------------");
                if (content.SensitiveItemResult != null && content.SensitiveItemResult.Items.Count > 0)
                {
                    foreach (var item in content.SensitiveItemResult.Items)
                    {
                        strBuilder.AppendLine(string.Format("   {0,-30} {1,-20}  {2,-15}  {3}", item.ItemType.ToString(), item.MatchType.ToString(), item.LocationType.ToString(), item.Text));
                    }
                }
                strBuilder.AppendLine();


                strBuilder.AppendLine();
                strBuilder.AppendLine("Detected Languages:");
                strBuilder.AppendLine("-------------------");
                if (content.LanguageIdResults != null && content.LanguageIdResults.Count > 0)
                {
                    foreach (var langIdResult in content.LanguageIdResults)
                    {
                        strBuilder.AppendLine(string.Format("   {0,-30} {1,-20}  {2,-15}", langIdResult.Language, langIdResult.LangIso639, langIdResult.PercentOfFullText));
                    }
                }
                strBuilder.AppendLine();

                if (ShowText)
                {
                    strBuilder.AppendLine();
                    if (content.ExtractedText != null)
                    {
                        // Only a preview (at most the first 1000 characters) of the extracted text is shown.
                        var charsToDisplay = Math.Min(1000, content.ExtractedText.Length);
                        strBuilder.AppendLine(string.Format("Extracted Text: Total Chars = {0}, Displayed Chars = {1}", content.ExtractedText.Length, charsToDisplay));
                        strBuilder.AppendLine("-------------------------------------------------------------------");
                        strBuilder.AppendLine(content.ExtractedText.Substring(0, charsToDisplay));
                        strBuilder.AppendLine();
                    }
                    else
                    {
                        strBuilder.AppendLine(string.Format("Extracted Text: Total Chars = {0}, Displayed Chars = {1}", 0, 0));
                        strBuilder.AppendLine("-------------------------------------------------------------------");
                        strBuilder.AppendLine();
                    }
                }
            }

            WriteObject(strBuilder.ToString());
        }

        /// <summary>
        /// Appends the password failure line: "Wrong Password" when a non-blank <c>Password</c> was supplied,
        /// otherwise <paramref name="passwordRequiredMessage"/>.
        /// </summary>
        /// <param name="output">Report builder the line is appended to.</param>
        /// <param name="passwordRequiredMessage">Line to emit when no password was supplied.</param>
        private void AppendPasswordError(StringBuilder output, string passwordRequiredMessage)
        {
            if (!string.IsNullOrWhiteSpace(Password))
            {
                output.AppendLine("ERROR:  Wrong Password");
            }
            else
            {
                output.AppendLine(passwordRequiredMessage);
            }
        }

        /// <summary>
        /// Renders a metadata property as display text: scalar kinds via their value's string form,
        /// list kinds joined with "; ".
        /// </summary>
        /// <param name="propertyType">Discriminator selecting the concrete property class to cast to.</param>
        /// <param name="property">The property object; must be of the class matching <paramref name="propertyType"/>.</param>
        /// <returns>The formatted value, or an empty string for a property type not handled here.</returns>
        private static string FormatPropertyValue(PropertyType propertyType, object property)
        {
            switch (propertyType)
            {
            case PropertyType.Boolean:
                return ((BooleanProperty)property).Value.ToString();

            case PropertyType.DateTime:
                return ((DateTimeProperty)property).Value.ToString();

            case PropertyType.Double:
                return ((DoubleProperty)property).Value.ToString();

            case PropertyType.Int32:
                return ((Int32Property)property).Value.ToString();

            case PropertyType.Int64:
                return ((Int64Property)property).Value.ToString();

            case PropertyType.String:
                return ((StringProperty)property).Value;

            case PropertyType.BooleanList:
                return string.Join("; ", ((BooleanListProperty)property).Value);

            case PropertyType.DateTimeList:
                return string.Join("; ", ((DateTimeListProperty)property).Value);

            case PropertyType.DoubleList:
                return string.Join("; ", ((DoubleListProperty)property).Value);

            case PropertyType.Int32List:
                return string.Join("; ", ((Int32ListProperty)property).Value);

            case PropertyType.Int64List:
                return string.Join("; ", ((Int64ListProperty)property).Value);

            case PropertyType.StringList:
                return string.Join("; ", ((StringListProperty)property).Value);

            default:
                return "";
            }
        }

C# (CSharp) DocumentIdentifier примеры использования