コード例 #1
0
        /// <summary>
        /// Indexes in-memory file content by backing it with a <c>TemporaryFile</c> and passing
        /// that file to <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="data">Raw file content; must not be null.</param>
        /// <param name="filename">Name handed to the <c>TemporaryFile</c> constructor.</param>
        /// <param name="clue">Clue whose <c>Data</c> is passed to the indexer; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="data"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Returns without indexing when the "Crawl.InitialCrawl.FileIndexing" flag evaluates to
        /// false, or when the content is longer than <c>Constants.MaxFileIndexingFileSize</c>.
        /// </remarks>
        public void Index(byte[] data, string filename, Clue clue)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            // The flag is read first; the size limit is only compared when indexing is enabled.
            var indexingEnabled = ConfigurationManagerEx.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);
            if (!indexingEnabled || data.Length > Constants.MaxFileIndexingFileSize)
            {
                return;
            }

            using (var scratchFile = new TemporaryFile(filename))
            {
                // Presumably writes `data` into the temporary file; the helper is defined elsewhere.
                CreatePhysicalFile(data, scratchFile);

                // No job state is supplied for this overload (fourth argument is null).
                FileCrawlingUtility.IndexFile(scratchFile, clue.Data, clue, null, _applicationContext);
            }
        }
コード例 #2
0
        /// <summary>
        /// Indexes a file that already exists on disk by passing it to
        /// <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="item">File to index; must not be null.</param>
        /// <param name="clue">Clue whose <c>Data</c> is passed to the indexer; must not be null.</param>
        /// <param name="state">
        /// Not referenced by this method; the indexer call uses the <c>args</c> and
        /// <c>context</c> members that are defined outside of it.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="item"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Returns without indexing when the file does not exist, when it is empty, or when the
        /// "Crawl.InitialCrawl.FileIndexing" flag evaluates to false.
        /// </remarks>
        public void Index(FileInfo item, Clue clue, AgentJobProcessorState <FileSystemCrawlJobData> state)
        {
            if (item == null)
            {
                throw new ArgumentNullException(nameof(item));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            // Short-circuit evaluation keeps the checks in sequence: existence, then length,
            // then the configuration flag. Length is never read for a missing file.
            if (!item.Exists
                || item.Length == 0
                || !ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true))
            {
                return;
            }

            FileCrawlingUtility.IndexFile(item, clue.Data, clue, args, context);
        }
コード例 #3
0
        /// <summary>
        /// Handles one mail attachment: creates a clue for it, copies the attachment metadata
        /// onto the clue's entity data, indexes the content of file attachments that are not
        /// filtered out, and forwards the clue to the downstream observer.
        /// </summary>
        /// <param name="item">Attachment model to process.</param>
        /// <remarks>
        /// Exceptions thrown while processing are caught, counted in the clue and task failure
        /// statistics and logged. Attachments that are not <c>FileAttachment</c> instances are
        /// counted as failures, logged, and not forwarded to the observer.
        /// </remarks>
        public void OnNext(AttachmentModel item)
        {
            try
            {
                var attachment = item.Attachment;
                var clue = this.factory.Create(item);
                var entity = clue.Data.EntityData;

                entity.Name = attachment.Name;
                entity.DocumentSize = attachment.Size;
                entity.DocumentMimeType = attachment.ContentType;
                entity.ModifiedDate = attachment.LastModifiedTime;

                // The entity Uri is left unset here: attachments have no direct Uri of their
                // own, only the mail they belong to has one.

                if (!(attachment is Microsoft.Exchange.WebServices.Data.FileAttachment fileAttachment))
                {
                    RecordFailure();
                    this.state.Log.Error(() => "Could not cast Mail Attachment as FileAttachment in Exchange: " + attachment.GetType().FullName);
                    return;
                }

                if (!AttachmentHelper.IsFiltered(this.state, attachment))
                {
                    using (var tempFile = new TemporaryFile(fileAttachment.Name))
                    {
                        fileAttachment.ExLoad(this.state, tempFile.FilePath);

                        // Resolved before any entity field is overwritten, so a failure here
                        // leaves the values copied from the attachment untouched.
                        var detectedType = tempFile.FileInfo.ToMimeType();

                        // The downloaded file's size and MIME type replace the values reported
                        // by the attachment itself.
                        entity.DocumentFileName = fileAttachment.Name;
                        entity.DocumentSize = tempFile.FileInfo.Length;
                        entity.DocumentMimeType = detectedType.Code;

                        FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, this.state, this.appContext);
                    }
                }

                // Filtered file attachments are still forwarded, just without indexed content.
                this.observer.OnNext(clue);
            }
            catch (Exception ex)
            {
                RecordFailure();
                this.state.Log.Error(() => "Attachment Observer Exception", ex);
            }

            // Bumps both failure counters that are updated on every failed attachment.
            void RecordFailure()
            {
                this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
                this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            }
        }
コード例 #4
0
        /// <summary>
        /// Downloads the given remote file into a temporary file and passes that file to
        /// <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="file">Metadata of the remote file; must not be null.</param>
        /// <param name="clue">Clue whose <c>Data</c> is passed to the indexer; must not be null.</param>
        /// <returns>A task that completes once the file has been downloaded and indexed.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="file"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Returns without downloading when the file is empty, when the
        /// "Crawl.InitialCrawl.FileIndexing" flag evaluates to false, or when the file is larger
        /// than <c>Constants.MaxFileIndexingFileSize</c>.
        /// </remarks>
        public async Task Index(Metadata file, Clue clue)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            if (file.AsFile.Size == 0)
            {
                return;
            }

            if (!ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true))
            {
                return;
            }

            if ((long)file.AsFile.Size > Constants.MaxFileIndexingFileSize)
            {
                return;
            }

            // The download response is wrapped in `using` so that it is released even when
            // reading the content or indexing throws.
            // NOTE(review): assumes the response type is disposable (as the Dropbox SDK's
            // IDownloadResponse is) - confirm against _client's actual return type.
            using (var response = await _client.DownloadAsync(file.PathLower, file.AsFile.Rev).ConfigureAwait(false))
            using (var tempFile = new TemporaryFile(CleanFileName(file.Name)))
            {
                using (var stream = await response.GetContentAsStreamAsync().ConfigureAwait(false))
                using (var fs = new FileStream(tempFile.FileInfo.FullName, FileMode.OpenOrCreate, FileAccess.Write))
                {
                    await stream.CopyToAsync(fs).ConfigureAwait(false);
                }

                // Both streams are closed at this point, so the indexer can open the file.
                FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, _args, _context);
            }
        }
コード例 #5
0
        /// <summary>
        /// Downloads the content of a drive item, records its MD5 hash, file name and size on
        /// the clue, and passes the downloaded file to <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="input">Wrapper exposing the drive item to index.</param>
        /// <param name="webUrl">Address the item's content is downloaded from.</param>
        /// <param name="clue">Clue that receives the file metadata and is passed to the indexer.</param>
        /// <remarks>
        /// Items whose size is not within <c>CluedIn.Core.Constants.MaxFileIndexingFileSize</c>
        /// are not downloaded; only their name and size are copied onto the clue. Any exception
        /// raised while downloading or indexing is logged and not rethrown.
        /// </remarks>
        private void Index([NotNull] CluedInDriveItem input, [NotNull] string webUrl, [NotNull] Clue clue)
        {
            var clueData = clue.Data;
            var driveItem = input.DriveItem;

            // Written as a negated "<=" so the outcome is the same if Size turns out to be
            // nullable (a null size then takes this branch) - TODO confirm Size's type.
            if (!(driveItem.Size <= CluedIn.Core.Constants.MaxFileIndexingFileSize))
            {
                clueData.EntityData.DocumentFileName = driveItem.Name;
                clueData.EntityData.DocumentSize = driveItem.Size;
                return;
            }

            try
            {
                using (var tempFile = new TemporaryFile(driveItem.Name))
                {
                    byte[] content;
                    using (var webClient = new WebClient())
                    {
                        content = webClient.DownloadData(webUrl);
                    }

                    // Hex digest of the content, formatted by BitConverter with '-' between bytes.
                    string hash;
                    using (var md5 = MD5.Create())
                    {
                        hash = BitConverter.ToString(md5.ComputeHash(content));
                    }

                    // Creates (or overwrites) the temporary file with the downloaded bytes.
                    System.IO.File.WriteAllBytes(tempFile.FilePath, content);

                    clueData.EntityData.Codes.Add(new EntityCode(EntityType.Files.File, OneDriveConstants.CodeOrigin, hash));

                    if (driveItem.Name != null)
                    {
                        clueData.EntityData.DocumentFileName = driveItem.Name;
                    }

                    // NOTE: DocumentMimeType is not populated here; detection through
                    // tempFile.FileInfo.ToMimeType() is currently disabled.
                    clueData.EntityData.DocumentSize = tempFile.FileInfo.Length;
                    clueData.EntityData.Properties[OneDriveVocabularies.File.Hash] = hash;

                    FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, state, appContext);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failed download or indexing run is only logged.
                appContext.Container.GetLogger().Error(() => ex.Message, ex);
            }
        }