/// <summary>
/// Writes an in-memory file to a temporary file and hands that file to
/// <c>FileCrawlingUtility.IndexFile</c> together with the clue.
/// </summary>
/// <remarks>
/// The call returns without doing anything when the
/// <c>Crawl.InitialCrawl.FileIndexing</c> app setting flag evaluates to false
/// or when the payload is larger than <c>Constants.MaxFileIndexingFileSize</c>.
/// </remarks>
/// <param name="data">Raw file content; must not be null.</param>
/// <param name="filename">Name passed to the <c>TemporaryFile</c> constructor.</param>
/// <param name="clue">Clue the file belongs to; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="data"/> or <paramref name="clue"/> is null.
/// </exception>
public void Index(byte[] data, string filename, Clue clue)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    // The flag is read first and the size check second, as two short-circuited conditions.
    var indexingEnabled = ConfigurationManagerEx.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);
    if (!indexingEnabled || data.Length > Constants.MaxFileIndexingFileSize)
    {
        return;
    }

    // The temporary file is disposed as soon as IndexFile returns.
    using (var scratchFile = new TemporaryFile(filename))
    {
        CreatePhysicalFile(data, scratchFile);
        FileCrawlingUtility.IndexFile(scratchFile, clue.Data, clue, null, _applicationContext);
    }
}
/// <summary>
/// Passes an existing, non-empty file on disk to <c>FileCrawlingUtility.IndexFile</c>
/// together with the clue.
/// </summary>
/// <remarks>
/// Returns without doing anything when the file does not exist, when its length is zero,
/// or when the <c>Crawl.InitialCrawl.FileIndexing</c> app setting flag evaluates to false.
/// </remarks>
/// <param name="item">File to index; must not be null.</param>
/// <param name="clue">Clue the file belongs to; must not be null.</param>
/// <param name="state">
/// Not read by this method. NOTE(review): <c>args</c> and <c>context</c> are passed to
/// <c>IndexFile</c> instead; confirm that ignoring this parameter is intended.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="item"/> or <paramref name="clue"/> is null.
/// </exception>
public void Index(FileInfo item, Clue clue, AgentJobProcessorState<FileSystemCrawlJobData> state)
{
    if (item == null)
    {
        throw new ArgumentNullException(nameof(item));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    // Existence is tested first so that Length is never read for a missing file.
    if (!item.Exists || item.Length == 0)
    {
        return;
    }

    var indexingEnabled = ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);
    if (!indexingEnabled)
    {
        return;
    }

    FileCrawlingUtility.IndexFile(item, clue.Data, clue, args, context);
}
/// <summary>
/// Builds a clue for a mail attachment, copies the attachment's metadata onto it and
/// forwards the clue to the wrapped observer.
/// </summary>
/// <remarks>
/// For a <c>FileAttachment</c> that <c>AttachmentHelper.IsFiltered</c> does not report as
/// filtered, the content is loaded into a temporary file, the document file name, size and
/// mime type are taken from that file, and the file is passed to
/// <c>FileCrawlingUtility.IndexFile</c>. Any other attachment type is counted as a failure,
/// logged, and not forwarded. Exceptions are caught, counted as failures and logged; they
/// are not rethrown.
/// </remarks>
/// <param name="item">Attachment model used to create the clue.</param>
public void OnNext(AttachmentModel item)
{
    try
    {
        var attachment = item.Attachment;
        var clue = this.factory.Create(item);
        var entity = clue.Data.EntityData;

        entity.Name = attachment.Name;
        entity.DocumentSize = attachment.Size;
        entity.DocumentMimeType = attachment.ContentType;
        entity.ModifiedDate = attachment.LastModifiedTime;

        // The entity Uri is left unset: only the mail has a Uri, attachments have no direct one.

        if (!(attachment is Microsoft.Exchange.WebServices.Data.FileAttachment fileAttachment))
        {
            this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
            this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            this.state.Log.Error(() => "Could not cast Mail Attachment as FileAttachment in Exchange: " + attachment.GetType().FullName);
            return;
        }

        var isFiltered = AttachmentHelper.IsFiltered(this.state, attachment);
        if (!isFiltered)
        {
            using (var scratchFile = new TemporaryFile(fileAttachment.Name))
            {
                fileAttachment.ExLoad(this.state, scratchFile.FilePath);

                // Size and mime type from the loaded file replace the values set above.
                var detectedMimeType = scratchFile.FileInfo.ToMimeType();
                entity.DocumentFileName = fileAttachment.Name;
                entity.DocumentSize = scratchFile.FileInfo.Length;
                entity.DocumentMimeType = detectedMimeType.Code;

                FileCrawlingUtility.IndexFile(scratchFile, clue.Data, clue, this.state, this.appContext);
            }
        }

        // Filtered attachments are still forwarded, just without file indexing.
        this.observer.OnNext(clue);
    }
    catch (Exception ex)
    {
        this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
        this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
        this.state.Log.Error(() => "Attachment Observer Exception", ex);
    }
}
/// <summary>
/// Downloads a remote file into a temporary file and hands that file to
/// <c>FileCrawlingUtility.IndexFile</c> together with the clue.
/// </summary>
/// <remarks>
/// Returns without downloading when the file size is zero, when the
/// <c>Crawl.InitialCrawl.FileIndexing</c> app setting flag evaluates to false, or when the
/// file is larger than <c>Constants.MaxFileIndexingFileSize</c>.
/// </remarks>
/// <param name="file">Metadata of the file to download; must not be null.</param>
/// <param name="clue">Clue the file belongs to; must not be null.</param>
/// <returns>A task that completes once the file has been downloaded and passed to the indexer.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="file"/> or <paramref name="clue"/> is null.
/// </exception>
public async Task Index(Metadata file, Clue clue)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    if (file.AsFile.Size == 0)
    {
        return;
    }

    if (!ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true))
    {
        return;
    }

    if ((long)file.AsFile.Size > Constants.MaxFileIndexingFileSize)
    {
        return;
    }

    // The download response is disposed here; previously it was never released
    // after its content stream had been copied.
    using (var response = await _client.DownloadAsync(file.PathLower, file.AsFile.Rev))
    using (var tempFile = new TemporaryFile(CleanFileName(file.Name)))
    {
        // FileMode.Create truncates a file that already exists at this path, so stale
        // trailing bytes cannot survive when the new content is shorter.
        using (var stream = await response.GetContentAsStreamAsync())
        using (var fs = new FileStream(tempFile.FileInfo.FullName, FileMode.Create, FileAccess.Write))
        {
            await stream.CopyToAsync(fs).ConfigureAwait(false);
        }

        // The write stream is closed before indexing starts.
        FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, _args, _context);
    }
}
/// <summary>
/// Downloads a drive item, records an MD5 hash of its content on the clue and hands the
/// downloaded file to <c>FileCrawlingUtility.IndexFile</c>.
/// </summary>
/// <remarks>
/// When the item's size is not within <c>CluedIn.Core.Constants.MaxFileIndexingFileSize</c>,
/// nothing is downloaded and only the document file name and size are copied onto the clue.
/// Exceptions raised while downloading, hashing or indexing are logged and not rethrown.
/// </remarks>
/// <param name="input">Wrapper whose <c>DriveItem</c> describes the file.</param>
/// <param name="webUrl">Address the file content is downloaded from.</param>
/// <param name="clue">Clue that receives the hash, file name and size.</param>
private void Index([NotNull] CluedInDriveItem input, [NotNull] string webUrl, [NotNull] Clue clue)
{
    var clueData = clue.Data;
    var driveItem = input.DriveItem;

    // Written as a negated "<=" rather than ">" so the outcome stays the same
    // should Size be a nullable value.
    if (!(driveItem.Size <= CluedIn.Core.Constants.MaxFileIndexingFileSize))
    {
        clueData.EntityData.DocumentFileName = driveItem.Name;
        clueData.EntityData.DocumentSize = driveItem.Size;
        return;
    }

    try
    {
        using (var scratchFile = new TemporaryFile(driveItem.Name))
        {
            byte[] payload;
            using (var webClient = new WebClient())
            {
                payload = webClient.DownloadData(webUrl);
            }

            string hash;
            using (var md5 = MD5.Create())
            using (var buffer = new MemoryStream(payload))
            {
                hash = BitConverter.ToString(md5.ComputeHash(buffer));

                // Hashing consumed the stream; rewind before copying it to disk.
                using (var target = System.IO.File.Create(scratchFile.FilePath))
                {
                    buffer.Seek(0, SeekOrigin.Begin);
                    buffer.CopyTo(target);
                }
            }

            clueData.EntityData.Codes.Add(new EntityCode(EntityType.Files.File, OneDriveConstants.CodeOrigin, hash));

            if (driveItem.Name != null)
            {
                clueData.EntityData.DocumentFileName = driveItem.Name;
            }

            // DocumentMimeType is not set by this method.
            clueData.EntityData.DocumentSize = scratchFile.FileInfo.Length;
            clueData.EntityData.Properties[OneDriveVocabularies.File.Hash] = hash;

            FileCrawlingUtility.IndexFile(scratchFile, clue.Data, clue, state, appContext);
        }
    }
    catch (Exception ex)
    {
        appContext.Container.GetLogger().Error(() => ex.Message, ex);
    }
}