/// <summary>
/// Writes <paramref name="content"/> to a temporary <c>item.html</c> file and passes that file,
/// together with the clue, to <c>FileCrawlingUtility.ExtractContents</c>.
/// </summary>
/// <param name="content">Text to index. A null or empty string makes the method return immediately.</param>
/// <param name="clue">Clue whose <c>Data</c> is handed to the extraction call.</param>
/// <remarks>
/// Every exception raised while writing or extracting is caught and logged as an error;
/// nothing is rethrown to the caller.
/// </remarks>
private void IndexContent(string content, Clue clue)
{
    if (string.IsNullOrEmpty(content))
    {
        return;
    }

    try
    {
        using (var tempFile = new TemporaryFile("item.html"))
        {
            // Copy the raw in-memory char values of the string (sizeof(char) bytes each,
            // no encoding step and no byte-order mark) into a byte buffer.
            var characters = content.ToCharArray();
            var payload = new byte[characters.Length * sizeof(char)];
            Buffer.BlockCopy(characters, 0, payload, 0, payload.Length);

            // Create (or overwrite) the temporary file with exactly those bytes.
            using (var output = File.Create(tempFile.FilePath))
            {
                output.Write(payload, 0, payload.Length);
            }

            FileCrawlingUtility.ExtractContents(tempFile, clue.Data, clue, this.state, this.appContext);
        }
    }
    catch (Exception exception)
    {
        this.state.Log.Error(() => "Error Indexing Content", exception);
    }
}
/// <summary>
/// Indexes an in-memory file: the bytes are materialised through <c>CreatePhysicalFile</c> into a
/// temporary file named after <paramref name="filename"/>, which is then passed to
/// <c>FileCrawlingUtility.IndexFile</c>.
/// </summary>
/// <param name="data">File content.</param>
/// <param name="filename">Name given to the temporary file.</param>
/// <param name="clue">Clue whose <c>Data</c> is passed to the indexer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="data"/> or <paramref name="clue"/> is null.
/// </exception>
/// <remarks>
/// Nothing is indexed when the <c>Crawl.InitialCrawl.FileIndexing</c> flag (default <c>true</c>) is
/// switched off, or when the content is larger than <c>Constants.MaxFileIndexingFileSize</c>.
/// </remarks>
public void Index(byte[] data, string filename, Clue clue)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    // The flag is read first; the size check only runs when indexing is enabled.
    var indexingEnabled = ConfigurationManagerEx.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);
    if (!indexingEnabled || data.Length > Constants.MaxFileIndexingFileSize)
    {
        return;
    }

    using (var tempFile = new TemporaryFile(filename))
    {
        CreatePhysicalFile(data, tempFile);

        // No job-processor state is supplied on this path (null is passed explicitly).
        FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, null, _applicationContext);
    }
}
/// <summary>
/// Indexes a file that already exists on disk by passing it to <c>FileCrawlingUtility.IndexFile</c>.
/// </summary>
/// <param name="item">File to index. Missing or zero-length files are skipped.</param>
/// <param name="clue">Clue whose <c>Data</c> is passed to the indexer.</param>
/// <param name="state">
/// Job processor state. NOTE(review): this parameter is not read by the method body; the indexer
/// receives <c>args</c> and <c>context</c>, which are declared outside this method — confirm that is intended.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="item"/> or <paramref name="clue"/> is null.
/// </exception>
/// <remarks>
/// Nothing is indexed when the <c>Crawl.InitialCrawl.FileIndexing</c> flag (default <c>true</c>) is switched off.
/// </remarks>
public void Index(FileInfo item, Clue clue, AgentJobProcessorState<FileSystemCrawlJobData> state)
{
    if (item == null)
    {
        throw new ArgumentNullException(nameof(item));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    // Existence is tested before Length so Length is never read for a missing file.
    if (!item.Exists || item.Length == 0)
    {
        return;
    }

    var indexingEnabled = ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);
    if (!indexingEnabled)
    {
        return;
    }

    FileCrawlingUtility.IndexFile(item, clue.Data, clue, args, context);
}
/// <summary>
/// Turns a mail attachment into a clue, indexes its content when it is an unfiltered file attachment,
/// and forwards the clue to the wrapped observer.
/// </summary>
/// <param name="item">Attachment model to process.</param>
/// <remarks>
/// Attachments that are not <c>FileAttachment</c> instances are counted as failures, logged, and not
/// forwarded. Any exception is caught, counted as a failure and logged; nothing is rethrown.
/// </remarks>
public void OnNext(AttachmentModel item)
{
    try
    {
        var attachment = item.Attachment;
        var clue = this.factory.Create(item);
        var entity = clue.Data.EntityData;

        // Metadata reported by Exchange; size and MIME type may be overwritten below
        // once the actual file has been downloaded.
        entity.Name = attachment.Name;
        entity.DocumentSize = attachment.Size;
        entity.DocumentMimeType = attachment.ContentType;
        entity.ModifiedDate = attachment.LastModifiedTime;

        // The entity Uri is not set here: the original note states that attachments have no
        // direct Uri of their own, only the mail they belong to has one.

        if (!(attachment is Microsoft.Exchange.WebServices.Data.FileAttachment fileAttachment))
        {
            this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
            this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
            this.state.Log.Error(() => "Could not cast Mail Attachment as FileAttachment in Exchange: " + attachment.GetType().FullName);
            return;
        }

        var isFiltered = AttachmentHelper.IsFiltered(this.state, attachment);
        if (!isFiltered)
        {
            using (var tempFile = new TemporaryFile(fileAttachment.Name))
            {
                fileAttachment.ExLoad(this.state, tempFile.FilePath);

                var mimeType = tempFile.FileInfo.ToMimeType();

                entity.DocumentFileName = fileAttachment.Name;
                entity.DocumentSize = tempFile.FileInfo.Length;
                entity.DocumentMimeType = mimeType.Code;

                FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, this.state, this.appContext);
            }
        }

        // Filtered file attachments are still forwarded, just without indexed content.
        this.observer.OnNext(clue);
    }
    catch (Exception ex)
    {
        this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
        this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
        this.state.Log.Error(() => "Attachment Observer Exception", ex);
    }
}
/// <summary>
/// Downloads a remote file through <c>_client</c>, stores it in a temporary file and passes that file
/// to <c>FileCrawlingUtility.IndexFile</c>.
/// </summary>
/// <param name="file">Metadata of the file to download; read through <c>file.AsFile</c>.</param>
/// <param name="clue">Clue whose <c>Data</c> is passed to the indexer.</param>
/// <returns>A task that completes when the file has been downloaded and indexed, or skipped.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="file"/> or <paramref name="clue"/> is null.
/// </exception>
/// <remarks>
/// The file is skipped when its size is zero, when the <c>Crawl.InitialCrawl.FileIndexing</c> flag
/// (default <c>true</c>) is switched off, or when it is larger than <c>Constants.MaxFileIndexingFileSize</c>.
/// </remarks>
public async Task Index(Metadata file, Clue clue)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (clue == null)
    {
        throw new ArgumentNullException(nameof(clue));
    }

    if (file.AsFile.Size == 0)
    {
        return;
    }

    if (!ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true))
    {
        return;
    }

    if ((long)file.AsFile.Size > Constants.MaxFileIndexingFileSize)
    {
        return;
    }

    // The download response owns the underlying HTTP response; dispose it once the content
    // has been copied so the connection is released even when copying or indexing throws.
    using (var download = await _client.DownloadAsync(file.PathLower, file.AsFile.Rev).ConfigureAwait(false))
    using (var tempFile = new TemporaryFile(CleanFileName(file.Name)))
    {
        using (var stream = await download.GetContentAsStreamAsync().ConfigureAwait(false))
        using (var fs = new FileStream(tempFile.FileInfo.FullName, FileMode.OpenOrCreate, FileAccess.Write))
        {
            await stream.CopyToAsync(fs).ConfigureAwait(false);
        }

        // The file stream is closed before indexing so the indexer can open the file itself.
        FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, _args, _context);
    }
}
/// <summary>
/// Copies the data of an Exchange contact onto the entity data of <paramref name="clue"/>:
/// name, creation date, the scalar contact properties, email addresses, phone numbers and,
/// when one is available, a preview image.
/// </summary>
/// <param name="clue">Clue that receives the contact data.</param>
/// <param name="contact">Model wrapping the Exchange contact (<c>contact.Object</c>).</param>
/// <param name="service">Exchange service, forwarded to <c>PopulateItem</c>.</param>
/// <remarks>
/// Email and photo retrieval are best-effort: their failures are logged as warnings and do not
/// abort the population of the remaining data.
/// </remarks>
protected void PopulateContact(Clue clue, ContactModel contact, ExchangeService service)
{
    var value = contact.Object;
    var data = clue.Data.EntityData;
    var vocabulary = ExchangeSharedMailboxVocabulary.Contact;

    data.Name = value.ExPrintIfAvailable(v => v.DisplayName);
    data.CreatedDate = value.ExGetIfAvailable(v => v.DateTimeCreated, data.CreatedDate);
    data.Culture = CultureInfo.InvariantCulture;

    this.PopulateItem(clue, contact, vocabulary, service);

    // Must run after PopulateItem and before the contact-specific properties are read below.
    contact.LoadContactSchemaProperties();

    // Keep whatever display name is already present when the contact has none.
    data.DisplayName = value.ExPrintIfAvailable(v => v.DisplayName) ?? data.DisplayName;

    data.Properties[vocabulary.Alias] = value.ExPrintIfAvailable(v => v.Alias);
    data.Properties[vocabulary.AssistantName] = value.ExPrintIfAvailable(v => v.AssistantName);
    data.Properties[vocabulary.Birthday] = value.ExPrintIfAvailable(v => v.Birthday);
    data.Properties[vocabulary.BusinessHomePage] = value.ExPrintIfAvailable(v => v.BusinessHomePage);
    data.Properties[vocabulary.Companies] = value.ExPrintIfAvailable(v => v.Companies);
    data.Properties[vocabulary.CompanyName] = value.ExPrintIfAvailable(v => v.CompanyName);

    // CompleteName may be null, hence the null-conditional access in each lambda.
    data.Properties[vocabulary.CompleteName.FullName] = value.ExPrintIfAvailable(v => v.CompleteName?.FullName);
    data.Properties[vocabulary.CompleteName.GivenName] = value.ExPrintIfAvailable(v => v.CompleteName?.GivenName);
    data.Properties[vocabulary.CompleteName.Initials] = value.ExPrintIfAvailable(v => v.CompleteName?.Initials);
    data.Properties[vocabulary.CompleteName.MiddleName] = value.ExPrintIfAvailable(v => v.CompleteName?.MiddleName);
    data.Properties[vocabulary.CompleteName.NickName] = value.ExPrintIfAvailable(v => v.CompleteName?.NickName);
    data.Properties[vocabulary.CompleteName.Suffix] = value.ExPrintIfAvailable(v => v.CompleteName?.Suffix);
    data.Properties[vocabulary.CompleteName.Surname] = value.ExPrintIfAvailable(v => v.CompleteName?.Surname);
    data.Properties[vocabulary.CompleteName.Title] = value.ExPrintIfAvailable(v => v.CompleteName?.Title);
    data.Properties[vocabulary.CompleteName.YomiGivenName] = value.ExPrintIfAvailable(v => v.CompleteName?.YomiGivenName);
    data.Properties[vocabulary.CompleteName.YomiSurname] = value.ExPrintIfAvailable(v => v.CompleteName?.YomiSurname);

    data.Properties[vocabulary.ContactSource] = value.ExPrintIfAvailable(v => v.ContactSource);
    data.Properties[vocabulary.Department] = value.ExPrintIfAvailable(v => v.Department);
    data.Properties[vocabulary.DirectoryId] = value.ExPrintIfAvailable(v => v.DirectoryId);
    data.Properties[vocabulary.DisplayName] = value.ExPrintIfAvailable(v => v.DisplayName);
    data.Properties[vocabulary.FileAs] = value.ExPrintIfAvailable(v => v.FileAs);

    // FileAsMapping.None is stored as null rather than as the literal enum name.
    data.Properties[vocabulary.FileAsMapping] = value.ExPrintIfAvailable(v => v.FileAsMapping != FileAsMapping.None ? v.FileAsMapping.ToString() : null);

    data.Properties[vocabulary.Generation] = value.ExPrintIfAvailable(v => v.Generation);
    data.Properties[vocabulary.GivenName] = value.ExPrintIfAvailable(v => v.GivenName);
    data.Properties[vocabulary.HasPicture] = value.ExPrintIfAvailable(v => v.HasPicture);
    data.Properties[vocabulary.Initials] = value.ExPrintIfAvailable(v => v.Initials);
    data.Properties[vocabulary.JobTitle] = value.ExPrintIfAvailable(v => v.JobTitle);
    data.Properties[vocabulary.Manager] = value.ExPrintIfAvailable(v => v.Manager);
    data.Properties[vocabulary.MiddleName] = value.ExPrintIfAvailable(v => v.MiddleName);
    data.Properties[vocabulary.Mileage] = value.ExPrintIfAvailable(v => v.Mileage);
    data.Properties[vocabulary.NickName] = value.ExPrintIfAvailable(v => v.NickName);
    data.Properties[vocabulary.Notes] = value.ExPrintIfAvailable(v => v.Notes);
    data.Properties[vocabulary.OfficeLocation] = value.ExPrintIfAvailable(v => v.OfficeLocation);
    data.Properties[vocabulary.PhoneticFirstName] = value.ExPrintIfAvailable(v => v.PhoneticFirstName);
    data.Properties[vocabulary.PhoneticFullName] = value.ExPrintIfAvailable(v => v.PhoneticFullName);
    data.Properties[vocabulary.PhoneticLastName] = value.ExPrintIfAvailable(v => v.PhoneticLastName);
    data.Properties[vocabulary.PostalAddressIndex] = value.ExPrintIfAvailable(v => v.PostalAddressIndex);
    data.Properties[vocabulary.Profession] = value.ExPrintIfAvailable(v => v.Profession);
    data.Properties[vocabulary.SpouseName] = value.ExPrintIfAvailable(v => v.SpouseName);
    data.Properties[vocabulary.Surname] = value.ExPrintIfAvailable(v => v.Surname);
    data.Properties[vocabulary.WeddingAnniversary] = value.ExPrintIfAvailable(v => v.WeddingAnniversary);

    this.PopulateContactEmailAddresses(clue, contact);
    this.PopulateContactPhoneNumbers(clue, contact);
    this.PopulateContactPreviewImage(clue, contact);
}

/// <summary>
/// Stores the contact's SMTP email addresses as properties and aliases, and adds the entity codes
/// created from all of its email addresses.
/// </summary>
/// <param name="clue">Clue that receives the addresses and codes.</param>
/// <param name="contact">Contact model the addresses are read from.</param>
private void PopulateContactEmailAddresses(Clue clue, ContactModel contact)
{
    var data = clue.Data.EntityData;
    var vocabulary = ExchangeSharedMailboxVocabulary.Contact;

    try
    {
        var emails = contact.GetEmailAddresses();
        var filteredEmails = contact.GetSmtpEmailAddresses(emails).ToList();

        if (filteredEmails.Count > 0)
        {
            data.Properties[vocabulary.EmailAddress] = filteredEmails[0];

            // The joined list is only written when there is more than one address.
            if (filteredEmails.Count > 1)
            {
                data.Properties[vocabulary.EmailAddresses] = string.Join(";", filteredEmails);
            }

            data.Aliases.AddRange(filteredEmails);
        }

        // Codes are built from the unfiltered address list.
        data.Codes.AddRange(contact.CreateEntityCodesFromEmailAddresses(emails));
    }
    catch (ServiceObjectPropertyException e)
    {
        this.state.Log.Warn(() => "Could not get email addresses", e);
    }
}

/// <summary>
/// Stores one property per phone number kind, each resolved through <c>GetPhoneNumber</c>.
/// </summary>
/// <param name="clue">Clue that receives the phone number properties.</param>
/// <param name="contact">Contact model the phone numbers are read from.</param>
private void PopulateContactPhoneNumbers(Clue clue, ContactModel contact)
{
    var value = contact.Object;
    var data = clue.Data.EntityData;
    var phones = ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers;

    data.Properties[phones.AssistantPhone] = GetPhoneNumber(value, PhoneNumberKey.AssistantPhone);
    data.Properties[phones.BusinessFax] = GetPhoneNumber(value, PhoneNumberKey.BusinessFax);
    data.Properties[phones.BusinessPhone] = GetPhoneNumber(value, PhoneNumberKey.BusinessPhone);
    data.Properties[phones.BusinessPhone2] = GetPhoneNumber(value, PhoneNumberKey.BusinessPhone2);
    data.Properties[phones.Callback] = GetPhoneNumber(value, PhoneNumberKey.Callback);
    data.Properties[phones.CarPhone] = GetPhoneNumber(value, PhoneNumberKey.CarPhone);
    data.Properties[phones.CompanyMainPhone] = GetPhoneNumber(value, PhoneNumberKey.CompanyMainPhone);
    data.Properties[phones.HomeFax] = GetPhoneNumber(value, PhoneNumberKey.HomeFax);
    data.Properties[phones.HomePhone] = GetPhoneNumber(value, PhoneNumberKey.HomePhone);
    data.Properties[phones.HomePhone2] = GetPhoneNumber(value, PhoneNumberKey.HomePhone2);
    data.Properties[phones.Isdn] = GetPhoneNumber(value, PhoneNumberKey.Isdn);
    data.Properties[phones.MobilePhone] = GetPhoneNumber(value, PhoneNumberKey.MobilePhone);
    data.Properties[phones.OtherFax] = GetPhoneNumber(value, PhoneNumberKey.OtherFax);

    // NOTE(review): BusinessFax used to be assigned a second time at this position. The repeated
    // lookup wrote the same key with the same value and has been dropped. PhoneNumberKey.PrimaryPhone
    // is the one key never mapped here, so it was possibly the intended entry — confirm whether the
    // vocabulary defines a key for it before adding it.
    data.Properties[phones.Pager] = GetPhoneNumber(value, PhoneNumberKey.Pager);
    data.Properties[phones.OtherTelephone] = GetPhoneNumber(value, PhoneNumberKey.OtherTelephone);
    data.Properties[phones.RadioPhone] = GetPhoneNumber(value, PhoneNumberKey.RadioPhone);
    data.Properties[phones.Telex] = GetPhoneNumber(value, PhoneNumberKey.Telex);
    data.Properties[phones.TtyTddPhone] = GetPhoneNumber(value, PhoneNumberKey.TtyTddPhone);
}

/// <summary>
/// Attaches a preview image to the clue, taken from the contact's directory photo and/or from the
/// attachment flagged as contact photo. When both are bitmap images, both raw data parts are added
/// and the contact photo ends up as <c>PreviewImage</c> because it is processed last.
/// </summary>
/// <param name="clue">Clue that receives the raw image data and preview reference.</param>
/// <param name="contact">Contact model the photos are read from.</param>
/// <remarks>
/// Cancellation is swallowed without logging; any other failure is logged as a warning.
/// </remarks>
private void PopulateContactPreviewImage(Clue clue, ContactModel contact)
{
    var value = contact.Object;
    var data = clue.Data.EntityData;

    try
    {
        // Only request the photo property when the requested server version supports it.
        var version = (int)contact.Service.RequestedServerVersion;
        var additional = new List<PropertyDefinitionBase>() { ContactSchema.Photo };
        var filtered = additional.Where(p => ((int)p.Version) <= version).ToArray();

        if (filtered.Length > 0)
        {
            var tmp = new PropertySet(BasePropertySet.FirstClassProperties, filtered.Concat(new[] { ItemSchema.Attachments }));

            if (value.Id != null)
            {
                value.ExLoad(this.state, tmp);
            }

            var directoryPhoto = value.ExGetIfAvailable(v => v.DirectoryPhoto, null);

            if (directoryPhoto != null)
            {
                using (var stream = new MemoryStream(directoryPhoto))
                {
                    var mimeType = FileCrawlingUtility.DetectMimeType(new FileInfo("PreviewImage"), new DataPart(), stream, this.state);

                    if (mimeType.FileDescriptor.Category == FileCategory.ImageBitmap)
                    {
                        var rawDataPart = new RawDataPart()
                        {
                            Type = "/RawData/PreviewImage",
                            MimeType = mimeType.Code,
                            FileName = "preview_{0}".FormatWith(data.OriginEntityCode.Key),
                            RawDataMD5 = FileHashUtility.GetMD5Base64String(directoryPhoto),
                            RawData = Convert.ToBase64String(directoryPhoto)
                        };

                        clue.Details.RawData.Add(rawDataPart);
                        data.PreviewImage = new ImageReferencePart(rawDataPart);
                    }
                    else
                    {
                        this.state.Log.Info(() => $"DirectoryPhoto is not an image: {mimeType.Code}");
                    }
                }
            }
        }

        if (value.Attachments != null)
        {
            // First file attachment flagged as the contact photo, if any.
            var contactPhoto = value.Attachments.OfType<FileAttachment>().FirstOrDefault(a => a.IsContactPhoto);

            if (contactPhoto != null)
            {
                using (var tempFile = new TemporaryFile(contactPhoto.Name))
                {
                    contactPhoto.ExLoad(this.state, tempFile.FilePath);

                    // Initial lookup through ToMimeType(); it is replaced by content-based detection below.
                    var mimeType = tempFile.FileInfo.ToMimeType();

                    using (var stream = File.OpenRead(tempFile.FilePath))
                    {
                        mimeType = FileCrawlingUtility.DetectMimeType(tempFile.FileInfo, new DataPart(), stream, this.state);
                    }

                    if (mimeType.FileDescriptor.Category == FileCategory.ImageBitmap)
                    {
                        // Reopen the file so the bytes are read from a fresh stream.
                        using (var stream = File.OpenRead(tempFile.FilePath))
                        {
                            var bytes = stream.ToArray();

                            var rawDataPart = new RawDataPart()
                            {
                                Type = "/RawData/PreviewImage",
                                MimeType = mimeType.Code,
                                FileName = "preview_{0}".FormatWith(data.OriginEntityCode.Key),
                                RawDataMD5 = FileHashUtility.GetMD5Base64String(bytes),
                                RawData = Convert.ToBase64String(bytes)
                            };

                            clue.Details.RawData.Add(rawDataPart);
                            data.PreviewImage = new ImageReferencePart(rawDataPart);
                        }
                    }
                    else
                    {
                        this.state.Log.Info(() => $"ContactPhoto is not an image: {mimeType.Code}");
                    }
                }
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Deliberately ignored: a cancelled photo load is not reported as a warning.
    }
    catch (Exception e)
    {
        this.state.Log.Warn(() => "Could not get DirectoryPhoto", e);
    }
}
/// <summary>
/// Downloads a drive item from <paramref name="webUrl"/>, records an MD5-based entity code and hash
/// property for its content, and passes the downloaded file to <c>FileCrawlingUtility.IndexFile</c>.
/// Items above <c>CluedIn.Core.Constants.MaxFileIndexingFileSize</c> only get their file name and
/// size recorded.
/// </summary>
/// <param name="input">Wrapper whose <c>DriveItem</c> describes the file.</param>
/// <param name="webUrl">Address the file content is downloaded from.</param>
/// <param name="clue">Clue that receives the file metadata.</param>
/// <remarks>
/// Any exception raised while downloading, hashing or indexing is logged as an error and not
/// rethrown. The MIME type is not set by this method.
/// </remarks>
private void Index([NotNull] CluedInDriveItem input, [NotNull] string webUrl, [NotNull] Clue clue)
{
    var data = clue.Data;
    var driveItem = input.DriveItem;

    // Written as a negated "<=" on purpose: if Size is a nullable value (TODO confirm) a missing
    // size makes the comparison false and must take this metadata-only path.
    if (!(driveItem.Size <= CluedIn.Core.Constants.MaxFileIndexingFileSize))
    {
        data.EntityData.DocumentFileName = driveItem.Name;
        data.EntityData.DocumentSize = driveItem.Size;
        return;
    }

    try
    {
        using (var tempFile = new TemporaryFile(driveItem.Name))
        {
            string hash;

            using (var webClient = new WebClient())
            {
                var content = webClient.DownloadData(webUrl);

                using (var md5 = MD5.Create())
                {
                    // Dash-separated upper-case hex, as produced by BitConverter.
                    hash = BitConverter.ToString(md5.ComputeHash(content));
                }

                using (var fileStream = System.IO.File.Create(tempFile.FilePath))
                {
                    fileStream.Write(content, 0, content.Length);
                }
            }

            data.EntityData.Codes.Add(new EntityCode(EntityType.Files.File, OneDriveConstants.CodeOrigin, hash));

            if (driveItem.Name != null)
            {
                data.EntityData.DocumentFileName = driveItem.Name;
            }

            // Size of what was actually downloaded, not the size reported by the drive item.
            data.EntityData.DocumentSize = tempFile.FileInfo.Length;
            data.EntityData.Properties[OneDriveVocabularies.File.Hash] = hash;

            FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, state, appContext);
        }
    }
    catch (Exception ex)
    {
        appContext.Container.GetLogger().Error(() => ex.Message, ex);
    }
}