Ejemplo n.º 1
0
        /// <summary>
        /// Writes <paramref name="content"/> to a temporary file named "item.html" and hands that file,
        /// together with the clue, to <c>FileCrawlingUtility.ExtractContents</c>.
        /// </summary>
        /// <param name="content">Text to index. Nothing happens when it is null or empty.</param>
        /// <param name="clue">Clue passed to the extractor along with its <c>Data</c>.</param>
        /// <remarks>
        /// Exceptions raised while writing or extracting are logged via <c>this.state.Log</c> and are
        /// not rethrown.
        /// </remarks>
        private void IndexContent(string content, Clue clue)
        {
            if (string.IsNullOrEmpty(content))
            {
                return;
            }

            try
            {
                using (var htmlFile = new TemporaryFile("item.html"))
                {
                    // Copy the chars' raw bytes (sizeof(char) bytes per char); no encoder is involved
                    // and no byte-order mark is written.
                    var characters = content.ToCharArray();
                    var payload    = new byte[characters.Length * sizeof(char)];
                    Buffer.BlockCopy(characters, 0, payload, 0, payload.Length);

                    using (var output = File.Create(htmlFile.FilePath))
                    {
                        output.Write(payload, 0, payload.Length);
                    }

                    // The file stream is closed before the extractor opens the file.
                    FileCrawlingUtility.ExtractContents(htmlFile, clue.Data, clue, this.state, this.appContext);
                }
            }
            catch (Exception failure)
            {
                this.state.Log.Error(() => "Error Indexing Content", failure);
            }
        }
        /// <summary>
        /// Indexes in-memory file content: the bytes are written to a temporary file named
        /// <paramref name="filename"/> (via <c>CreatePhysicalFile</c>) and that file is passed to
        /// <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="data">Content of the file.</param>
        /// <param name="filename">Name used for the temporary file.</param>
        /// <param name="clue">Clue passed to the indexer along with its <c>Data</c>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="data"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Returns without indexing when the "Crawl.InitialCrawl.FileIndexing" flag is off or when the
        /// content is larger than <c>Constants.MaxFileIndexingFileSize</c>.
        /// </remarks>
        public void Index(byte[] data, string filename, Clue clue)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            // The flag is checked first; the size comparison only runs when indexing is enabled.
            var indexingEnabled = ConfigurationManagerEx.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);

            if (!indexingEnabled || data.Length > Constants.MaxFileIndexingFileSize)
            {
                return;
            }

            using (var physicalCopy = new TemporaryFile(filename))
            {
                CreatePhysicalFile(data, physicalCopy);

                FileCrawlingUtility.IndexFile(physicalCopy, clue.Data, clue, null, _applicationContext);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Passes an existing, non-empty file on disk to <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="item">File to index.</param>
        /// <param name="clue">Clue passed to the indexer along with its <c>Data</c>.</param>
        /// <param name="state">Not referenced in this method body.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="item"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Returns without indexing when the file does not exist, is zero bytes long, or the
        /// "Crawl.InitialCrawl.FileIndexing" flag is off.
        /// </remarks>
        public void Index(FileInfo item, Clue clue, AgentJobProcessorState <FileSystemCrawlJobData> state)
        {
            if (item == null)
            {
                throw new ArgumentNullException(nameof(item));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            // Existence is tested first so that Length is never read for a missing file.
            if (!item.Exists || item.Length == 0)
            {
                return;
            }

            var indexingEnabled = ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true);

            if (indexingEnabled)
            {
                FileCrawlingUtility.IndexFile(item, clue.Data, clue, args, context);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a clue for a mail attachment, fills in its document metadata, indexes the attachment
        /// content when it is a file attachment that is not filtered out, and forwards the clue to the
        /// wrapped observer.
        /// </summary>
        /// <param name="item">Model whose <c>Attachment</c> is processed.</param>
        /// <remarks>
        /// An attachment that is not a <c>FileAttachment</c>, or any exception, is counted as a clue
        /// failure and a task failure and is logged; in both cases no clue is forwarded and nothing is
        /// rethrown.
        /// </remarks>
        public void OnNext(AttachmentModel item)
        {
            try
            {
                var attachment = item.Attachment;
                var clue       = this.factory.Create(item);
                var metadata   = clue.Data.EntityData;

                // Initial values taken from the attachment itself; size and MIME type are replaced
                // further down once the file has been loaded to disk.
                metadata.Name             = attachment.Name;
                metadata.DocumentSize     = attachment.Size;
                metadata.DocumentMimeType = attachment.ContentType;
                metadata.ModifiedDate     = attachment.LastModifiedTime;

                // No Uri is assigned here: there is no direct Uri to an attachment, only to its mail.

                if (!(attachment is Microsoft.Exchange.WebServices.Data.FileAttachment fileAttachment))
                {
                    this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
                    this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
                    this.state.Log.Error(() => "Could not cast Mail Attachment as FileAttachment in Exchange: " + attachment.GetType().FullName);
                    return;
                }

                var filteredOut = AttachmentHelper.IsFiltered(this.state, attachment);

                if (!filteredOut)
                {
                    using (var localCopy = new TemporaryFile(fileAttachment.Name))
                    {
                        fileAttachment.ExLoad(this.state, localCopy.FilePath);

                        var detectedType = localCopy.FileInfo.ToMimeType();

                        metadata.DocumentFileName = fileAttachment.Name;
                        metadata.DocumentSize     = localCopy.FileInfo.Length;
                        metadata.DocumentMimeType = detectedType.Code;

                        FileCrawlingUtility.IndexFile(localCopy, clue.Data, clue, this.state, this.appContext);
                    }
                }

                // Filtered attachments are still forwarded, just without indexed content.
                this.observer.OnNext(clue);
            }
            catch (Exception failure)
            {
                this.state.Status.Statistics.Clues.IncrementTaskFailureCount();
                this.state.Status.Statistics.Tasks.IncrementTaskFailureCount();
                this.state.Log.Error(() => "Attachment Observer Exception", failure);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Downloads the file described by <paramref name="file"/> through <c>_client</c>, stores it in
        /// a temporary file and passes that file to <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="file">Metadata of the remote file; its <c>AsFile</c> view supplies size and revision.</param>
        /// <param name="clue">Clue passed to the indexer along with its <c>Data</c>.</param>
        /// <returns>A task that completes when the file has been downloaded and indexed, or skipped.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="file"/> or <paramref name="clue"/> is null.
        /// </exception>
        /// <remarks>
        /// Nothing is downloaded when the file is empty, when the "Crawl.InitialCrawl.FileIndexing" flag
        /// is off, or when the file is larger than <c>Constants.MaxFileIndexingFileSize</c>.
        /// </remarks>
        public async Task Index(Metadata file, Clue clue)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            if (clue == null)
            {
                throw new ArgumentNullException(nameof(clue));
            }

            if (file.AsFile.Size == 0)
            {
                return;
            }

            if (!ConfigurationManager.AppSettings.GetFlag("Crawl.InitialCrawl.FileIndexing", true))
            {
                return;
            }

            if ((long)file.AsFile.Size > Constants.MaxFileIndexingFileSize)
            {
                return;
            }

            using (var tempFile = new TemporaryFile(CleanFileName(file.Name)))
            {
                // The download response is disposable and was previously never released. It is scoped
                // to the copy so it is freed before the indexing step starts.
                using (var download = await _client.DownloadAsync(file.PathLower, file.AsFile.Rev).ConfigureAwait(false))
                using (var stream = await download.GetContentAsStreamAsync().ConfigureAwait(false))
                using (var fs = new FileStream(tempFile.FileInfo.FullName, FileMode.OpenOrCreate, FileAccess.Write))
                {
                    await stream.CopyToAsync(fs).ConfigureAwait(false);
                }

                // Both streams are closed at this point, so the indexer can open the file freely.
                FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, _args, _context);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Copies the properties of an Exchange contact onto the entity data of <paramref name="clue"/>:
        /// scalar contact fields, e-mail addresses (also added as aliases and entity codes), phone
        /// numbers, and a preview image taken from the directory photo or a contact-photo attachment.
        /// </summary>
        /// <param name="clue">Clue whose <c>Data.EntityData</c> and <c>Details.RawData</c> are populated.</param>
        /// <param name="contact">Contact model; <c>contact.Object</c> is the contact being read.</param>
        /// <param name="service">Exchange service, forwarded to <c>PopulateItem</c>.</param>
        /// <remarks>
        /// Failures while reading e-mail addresses or photos are logged as warnings and do not abort the
        /// method. If both a directory photo and a contact-photo attachment are images, the attachment
        /// is processed last and therefore ends up as <c>PreviewImage</c>.
        /// </remarks>
        protected void PopulateContact(Clue clue, ContactModel contact, ExchangeService service)
        {
            var value = contact.Object;
            var data  = clue.Data.EntityData;

            data.Name        = value.ExPrintIfAvailable(v => v.DisplayName);
            data.CreatedDate = value.ExGetIfAvailable(v => v.DateTimeCreated, data.CreatedDate);
            data.Culture     = CultureInfo.InvariantCulture;

            this.PopulateItem(clue, contact, ExchangeSharedMailboxVocabulary.Contact, service);

            contact.LoadContactSchemaProperties();

            // Re-read the display name after the contact schema properties have been loaded; keep the
            // current value when none is available.
            data.DisplayName = value.ExPrintIfAvailable(v => v.DisplayName) ?? data.DisplayName;

            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Alias]            = value.ExPrintIfAvailable(v => v.Alias);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.AssistantName]    = value.ExPrintIfAvailable(v => v.AssistantName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Birthday]         = value.ExPrintIfAvailable(v => v.Birthday);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.BusinessHomePage] = value.ExPrintIfAvailable(v => v.BusinessHomePage);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Companies]        = value.ExPrintIfAvailable(v => v.Companies);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompanyName]      = value.ExPrintIfAvailable(v => v.CompanyName);

            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.FullName]      = value.ExPrintIfAvailable(v => v.CompleteName?.FullName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.GivenName]     = value.ExPrintIfAvailable(v => v.CompleteName?.GivenName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.Initials]      = value.ExPrintIfAvailable(v => v.CompleteName?.Initials);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.MiddleName]    = value.ExPrintIfAvailable(v => v.CompleteName?.MiddleName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.NickName]      = value.ExPrintIfAvailable(v => v.CompleteName?.NickName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.Suffix]        = value.ExPrintIfAvailable(v => v.CompleteName?.Suffix);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.Surname]       = value.ExPrintIfAvailable(v => v.CompleteName?.Surname);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.Title]         = value.ExPrintIfAvailable(v => v.CompleteName?.Title);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.YomiGivenName] = value.ExPrintIfAvailable(v => v.CompleteName?.YomiGivenName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.CompleteName.YomiSurname]   = value.ExPrintIfAvailable(v => v.CompleteName?.YomiSurname);

            data.Properties[ExchangeSharedMailboxVocabulary.Contact.ContactSource] = value.ExPrintIfAvailable(v => v.ContactSource);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Department]    = value.ExPrintIfAvailable(v => v.Department);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.DirectoryId]   = value.ExPrintIfAvailable(v => v.DirectoryId);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.DisplayName]   = value.ExPrintIfAvailable(v => v.DisplayName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.FileAs]        = value.ExPrintIfAvailable(v => v.FileAs);

            // FileAsMapping.None is stored as null rather than as the text "None".
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.FileAsMapping] = value.ExPrintIfAvailable(v => v.FileAsMapping != FileAsMapping.None ? v.FileAsMapping.ToString() : null);

            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Generation]         = value.ExPrintIfAvailable(v => v.Generation);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.GivenName]          = value.ExPrintIfAvailable(v => v.GivenName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.HasPicture]         = value.ExPrintIfAvailable(v => v.HasPicture);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Initials]           = value.ExPrintIfAvailable(v => v.Initials);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.JobTitle]           = value.ExPrintIfAvailable(v => v.JobTitle);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Manager]            = value.ExPrintIfAvailable(v => v.Manager);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.MiddleName]         = value.ExPrintIfAvailable(v => v.MiddleName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Mileage]            = value.ExPrintIfAvailable(v => v.Mileage);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.NickName]           = value.ExPrintIfAvailable(v => v.NickName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Notes]              = value.ExPrintIfAvailable(v => v.Notes);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.OfficeLocation]     = value.ExPrintIfAvailable(v => v.OfficeLocation);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneticFirstName]  = value.ExPrintIfAvailable(v => v.PhoneticFirstName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneticFullName]   = value.ExPrintIfAvailable(v => v.PhoneticFullName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneticLastName]   = value.ExPrintIfAvailable(v => v.PhoneticLastName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.PostalAddressIndex] = value.ExPrintIfAvailable(v => v.PostalAddressIndex);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Profession]         = value.ExPrintIfAvailable(v => v.Profession);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.SpouseName]         = value.ExPrintIfAvailable(v => v.SpouseName);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.Surname]            = value.ExPrintIfAvailable(v => v.Surname);
            data.Properties[ExchangeSharedMailboxVocabulary.Contact.WeddingAnniversary] = value.ExPrintIfAvailable(v => v.WeddingAnniversary);

            // EmailAddresses
            try
            {
                var emails         = contact.GetEmailAddresses();
                var filteredEmails = contact.GetSmtpEmailAddresses(emails).ToList();

                if (filteredEmails.Count > 0)
                {
                    // The first SMTP address is the primary one; the joined list is only stored when
                    // there is more than one address.
                    data.Properties[ExchangeSharedMailboxVocabulary.Contact.EmailAddress] = filteredEmails[0];

                    if (filteredEmails.Count > 1)
                    {
                        data.Properties[ExchangeSharedMailboxVocabulary.Contact.EmailAddresses] = string.Join(";", filteredEmails);
                    }

                    data.Aliases.AddRange(filteredEmails);
                }

                // Entity codes are built from the unfiltered address list.
                clue.Data.EntityData.Codes.AddRange(contact.CreateEntityCodesFromEmailAddresses(emails));
            }
            catch (ServiceObjectPropertyException e)
            {
                this.state.Log.Warn(() => "Could not get email addresses", e);
            }

            // PhoneNumbers
            {
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.AssistantPhone]   = GetPhoneNumber(value, PhoneNumberKey.AssistantPhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.BusinessFax]      = GetPhoneNumber(value, PhoneNumberKey.BusinessFax);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.BusinessPhone]    = GetPhoneNumber(value, PhoneNumberKey.BusinessPhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.BusinessPhone2]   = GetPhoneNumber(value, PhoneNumberKey.BusinessPhone2);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.Callback]         = GetPhoneNumber(value, PhoneNumberKey.Callback);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.CarPhone]         = GetPhoneNumber(value, PhoneNumberKey.CarPhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.CompanyMainPhone] = GetPhoneNumber(value, PhoneNumberKey.CompanyMainPhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.HomeFax]          = GetPhoneNumber(value, PhoneNumberKey.HomeFax);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.HomePhone]        = GetPhoneNumber(value, PhoneNumberKey.HomePhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.HomePhone2]       = GetPhoneNumber(value, PhoneNumberKey.HomePhone2);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.Isdn]             = GetPhoneNumber(value, PhoneNumberKey.Isdn);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.MobilePhone]      = GetPhoneNumber(value, PhoneNumberKey.MobilePhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.OtherFax]         = GetPhoneNumber(value, PhoneNumberKey.OtherFax);
                // NOTE(review): PhoneNumberKey.PrimaryPhone is not mapped here. Confirm whether the
                // vocabulary should carry it.
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.Pager]            = GetPhoneNumber(value, PhoneNumberKey.Pager);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.OtherTelephone]   = GetPhoneNumber(value, PhoneNumberKey.OtherTelephone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.RadioPhone]       = GetPhoneNumber(value, PhoneNumberKey.RadioPhone);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.Telex]            = GetPhoneNumber(value, PhoneNumberKey.Telex);
                data.Properties[ExchangeSharedMailboxVocabulary.Contact.PhoneNumbers.TtyTddPhone]      = GetPhoneNumber(value, PhoneNumberKey.TtyTddPhone);
            }

            // DirectoryPhoto
            try
            {
                var version    = (int)contact.Service.RequestedServerVersion;
                var additional = new List<PropertyDefinitionBase>()
                {
                    ContactSchema.Photo
                };

                // Only request properties whose Version is not newer than the requested server version.
                var filtered = additional.Where(p => ((int)p.Version) <= version).ToArray();

                if (filtered.Length > 0)
                {
                    var tmp = new PropertySet(BasePropertySet.FirstClassProperties, filtered.Concat(new[] { ItemSchema.Attachments }));

                    // Contacts without an Id are not reloaded.
                    if (value.Id != null)
                    {
                        value.ExLoad(this.state, tmp);
                    }

                    var directoryPhoto = value.ExGetIfAvailable(v => v.DirectoryPhoto, null);

                    if (directoryPhoto != null)
                    {
                        using (var stream = new MemoryStream(directoryPhoto))
                        {
                            var mimeType = FileCrawlingUtility.DetectMimeType(new FileInfo("PreviewImage"), new DataPart(), stream, this.state);

                            if (mimeType.FileDescriptor.Category == FileCategory.ImageBitmap)
                            {
                                var rawDataPart = new RawDataPart()
                                {
                                    Type       = "/RawData/PreviewImage",
                                    MimeType   = mimeType.Code,
                                    FileName   = "preview_{0}".FormatWith(data.OriginEntityCode.Key),
                                    RawDataMD5 = FileHashUtility.GetMD5Base64String(directoryPhoto),
                                    RawData    = Convert.ToBase64String(directoryPhoto)
                                };

                                clue.Details.RawData.Add(rawDataPart);

                                data.PreviewImage = new ImageReferencePart(rawDataPart);
                            }
                            else
                            {
                                this.state.Log.Info(() => $"DirectoryPhoto is not an image: {mimeType.Code}");
                            }
                        }
                    }
                }

                if (value.Attachments != null)
                {
                    // First file attachment flagged as the contact photo, if any.
                    var contactPhoto = value.Attachments.OfType<FileAttachment>().FirstOrDefault(a => a.IsContactPhoto);

                    if (contactPhoto != null)
                    {
                        using (var tempFile = new TemporaryFile(contactPhoto.Name))
                        {
                            contactPhoto.ExLoad(this.state, tempFile.FilePath);

                            // This first value is replaced by the DetectMimeType result just below.
                            var mimeType = tempFile.FileInfo.ToMimeType();

                            using (var stream = File.OpenRead(tempFile.FilePath))
                            {
                                mimeType = FileCrawlingUtility.DetectMimeType(tempFile.FileInfo, new DataPart(), stream, this.state);
                            }

                            if (mimeType.FileDescriptor.Category == FileCategory.ImageBitmap)
                            {
                                using (var stream = File.OpenRead(tempFile.FilePath))
                                {
                                    var bytes = stream.ToArray();

                                    var rawDataPart = new RawDataPart()
                                    {
                                        Type       = "/RawData/PreviewImage",
                                        MimeType   = mimeType.Code,
                                        FileName   = "preview_{0}".FormatWith(data.OriginEntityCode.Key),
                                        RawDataMD5 = FileHashUtility.GetMD5Base64String(bytes),
                                        RawData    = Convert.ToBase64String(bytes)
                                    };

                                    clue.Details.RawData.Add(rawDataPart);

                                    data.PreviewImage = new ImageReferencePart(rawDataPart);
                                }
                            }
                            else
                            {
                                this.state.Log.Info(() => $"ContactPhoto is not an image: {mimeType.Code}");
                            }
                        }
                    }
                }
            }
            catch (OperationCanceledException)
            {
                // Cancellation during photo retrieval is swallowed without logging; the contact keeps
                // whatever has been populated so far. NOTE(review): confirm that not propagating the
                // cancellation is intended.
            }
            catch (Exception e)
            {
                this.state.Log.Warn(() => "Could not get DirectoryPhoto", e);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Downloads a drive item from <paramref name="webUrl"/>, records its MD5 hash, file name and
        /// size on the clue, and passes a temporary copy to <c>FileCrawlingUtility.IndexFile</c>.
        /// </summary>
        /// <param name="input">Wrapper whose <c>DriveItem</c> supplies the name and size.</param>
        /// <param name="webUrl">Address the file content is downloaded from.</param>
        /// <param name="clue">Clue whose <c>Data.EntityData</c> is populated.</param>
        /// <remarks>
        /// Items whose size does not satisfy <c>Size &lt;= MaxFileIndexingFileSize</c> are not
        /// downloaded; only their name and size are recorded. Any exception raised while downloading or
        /// indexing is logged and not rethrown.
        /// </remarks>
        private void Index([NotNull] CluedInDriveItem input, [NotNull] string webUrl, [NotNull] Clue clue)
        {
            var data  = clue.Data;
            var value = input.DriveItem;

            // The comparison is kept in this direction on purpose: if Size has no value the test is
            // false and the item takes the metadata-only branch.
            if (value.Size <= CluedIn.Core.Constants.MaxFileIndexingFileSize)
            {
                try
                {
                    using (var tempFile = new TemporaryFile(value.Name))
                    {
                        byte[] content;

                        using (var webClient = new WebClient())
                        {
                            content = webClient.DownloadData(webUrl);
                        }

                        // Hash and write the downloaded buffer directly; no intermediate stream needed.
                        string hash;

                        using (var md5 = MD5.Create())
                        {
                            hash = BitConverter.ToString(md5.ComputeHash(content));
                        }

                        using (var fileStream = System.IO.File.Create(tempFile.FilePath))
                        {
                            fileStream.Write(content, 0, content.Length);
                        }

                        // The content hash serves as an entity code and is also stored as a property.
                        data.EntityData.Codes.Add(new EntityCode(EntityType.Files.File, OneDriveConstants.CodeOrigin, hash));

                        if (value.Name != null)
                        {
                            data.EntityData.DocumentFileName = value.Name;
                        }

                        // Size comes from the file on disk; DocumentMimeType is not set by this method.
                        data.EntityData.DocumentSize = tempFile.FileInfo.Length;
                        data.EntityData.Properties[OneDriveVocabularies.File.Hash] = hash;

                        FileCrawlingUtility.IndexFile(tempFile, clue.Data, clue, state, appContext);
                    }
                }
                catch (Exception ex)
                {
                    appContext.Container.GetLogger().Error(() => ex.Message, ex);
                }
            }
            else
            {
                data.EntityData.DocumentFileName = value.Name;
                data.EntityData.DocumentSize     = value.Size;
            }
        }