/// <summary>
        /// Handles one change obtained via the web hook notification: loads the changed item's file,
        /// converts its binary content to text, extracts key phrases from that text and passes them,
        /// together with each configured taxonomy term name, to <c>SetTaxFieldValueAgainstKeyPhrase</c>.
        /// </summary>
        /// <param name="cc">Client context used to load the file and execute the pending query.</param>
        /// <param name="changeList">List from which the changed item is fetched by its ID.</param>
        /// <param name="change">The reported change; it is cast to <c>ChangeItem</c> to read the item ID.</param>
        private static void DoWork(ClientContext cc, List changeList, Change change)
        {
            // Read the comma-separated term names from configuration.
            // The null-conditional call leaves this null when the setting is absent.
            var taxonomyTerms = CloudConfigurationManager.GetSetting("TaxonomyTermNames")
                                ?.Split(",".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

            // Nothing to do when the setting is missing or contains no names.
            // The explicit null check matters: calling Any() on a null array would throw.
            if (taxonomyTerms == null || taxonomyTerms.Length == 0)
            {
                return;
            }

            // Get the list item from the Change List
            // Note that this is the ID of the item in the list, not a reference to its position.
            var targetListItem = changeList.GetItemById(((ChangeItem)change).ItemId);

            cc.Load(targetListItem.File);

            // Queue retrieval of the file's binary stream; its Value is read after the query executes.
            var streamResult = targetListItem.File.OpenBinaryStream();

            cc.ExecuteQueryRetry();

            string textFromStream;

            try
            {
                // Get Text Rendition of document binary
                var tika = new Tika();
                textFromStream = tika.ParseToString(streamResult.Value);
            }
            catch (Exception ex)
            {
                // Text extraction is best-effort: log the failure and skip this change.
                Trace.TraceWarning($"Tika Error: {ex}");
                return;
            }

            // Get Key phrases from text rendition
            // NOTE(review): reading .Result waits synchronously for the call to finish — consider an async call chain.
            var client = new TextAnalyticsClient();
            var result = client.GetStringPhrasesEntities(textFromStream).Result;

            // Materialize the key phrases once so they can be logged and reused for every term.
            var keyPhrases = result as string[] ?? result.ToArray();

            Trace.TraceInformation($"Key Phrases: {string.Join(",", keyPhrases)}");

            try
            {
                // Apply the key phrases to each configured term in order; the first failure aborts the rest.
                foreach (var term in taxonomyTerms)
                {
                    SetTaxFieldValueAgainstKeyPhrase(cc, changeList, targetListItem, term, keyPhrases);
                }
            }
            catch (Exception ex)
            {
                Trace.TraceError($"Error: {ex}");
            }
        }
Example #2
0
        /// <summary>
        /// Parses three sample documents 1000 times each in parallel, using the <c>tika</c> instance
        /// declared outside this method, and asserts that both <c>Parse</c> and <c>ParseToString</c>
        /// return text containing the expected phrase for each document.
        /// </summary>
        public void ParallelTestWithCentralParser()
        {
            // Each entry pairs a document path (Key) with a phrase expected in its extracted text (Value).
            var samples = new List<KeyValuePair<string, string>>
            {
                new KeyValuePair<string, string>(_filePathParent + "files/Tika.pptx", "Tika Test Presentation"),
                new KeyValuePair<string, string>(_filePathParent + "files/Tika.docx", "formatted in interesting ways"),
                new KeyValuePair<string, string>(_filePathParent + "files/Tika.xlsx", "Use the force duke")
            };

            // Repeat the sample set 1000 times to build the parallel workload.
            var workload = new List<KeyValuePair<string, string>>();
            var remainingRounds = 1000;
            while (remainingRounds > 0)
            {
                workload.AddRange(samples);
                remainingRounds--;
            }

            Parallel.ForEach(workload, sample =>
            {
                var path = sample.Key;
                var expectedPhrase = sample.Value;

                // Full parse result: check the extracted text.
                tika.Parse(path).Text.Should().Contain(expectedPhrase);

                // String-only parse: check the returned text directly.
                tika.ParseToString(path).Should().Contain(expectedPhrase);
            });
        }
        /// <summary>
        /// Parses the sample RTF file to a string and asserts that the extracted text
        /// contains a known phrase.
        /// </summary>
        public void Simple_File_To_String_Parsing()
        {
            var rtfPath = _filePathParent + "files/Tika.rtf";

            var extracted = tika.ParseToString(rtfPath);

            extracted.Should().Contain("pack of pickled almonds");
        }