Beispiel #1
0
        /// <summary>
        /// Runs test case 1: indexes every document returned by DocumentsToIndex for
        /// "./TestCases/1/", waits for the operator to press ENTER, then runs every
        /// query returned by QueriesToProcess for the same directory, printing each
        /// result as JSON to the console.
        /// </summary>
        static void TestCase1()
        {
            List<string> documentPaths = DocumentsToIndex("./TestCases/1/");
            List<string> queryPaths    = QueriesToProcess("./TestCases/1/");
            ParseOptions parseOptions  = new ParseOptions();

            // Phase 1: add each document to the index and dump the index result.
            foreach (string documentPath in documentPaths)
            {
                byte[] payload = Common.ReadBinaryFile(documentPath);

                // The file path is passed for both the fourth and fifth constructor arguments.
                SourceDocument sourceDoc = new SourceDocument(
                    "test",
                    "test",
                    documentPath,
                    documentPath,
                    null,
                    DocType.Json,
                    null,
                    "application/json",
                    payload.Length,
                    Common.Md5(payload));

                // Blocks until the asynchronous add has completed.
                IndexResult addResult = _IndexClient.Add(sourceDoc, payload, parseOptions, true).Result;

                Console.WriteLine("");
                Console.WriteLine("Add: " + documentPath);
                Console.WriteLine(Common.SerializeJson(addResult, true));
            }

            // Pause so the operator can inspect the indexing output.
            Console.WriteLine("");
            Console.WriteLine("Press ENTER to continue");
            Console.ReadLine();

            // Phase 2: run each stored query against the index and dump the search result.
            foreach (string queryPath in queryPaths)
            {
                SearchQuery  searchQuery  = Common.DeserializeJson<SearchQuery>(Common.ReadBinaryFile(queryPath));
                SearchResult searchResult = _IndexClient.Search(searchQuery);

                Console.WriteLine("");
                Console.WriteLine("Query: " + queryPath);
                Console.WriteLine(Common.SerializeJson(searchResult, true));
            }
        }
Beispiel #2
0
        /// <summary>
        /// Store or store-and-index a document in the named index.
        /// </summary>
        /// <param name="indexName">Name of the index.</param>
        /// <param name="sourceDoc">Source document.</param>
        /// <param name="data">Byte data.</param>
        /// <param name="options">Text parse options.</param>
        /// <param name="parse">True if the document should be parsed and indexed.</param>
        /// <param name="postbackUrl">URL to which results should be POSTed.</param>
        /// <returns>Index result.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="indexName"/> is null or empty.</exception>
        /// <exception cref="InvalidOperationException">Thrown when no index client can be found for <paramref name="indexName"/>.</exception>
        public async Task<IndexResult> AddDocument(string indexName, SourceDocument sourceDoc, byte[] data, ParseOptions options, bool parse = true, string postbackUrl = null)
        {
            if (String.IsNullOrEmpty(indexName))
            {
                throw new ArgumentNullException(nameof(indexName));
            }

            KomodoIndex idx = GetIndexClient(indexName);
            if (idx == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException when the index is unknown.
                throw new InvalidOperationException("Unable to find index " + indexName);
            }

            return(await idx.Add(sourceDoc, data, options, parse, postbackUrl));
        }
Beispiel #3
0
        /// <summary>
        /// Demo entry point: creates the index manager and two indices, adds three JSON
        /// person documents to the "default" index, loads the metadata policy from
        /// "./policy.json", and runs the metadata processor against each indexed document.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region Index-Manager

            Console.WriteLine("Initializing index manager");
            _Indices = new KomodoIndices(
                new DbSettings("./indices.db"),
                new StorageSettings(new DiskSettings("./source/")),
                new StorageSettings(new DiskSettings("./parsed/")));

            #endregion

            #region Indices

            Console.WriteLine("Initializing indices");
            _Index1 = new Index("default", "default", "default");
            _Index2 = new Index("metadata", "default", "metadata");
            _Indices.Add(_Index1);
            _Indices.Add(_Index2);

            #endregion

            #region Adding-Documents

            _IndexClient1 = _Indices.GetIndexClient("default");
            _IndexClient2 = _Indices.GetIndexClient("metadata");

            byte[]         doc1 = File.ReadAllBytes("person1.json");
            SourceDocument sd1  = new SourceDocument(
                "default",
                "default",
                "Person 1",
                "Person 1",
                null,
                DocType.Json,
                null,
                "application/json",
                doc1.Length,
                Common.Md5(doc1));

            byte[]         doc2 = File.ReadAllBytes("person2.json");
            SourceDocument sd2  = new SourceDocument(
                "default",
                "default",
                "Person 2",
                "Person 2",
                null,
                DocType.Json,
                null,
                "application/json",
                doc2.Length,
                Common.Md5(doc2));

            byte[]         doc3 = File.ReadAllBytes("person3.json");
            SourceDocument sd3  = new SourceDocument(
                "default",
                "default",
                "Person 3",
                "Person 3",
                null,
                DocType.Json,
                null,
                "application/json",
                doc3.Length,
                Common.Md5(doc3));

            // Each add is awaited synchronously; all three documents go into the "default" index client.
            IndexResult r1 = _IndexClient1.Add(sd1, doc1, new ParseOptions(), true).Result;
            IndexResult r2 = _IndexClient1.Add(sd2, doc2, new ParseOptions(), true).Result;
            IndexResult r3 = _IndexClient1.Add(sd3, doc3, new ParseOptions(), true).Result;

            #endregion

            #region Blobs

            _Blobs = new Blobs(new DiskSettings("./Metadata/"));
            if (!Directory.Exists("./Metadata/"))
            {
                Directory.CreateDirectory("./Metadata/");
            }

            #endregion

            #region Policy

            // Read the policy file once and deserialize from that buffer.
            byte[] bytes = File.ReadAllBytes("./policy.json");
            _Policy = Common.DeserializeJson<MetadataPolicy>(bytes);

            #endregion

            #region Initialize-Metadata

            Console.WriteLine("Initializing metadata processor");

            _Metadata = new MetadataProcessor(_Policy, _Indices);

            #endregion

            #region Apply-Metadata

            Console.WriteLine("Processing metadata");

            _Result1 = _Metadata.ProcessDocument(
                r1.SourceDocument,
                r1.ParsedDocument,
                r1.ParseResult).Result;

            // Console.WriteLine("Document 1: " + Environment.NewLine + Common.SerializeJson(_Result1, true));

            _Result2 = _Metadata.ProcessDocument(
                r2.SourceDocument,
                r2.ParsedDocument,
                r2.ParseResult).Result;

            // Console.WriteLine("Document 2: " + Environment.NewLine + Common.SerializeJson(_Result2, true));

            _Result3 = _Metadata.ProcessDocument(
                r3.SourceDocument,
                r3.ParsedDocument,
                r3.ParseResult).Result;

            // Only the third document's metadata result is printed.
            Console.WriteLine("Document 3: " + Environment.NewLine + Common.SerializeJson(_Result3, true));

            #endregion
        }
Beispiel #4
0
        /// <summary>
        /// Process a document using the configured rules.
        /// For every matching rule, each add-metadata-document action that has properties
        /// produces a derived JSON document, which is recorded as metadata on the source
        /// index and added to the action's destination index. Afterwards, every rule with
        /// postback URLs receives a POST of the result, filtered by that rule's include flags.
        /// </summary>
        /// <param name="source">Source document.</param>
        /// <param name="parsed">Parsed document.</param>
        /// <param name="parseResult">Parse result.</param>
        /// <returns>Metadata result.</returns>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when the source or destination index client cannot be found.</exception>
        public async Task<MetadataResult> ProcessDocument(SourceDocument source, ParsedDocument parsed, ParseResult parseResult)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }
            if (parsed == null)
            {
                throw new ArgumentNullException(nameof(parsed));
            }
            if (parseResult == null)
            {
                throw new ArgumentNullException(nameof(parseResult));
            }

            MetadataResult result = new MetadataResult();

            result.Source      = source;
            result.Parsed      = parsed;
            result.ParseResult = parseResult;

            List<MetadataRule> matchingRules = GetMatchingRules(source, parsed, parseResult);

            // Nothing to do when no rule matches; the result carries only the inputs.
            if (matchingRules == null || matchingRules.Count < 1)
            {
                return(result);
            }

            foreach (MetadataRule rule in matchingRules)
            {
                result.MatchingRules.Add(rule);

                if (rule.AddMetadataDocument != null && rule.AddMetadataDocument.Count > 0)
                {
                    foreach (AddMetadataDocumentAction addDocAction in rule.AddMetadataDocument)
                    {
                        // Actions without properties would yield an empty derived document; skip them.
                        if (addDocAction.Properties == null || addDocAction.Properties.Count < 1)
                        {
                            continue;
                        }

                        #region Retrieve-Index-Clients

                        KomodoIndex src = _Indices.GetIndexClient(source.IndexGUID);
                        if (src == null)
                        {
                            throw new InvalidOperationException("Unable to find source index " + source.IndexGUID);
                        }

                        KomodoIndex dst = _Indices.GetIndexClient(addDocAction.IndexGUID);
                        if (dst == null)
                        {
                            throw new InvalidOperationException("Unable to find destination index " + addDocAction.IndexGUID);
                        }

                        #endregion

                        #region Generate-Derived-Metadata-Document

                        Dictionary<string, object> derivedDocument = new Dictionary<string, object>();

                        foreach (MetadataDocumentProperty prop in addDocAction.Properties)
                        {
                            if (prop.ValueAction == PropertyValueAction.CopyFromDocument)
                            {
                                // Value is looked up in the parse result using the property's source path.
                                string val = GetValueFromParseResult(parseResult, prop.SourceProperty);
                                derivedDocument.Add(prop.Key, val);
                            }
                            else if (prop.ValueAction == PropertyValueAction.Static)
                            {
                                derivedDocument.Add(prop.Key, prop.Value);
                            }
                        }

                        byte[] derivedDocBytes = Encoding.UTF8.GetBytes(Common.SerializeJson(derivedDocument, true));

                        #endregion

                        #region Store-in-Database

                        MetadataDocument metadataDoc = new MetadataDocument();
                        metadataDoc.Created            = DateTime.UtcNow;
                        metadataDoc.GUID               = Guid.NewGuid().ToString();
                        metadataDoc.IndexGUID          = source.IndexGUID;
                        metadataDoc.OwnerGUID          = source.OwnerGUID;
                        metadataDoc.SourceDocumentGUID = source.GUID;
                        metadataDoc.TargetIndexGUID    = addDocAction.IndexGUID;
                        metadataDoc.Type               = DocType.Json;

                        // Use the instance returned by AddMetadata from here on.
                        metadataDoc = src.AddMetadata(source, metadataDoc);

                        #endregion

                        #region Index

                        // The derived source document reuses the metadata document's GUID.
                        SourceDocument derivedSourceDoc = new SourceDocument(
                            metadataDoc.GUID,
                            source.OwnerGUID,
                            addDocAction.IndexGUID,
                            addDocAction.Name,
                            addDocAction.Title,
                            addDocAction.Tags,
                            DocType.Json,
                            null,
                            "application/json",
                            derivedDocBytes.Length,
                            Common.Md5(derivedDocBytes));

                        IndexResult idxResult = await dst.Add(derivedSourceDoc, derivedDocBytes, new ParseOptions(), addDocAction.Parse);

                        #endregion

                        #region Store-Results

                        result.MetadataDocuments.Add(metadataDoc);
                        result.DerivedDocuments.Add(idxResult.SourceDocument);
                        result.DerivedDocumentsData.Add(derivedDocument);
                        result.DerivedIndexResults.Add(idxResult);

                        #endregion
                    }
                }
            }

            foreach (MetadataRule rule in matchingRules)
            {
                if (rule.Postback != null &&
                    rule.Postback.Urls != null &&
                    rule.Postback.Urls.Count > 0)
                {
                    // Work on a copy so that stripping fields for this rule does not affect
                    // the result returned to the caller or the payload built for other rules.
                    MetadataResult postbackMetadata = Common.CopyObject<MetadataResult>(result);
                    if (!rule.Postback.IncludeSource)
                    {
                        postbackMetadata.Source = null;
                    }
                    if (!rule.Postback.IncludeParsed)
                    {
                        postbackMetadata.Parsed = null;
                    }
                    if (!rule.Postback.IncludeParseResult)
                    {
                        postbackMetadata.ParseResult = null;
                    }
                    if (!rule.Postback.IncludeMetadata)
                    {
                        postbackMetadata.MetadataDocuments = null;
                    }
                    if (!rule.Postback.IncludeRules)
                    {
                        postbackMetadata.MatchingRules = null;
                    }
                    if (!rule.Postback.IncludeDerivedDocuments)
                    {
                        postbackMetadata.DerivedDocuments = null;
                    }

                    // Serialize the filtered copy once per rule; the payload is the same for every URL.
                    string postbackBody = Common.SerializeJson(postbackMetadata, true);

                    // De-duplicate into a local list rather than overwriting the rule's own URL list.
                    List<string> postbackUrls = rule.Postback.Urls.Distinct().ToList();

                    foreach (string url in postbackUrls)
                    {
                        if (String.IsNullOrEmpty(url))
                        {
                            continue;
                        }

                        RestRequest req = new RestRequest(
                            url,
                            HttpMethod.POST,
                            null,
                            "application/json");

                        // Send the filtered copy so the rule's include flags are honored.
                        RestResponse resp = req.Send(postbackBody);

                        // NOTE(review): if two matching rules share a URL, this Add may throw on the
                        // duplicate key, depending on the type of PostbackStatusCodes; confirm.
                        result.PostbackStatusCodes.Add(url, resp.StatusCode);
                    }
                }
            }

            return(result);
        }