Exemple #1
0
        /// <summary>
        /// Lazily enumerates the parent links of this node whose external ID equals <paramref name="externalId"/>.
        /// </summary>
        /// <remarks>
        /// Parent links are read from keys of the form { 4 } + internalId (this node) + externalId (parent) + internalId (parent).
        /// The content of every returned node is read from its own row { 1 } + externalId + internalId.
        /// Because this is an iterator, nothing (including the argument check) runs before the first MoveNext.
        /// </remarks>
        /// <typeparam name="T">Type of the external ID; converted to a byte[] by DataTypesConvertor.ConvertKey</typeparam>
        /// <param name="externalId">External ID of the searched parent nodes; must not be null</param>
        /// <param name="AsReadVisibilityScope">Passed through to SelectForwardStartFrom</param>
        /// <returns>
        /// One DGNode per matching parent link. For a node that has no internal ID yet (internalId == 0)
        /// a single null element is returned and the storage is not touched.
        /// </returns>
        /// <exception cref="Exception">When <paramref name="externalId"/> is null</exception>
        public IEnumerable <DGNode> GetParent <T>(T externalId, bool AsReadVisibilityScope = false)
        {
            if (this.internalId == 0)   //Not existing in DB node
            {
                //The null element is kept for callers that already test the first item for null,
                //but enumeration must stop here: previously the code fell through and scanned the table with internal ID 0.
                yield return(null);
                yield break;
            }

            CheckGraph();

            if (externalId == null)
            {
                throw new Exception("Searched ID can't be null");
            }

            byte[] btExId = DataTypesConvertor.ConvertKey <T>(externalId);

            //Prefix shared by all searched links: protocol byte 4 + internal ID of this node + external ID of the parent
            byte[] key = new byte[] { 4 }.ConcatMany(this.internalId.To_4_bytes_array_BigEndian(), btExId);

            foreach (var n in this.graph.tran.SelectForwardStartFrom <byte[], byte[]>(this.graph.tableName, key, false, AsReadVisibilityScope))
            {
                //Keys are traversed forward starting from the prefix; the first key not starting with it ends the search
                if (!key._ByteArrayEquals(n.Key.Substring(0, key.Length)))
                {
                    break;
                }

                //NOTE(review): the key remainder is taken as the parent's internal ID without checking that it is
                //exactly 4 bytes long - confirm that no other external ID can start with btExId.
                DGNode node = new DGNode(btExId)
                {
                    //content of the link row; replaced right below by the value of the node's own row
                    content = n.Value, graph = this.graph, internalId = n.Key.Substring(key.Length).To_UInt32_BigEndian()
                };

                //The node content lives under protocol byte 1 + external ID + internal ID
                byte[] contentKey = new byte[] { 1 }.ConcatMany(btExId, node.internalId.To_4_bytes_array_BigEndian());
                DBreeze.DataTypes.Row <byte[], byte[]> contentRow = this.graph.tran.Select <byte[], byte[]>(this.graph.tableName, contentKey, true);
                node.content = contentRow.Value;

                yield return(node);
            }
        }
Exemple #2
0
        /// <summary>
        /// Lazily enumerates the external IDs of the documents selected by this block.
        /// </summary>
        /// <remarks>
        /// When ExternalDocumentIdStart / ExternalDocumentIdStop are set on the manager, they are first
        /// translated through the e2i table into DocIdA / DocIdZ.
        /// </remarks>
        /// <returns>External ID of every found document that has a row in the i2e table</returns>
        public IEnumerable <byte[]> GetDocumentIDs()
        {
            this._tsm.ComputeWordsOrigin();

            //New Logical block is always a result of operation between 2 blocks
            //Usual block is added via TextSearchManager

            //Translating the optional external range borders into internal document IDs
            if (this._tsm.ExternalDocumentIdStart != null)
            {
                this._tsm.DocIdA = this._tsm.e2i.Select <byte[], int>(this._tsm.ExternalDocumentIdStart).Value;
            }

            if (this._tsm.ExternalDocumentIdStop != null)
            {
                this._tsm.DocIdZ = this._tsm.e2i.Select <byte[], int>(this._tsm.ExternalDocumentIdStop).Value;
            }

            var wordArrays = this.GetArrays();

            if (wordArrays.Count == 0)
            {
                //Nothing to intersect
                yield break;
            }

            var matches = WABI.TextSearch_AND_logic(wordArrays);

            //The overload with range and direction is used as soon as one of them is set
            bool rangeOrDirectionGiven = this._tsm.DocIdA > 0 || this._tsm.DocIdZ > 0 || !this._tsm.Descending;
            if (rangeOrDirectionGiven)
            {
                matches = WABI.TextSearch_AND_logic(wordArrays, this._tsm.DocIdA, this._tsm.DocIdZ, this._tsm.Descending);
            }

            foreach (var internalDocId in matches)
            {
                //Internal document ID -> external document ID
                DBreeze.DataTypes.Row <int, byte[]> externalIdRow = this._tsm.i2e.Select <int, byte[]>((int)internalDocId);

                if (!externalIdRow.Exists)
                {
                    continue;
                }

                yield return(externalIdRow.Value);
            }
        }
Exemple #3
0
        /// <summary>
        /// Lazily enumerates the external IDs of the documents selected by this block.
        /// </summary>
        /// <returns>External ID of every found document that has a row in the tbExternalIDs table</returns>
        public IEnumerable <byte[]> GetDocumentIDs()
        {
            this._tsm.ComputeWordsOrigin();

            //New Logical block is always a result of operation between 2 blocks
            //Usual block is added via TextSearchManager

            var wordArrays = this.GetArrays();

            if (wordArrays.Count == 0)
            {
                //Nothing to intersect
                yield break;
            }

            foreach (var internalDocId in WABI.TextSearch_AND_logic(wordArrays))
            {
                //Internal document ID -> external document ID
                DBreeze.DataTypes.Row <int, byte[]> externalIdRow = this._tsm.tbExternalIDs.Select <int, byte[]>((int)internalDocId);

                if (!externalIdRow.Exists)
                {
                    continue;
                }

                yield return(externalIdRow.Value);
            }
        }
Exemple #4
0
        /// <summary>
        /// Searches the text index stored in <paramref name="tableName"/> and returns the external IDs
        /// of the documents matching the search words of <paramref name="req"/>.
        /// </summary>
        /// <remarks>
        /// Steps:
        /// 1. Every search word is looked up by prefix (SelectForwardStartsWith) in the words table (nested table 20).
        ///    A search word with NoisyQuantity or more matches is treated as high-occurrence and contributes nothing,
        ///    unless no search word contributes anything; then the matches collected for the first
        ///    high-occurrence word are used (at most Quantity of them).
        /// 2. The bitmap of every matched stored word is loaded from the blocks table (nested table 10).
        /// 3. Bitmaps are merged per search word and combined with OR or AND logic, depending on req.SearchLogicType.
        /// 4. Resulting internal document IDs are translated into external IDs via nested table 2.
        /// </remarks>
        /// <param name="tableName">Table holding the nested text-search tables</param>
        /// <param name="req">Search words, logic type, Quantity (result limit) and NoisyQuantity (high-occurrence threshold)</param>
        /// <returns>
        /// Response whose FoundDocumentIDs holds at most req.Quantity external document IDs. It is returned
        /// without results when nothing is found, or when AND logic is requested and one search word has no match.
        /// </returns>
        public TextSearchResponse SearchTextInDocuments(string tableName, TextSearchRequest req)
        {
            TextSearchResponse resp = new TextSearchResponse();

            //[string,byte[]] BlockId[int] + NumberInBlock[int]
            NestedTable tbWords = tran.SelectTable <byte>(tableName, 20, 0);

            tbWords.ValuesLazyLoadingIsOn = false;

            var Words = this.PrepareSearchKeyWords(req.SearchWords);

            #region "Multiple Words"

            //Evaluated once, it does not change while the index rows are scanned
            bool singleSearchWord = Words.Count() == 1;

            //A list and not a dictionary keyed by the stored word: one stored word can be matched by several
            //search words (e.g. "cat" and "cats" both match "cats") and must then count for each of them.
            //A dictionary threw ArgumentException on the second Add in that case.
            List <WordInDocs> words = new List <WordInDocs>();
            int foundOrigin = 1;    //Ordinal of the search word that produced a match, starts from 1

            Dictionary <string, WordInDocs> firstHighOccuranceWord = new Dictionary <string, WordInDocs>();

            //The quantity of processed search words is limited by TextSearchConfig.MaxQuantityOfWordsToBeSearched
            foreach (var word in Words.Take(tran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.MaxQuantityOfWordsToBeSearched))
            {
                bool anyWordFound    = false;
                int  totalFoundWords = 0;
                Dictionary <string, WordInDocs> perWord = new Dictionary <string, WordInDocs>();

                foreach (var row1 in tbWords.SelectForwardStartsWith <string, byte[]>(word))
                {
                    anyWordFound = true;
                    totalFoundWords++;

                    if (singleSearchWord && totalFoundWords > req.Quantity)
                    {
                        //In case if only one search word, then we don't need to make any comparation
                        break;
                    }
                    else if (totalFoundWords >= req.NoisyQuantity)  //Found lots of words with such mask inside
                    {
                        //Too much found docs have this word-part inside, better to enhance search.
                        //Such word parts are currently ignored, except for the fallback below.
                        if (firstHighOccuranceWord.Count == 0)
                        {
                            //Only first HighOccurance word part is remembered. It is used later in case if all search words are of HighOccurance (then we will visualize only this one)
                            firstHighOccuranceWord = perWord.ToDictionary(r => r.Key, r => r.Value);
                        }
                        //Dropping everything collected for this search word
                        perWord.Clear();
                        break;
                    }

                    //Row value: first 4 bytes - block ID, next 4 bytes - number inside of the block
                    perWord.Add(row1.Key, new WordInDocs()
                    {
                        BlockId       = row1.Value.Substring(0, 4).To_UInt32_BigEndian(),
                        NumberInBlock = row1.Value.Substring(4, 4).To_UInt32_BigEndian(),
                        foundOrigin   = foundOrigin
                    });
                }

                //Repacking occurances
                words.AddRange(perWord.Values);

                foundOrigin++;

                if (
                    req.SearchLogicType == TextSearchRequest.eSearchLogicType.AND
                    &&
                    !anyWordFound
                    )
                {
                    //Non of words found corresponding to AND logic
                    return(resp);
                }
            }

            if (words.Count == 0)
            {
                //In case of multiple search words and each of them of HighOccurance.
                //We will form result only from the first HighOccurance list, the returning result is very approximate
                words.AddRange(firstHighOccuranceWord.Values.Take(req.Quantity));
            }

            //Here we must start get data from blocks
            //Nested table with blocks
            //[uint,byte[]] where K is BlockID[uint]
            NestedTable tbBlocks = tran.SelectTable <byte>(tableName, 10, 0);
            tbBlocks.ValuesLazyLoadingIsOn = false;

            Dictionary <uint, byte[]> block = new Dictionary <uint, byte[]>();
            //null until the first block is loaded, so a block with ID 0 is not mistaken for an already loaded one
            uint? currentBlockId = null;

            //Ordered by block, so every block is read and decoded only once
            foreach (var wrd in words.OrderBy(r => r.BlockId))
            {
                if (currentBlockId != wrd.BlockId)
                {
                    currentBlockId = wrd.BlockId;
                    block          = new Dictionary <uint, byte[]>();

                    byte[] btBlock = tbBlocks.Select <uint, byte[]>(wrd.BlockId).Value;
                    //First 4 bytes hold the length of the payload following them
                    btBlock = btBlock.Substring(4, btBlock.Substring(0, 4).To_Int32_BigEndian());
                    btBlock.Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                }

                wrd.wah = new WAH2(block[wrd.NumberInBlock]);
            }

            //One merged array per search word (foundOrigin), in the order of the search words
            List <byte[]> foundArrays = new List <byte[]>();

            foreach (var originGroup in words.OrderBy(r => r.foundOrigin).GroupBy(r => r.foundOrigin))
            {
                List <byte[]> oneWordFoundArrays = originGroup.Select(r => r.wah.GetUncompressedByteArray()).ToList();
                foundArrays.Add(WAH2.MergeAllUncompressedIntoOne(oneWordFoundArrays));
            }

            //////////  final results

            if (foundArrays.Count > 0)
            {
                var q = WAH2.TextSearch_OR_logic(foundArrays, req.Quantity);

                if (req.SearchLogicType == TextSearchRequest.eSearchLogicType.AND)
                {
                    q = WAH2.TextSearch_AND_logic(foundArrays).Take(req.Quantity);
                }

                //Key int, Value byte[]
                NestedTable i2e = tran.SelectTable <byte>(tableName, 2, 0);
                i2e.ValuesLazyLoadingIsOn = false;

                int qOutput = 0;

                foreach (var el in q)
                {
                    //Getting document external ID
                    DBreeze.DataTypes.Row <int, byte[]> docRow = i2e.Select <int, byte[]>((int)el);
                    if (docRow.Exists)
                    {
                        resp.FoundDocumentIDs.Add(docRow.Value);
                    }

                    qOutput++;

                    //Stop once Quantity elements were processed (the former ">" check allowed one element more)
                    if (qOutput >= req.Quantity)
                    {
                        break;
                    }
                }
            }
            #endregion

            return(resp);
        }
Exemple #5
0
        /// <summary>
        /// Stores the given nodes and, depth-first, all nodes reachable through their LinksKids,
        /// assigning an internal ID to every node that does not have one yet.
        /// </summary>
        /// <param name="parent">Node the <paramref name="nodes"/> are kids of; can be null, then no kid link is written</param>
        /// <param name="nodes">Nodes to store; null elements are skipped</param>
        /// <param name="id">
        /// Running internal ID counter. When it is 0 at the moment a new ID is needed, it is first read from
        /// the row with the key { 0 } (if that row exists). It is incremented for every new node.
        /// </param>
        private void AddNodesRecursive(DGNode parent, List <DGNode> nodes, ref uint id)
        {
            foreach (DGNode current in nodes)
            {
                if (current == null)
                {
                    continue;
                }

                //A node without internal ID was never stored before
                bool isNew = current.internalId == 0;

                if (isNew)
                {
                    if (id == 0)
                    {
                        //Counter is not initialized yet, taking the stored value
                        DBreeze.DataTypes.Row <byte[], byte[]> counterRow = tran.Select <byte[], byte[]>(tableName, new byte[] { 0 });
                        if (counterRow.Exists)
                        {
                            id = counterRow.Value.To_UInt32_BigEndian();
                        }
                    }

                    id++;
                    current.internalId = id;

                    if (current.externalId == null)
                    {
                        //Without external ID the internal one (as 4 bytes) plays its role
                        current.externalId = current.internalId.To_4_bytes_array_BigEndian();
                    }
                }

                //Attaching the graph to the node, so further operations can be done via the node
                current.graph = this;

                /* Binding between externalID and internalID, the row also carries the node content:
                 * 1 - node externalID (byte[]) + internalID (uint)
                 */
                //Written for every new node and for an existing one whose content was modified
                if (isNew || current.contentWasModified)
                {
                    byte[] contentKey = new byte[] { 1 }.ConcatMany(current.externalId, current.internalId.To_4_bytes_array_BigEndian());
                    tran.Insert <byte[], byte[]>(tableName, contentKey, current.content);
                }

                if (parent != null)
                {
                    /*
                     * Link from the parent to its kid, makes possible to search kids by externalID from the parent:
                     * 3 - internalID (parent)(uint) + externalID (kid)(byte[]) + internalID (kid)(uint)
                     *
                     * The reverse link (4 - from the node to its parent) is not written here.
                     */
                    byte[] kidLinkKey = new byte[] { 3 }.ConcatMany(parent.internalId.To_4_bytes_array_BigEndian(), current.externalId, current.internalId.To_4_bytes_array_BigEndian());
                    tran.Insert <byte[], byte[]>(tableName, kidLinkKey, null);
                }

                //Going down to the kids, current node becomes their parent
                if (current.LinksKids != null && current.LinksKids.Count > 0)
                {
                    AddNodesRecursive(current, current.LinksKids, ref id);
                }
            }
        }