Options that are used when calling tran.InsertDocumentText
Beispiel #1
0
        /// <summary>
        /// Splits the text on the space character, lower-cases every word and counts how often
        /// each word occurs. Words shorter than opt.SearchWordMinimalLength are skipped and words
        /// longer than 50 characters are cut into consecutive 50-character chunks.
        /// When opt.FullTextOnly is false, every suffix of a word that still has the minimal
        /// length is counted as well (so a search by the ending of a word becomes possible).
        /// Never returns null: an empty dictionary is returned when nothing was found.
        /// </summary>
        /// <param name="text">Text to split; null or empty gives an empty result</param>
        /// <param name="opt">
        /// Storage options; FullTextOnly and SearchWordMinimalLength are read here.
        /// For backward compatibility the instance is changed in place when FullTextOnly is false
        /// and SearchWordMinimalLength is 0. A null value gives an empty result.
        /// </param>
        /// <returns>Word (or word suffix) mapped to its occurrence counter inside of the text</returns>
        SortedDictionary <string, WordDefinition> GetWordsDefinitionFromText(string text, TextSearchStorageOptions opt)
        {
            SortedDictionary <string, WordDefinition> wordsCounter = new SortedDictionary <string, WordDefinition>();

            //Missing options used to be hidden by an empty catch-all that produced the same
            //empty result; the case is handled explicitly instead of swallowing every exception.
            if (String.IsNullOrEmpty(text) || (object)opt == null)
            {
                return(wordsCounter);
            }

            //Support for previous versions without FullTextOnly
            if (!opt.FullTextOnly && opt.SearchWordMinimalLength == 0)
            {
                opt.FullTextOnly            = true;
                opt.SearchWordMinimalLength = 2;
            }

            //Longer character sequences are split into chunks of this size
            const int maximalWordLengthBeforeSplit = 50;

            StringBuilder sb = new StringBuilder();

            //Counts the word collected in sb (and, if configured, its suffixes), then resets sb
            Action processWord = () =>
            {
                //We take all words, so we can later find even by email address parts
                if (sb.Length > 0 && sb.Length >= opt.SearchWordMinimalLength)
                {
                    //NOTE(review): ToLower() is culture-sensitive. It is kept because the lookup side
                    //(not visible here) presumably normalizes the same way - confirm before
                    //switching to ToLowerInvariant().
                    string word = sb.ToString().ToLower();

                    List <string> wrds = new List <string>();
                    wrds.Add(word);

                    if (!opt.FullTextOnly)   //Suffixes are stored only when not in full-text-only mode
                    {
                        //A suffix has at least one character; this also keeps Substring in range
                        //when a negative minimal length is configured.
                        int minimalSuffixLength = Math.Max(1, opt.SearchWordMinimalLength);

                        for (int i = 1; word.Length - i >= minimalSuffixLength; i++)
                        {
                            wrds.Add(word.Substring(i));
                        }
                    }

                    foreach (string w in wrds)
                    {
                        WordDefinition wordDefinition;

                        if (wordsCounter.TryGetValue(w, out wordDefinition))
                        {
                            wordDefinition.CountInDocu++;
                        }
                        else
                        {
                            wordsCounter[w] = new WordDefinition()
                            {
                                CountInDocu = 1
                            };
                        }
                    }
                }

                //Equivalent of sb.Clear()
                sb.Length = 0;
            };

            foreach (char c in text)
            {
                //No words reviews (must be checked in outer systems): only a space separates words
                if (c == ' ')
                {
                    //Processing ready word
                    processWord();
                    continue;
                }

                sb.Append(c);

                if (sb.Length >= maximalWordLengthBeforeSplit)
                {
                    //Word became too long, processing the collected chunk
                    processWord();
                }
            }

            //Processing last word
            processWord();

            return(wordsCounter);
        }
Beispiel #2
0
        /// <summary>
        /// Stores the searchable words of a document and marks the document as changed, so that
        /// its words can be (re)indexed.
        /// Does nothing when tableName is null/empty, when documentId is null, when searchables is
        /// null/empty in Append or Remove mode, when a new document has no words, when Remove is
        /// requested for an unknown document, or when (Insert/Append) the supplied words are exactly
        /// the words already read for the document.
        /// </summary>
        /// <param name="tran">Transaction used to open the nested tables of the search space</param>
        /// <param name="tableName">Search document space/physical dbreeze table, that's why must be synchronized</param>
        /// <param name="documentId">External document id; it is mapped to an internal int id</param>
        /// <param name="searchables">Text whose words (split by GetWordsDefinitionFromText) belong to the document</param>
        /// <param name="opt">Storage options; DeferredIndexing selects where the changed document id is registered</param>
        /// <param name="iMode">Insert replaces the word set, Append adds words to it, Remove deletes words from it</param>
        public void InsertDocumentText(Transaction tran, string tableName, byte[] documentId, string searchables, TextSearchStorageOptions opt, eInsertMode iMode)
        {
            if (String.IsNullOrEmpty(tableName) || documentId == null)
            {
                return;
            }

            if ((iMode == eInsertMode.Append || iMode == eInsertMode.Remove) && String.IsNullOrEmpty(searchables))
            {
                return;
            }

            SortedDictionary <string, WordDefinition> pST = this.GetWordsDefinitionFromText(searchables, opt); //flattend searchables
            StringBuilder sbPs = new StringBuilder();   //space separated words which will be stored

            //Registering all tables for text-search in current transaction
            ITS its = null;

            if (!itbls.TryGetValue(tableName, out its))
            {
                its = new ITS()
                {
                    e2i  = tran.InsertTable <byte>(tableName, 1, 0),    //external id -> internal id
                    i2e  = tran.InsertTable <byte>(tableName, 2, 0),    //internal id -> external id
                    srch = tran.InsertTable <byte>(tableName, 3, 0),    //searchables by internal id
                };

                its.e2i.ValuesLazyLoadingIsOn  = false;
                its.i2e.ValuesLazyLoadingIsOn  = false;
                its.srch.ValuesLazyLoadingIsOn = false;

                itbls.Add(tableName, its);
            }

            //Internal document ID
            int iId = 0;

            //Searching document by externalID
            var r1 = its.e2i.Select <byte[], int>(documentId);

            if (r1.Exists)          //DOCUMENT EXISTS
            {
                iId = r1.Value;

                //Getting old searchables for this document (record with the suffix byte 0)
                byte[]           oldSrch        = its.srch.Select <byte[], byte[]>(iId.To_4_bytes_array_BigEndian().Concat(new byte[] { 0 }), true).Value;
                HashSet <string> oldSearchables = GetSearchablesFromByteArray_AsHashSet(oldSrch); //always instantiated hashset

                switch (iMode)
                {
                case eInsertMode.Insert:
                    //Only identical word sets mean "no change". Counting the intersection against
                    //the old set alone also matched the case where new words were added to the
                    //old ones, and those new words were then never stored.
                    if (oldSearchables.SetEquals(pST.Keys))
                    {
                        return;     //Going out, nothing to insert
                    }
                    foreach (var ps1i in pST)
                    {
                        sbPs.Append(ps1i.Key);
                        sbPs.Append(" ");
                    }
                    break;

                case eInsertMode.Append:
                case eInsertMode.Remove:

                    //Same comparison rule as for Insert: skip only when both sets are identical
                    if ((iMode == eInsertMode.Append) && oldSearchables.SetEquals(pST.Keys))
                    {
                        return;     //Going out, nothing to insert
                    }
                    foreach (var ew in pST.Keys)
                    {
                        if (iMode == eInsertMode.Append)
                        {
                            oldSearchables.Add(ew);
                        }
                        else
                        {
                            oldSearchables.Remove(ew);
                        }
                    }

                    foreach (var el in oldSearchables)
                    {
                        sbPs.Append(el);
                        sbPs.Append(" ");
                    }

                    break;
                }
            }
            else
            {           //DOCUMENT NEW
                if (pST.Count < 1)
                {
                    return; //Going out, nothing to insert
                }
                //Document is new
                if (iMode == eInsertMode.Append)
                {
                    iMode = eInsertMode.Insert;
                }
                else if (iMode == eInsertMode.Remove)
                {
                    return; //Going out
                }

                //Next internal id follows the biggest one stored so far
                iId = its.i2e.Max <int, byte[]>().Key;
                iId++;

                its.e2i.Insert <byte[], int>(documentId, iId);
                its.i2e.Insert <int, byte[]>(iId, documentId);

                foreach (var ps1i in pST)
                {
                    sbPs.Append(ps1i.Key);
                    sbPs.Append(" ");
                }
            }

            this.InsertWasPerformed = true;

            //Inserting into affected table
            if (!opt.DeferredIndexing)
            {
                its.ChangedDocIds.Add(iId);
            }
            else
            {
                //Single lookup instead of ContainsKey followed by the indexer
                HashSet <uint> deferredIds;

                if (!defferedDocIds.TryGetValue(tableName, out deferredIds))
                {
                    deferredIds = new HashSet <uint>();
                    defferedDocIds[tableName] = deferredIds;
                }

                deferredIds.Add((uint)iId);
            }

            //Inserting searchables to be indexed (record with the suffix byte 1)
            its.srch.Insert <byte[], byte[]>(iId.To_4_bytes_array_BigEndian().Concat(new byte[] { 1 }), GetByteArrayFromSearchbles(sbPs.ToString()));
        }