/// <summary>
/// Splits <paramref name="text"/> on the space character and counts how many times every
/// (lower-cased) word occurs. Never returns null: when the text is null/empty, the options
/// are missing, or no word qualifies, an empty dictionary is returned.
/// </summary>
/// <param name="text">Text to split. Only ' ' separates words; any other filtering must be done by outer systems.</param>
/// <param name="opt">
/// Storage options. NOTE: for backward compatibility with options created before FullTextOnly existed,
/// a combination of FullTextOnly == false and SearchWordMinimalLength == 0 is rewritten on this instance
/// to FullTextOnly = true, SearchWordMinimalLength = 2.
/// </param>
/// <returns>Words (and, when FullTextOnly is off, their trailing parts) mapped to their occurrence counters.</returns>
SortedDictionary<string, WordDefinition> GetWordsDefinitionFromText(string text, TextSearchStorageOptions opt)
{
    SortedDictionary<string, WordDefinition> wordsCounter = new SortedDictionary<string, WordDefinition>();

    //Missing options used to fail inside a catch-all and produce an empty result; that outcome is kept, but explicitly.
    if (String.IsNullOrEmpty(text) || opt == null)
    {
        return wordsCounter;
    }

    //Support for previous versions without FullTextOnly
    if (!opt.FullTextOnly && opt.SearchWordMinimalLength == 0)
    {
        opt.FullTextOnly = true;
        opt.SearchWordMinimalLength = 2;
    }

    //Longer character runs are cut into pieces of this size, every piece is handled as a separate word
    const int maximalWordLengthBeforeSplit = 50;

    StringBuilder sb = new StringBuilder();

    foreach (char c in text)
    {
        if (c != ' ')
        {
            sb.Append(c);

            if (sb.Length < maximalWordLengthBeforeSplit)
            {
                continue; //Word is still growing
            }
        }

        //Either a separator was met or the word reached its maximal length
        CountBufferedWord(sb, wordsCounter, opt);
    }

    //Processing last word
    CountBufferedWord(sb, wordsCounter, opt);

    return wordsCounter;
}

/// <summary>
/// Registers the word accumulated in <paramref name="buffer"/> (if it is long enough) and empties the buffer.
/// </summary>
private static void CountBufferedWord(StringBuilder buffer, SortedDictionary<string, WordDefinition> wordsCounter, TextSearchStorageOptions opt)
{
    if (buffer.Length == 0)
    {
        return;
    }

    if (buffer.Length >= opt.SearchWordMinimalLength)
    {
        //NOTE(review): ToLower() is culture-sensitive; kept as is because the search side presumably normalizes the same way - confirm
        string word = buffer.ToString().ToLower();

        IncrementWordCount(wordsCounter, word);

        if (!opt.FullTextOnly)
        {
            //Storing also every trailing part of the word that is still long enough, so a search can match
            //inside of the word (e.g. the domain part of an email address).
            //"start < word.Length" keeps Substring inside of the word for any configured minimal length.
            for (int start = 1; start < word.Length && word.Length - start >= opt.SearchWordMinimalLength; start++)
            {
                IncrementWordCount(wordsCounter, word.Substring(start));
            }
        }
    }

    //Same effect as Clear(), which is not available on older frameworks
    buffer.Length = 0;
}

/// <summary>
/// Increments the counter of <paramref name="word"/>, creating its definition on first occurrence.
/// </summary>
private static void IncrementWordCount(SortedDictionary<string, WordDefinition> wordsCounter, string word)
{
    WordDefinition definition;

    if (wordsCounter.TryGetValue(word, out definition))
    {
        definition.CountInDocu++;
    }
    else
    {
        wordsCounter[word] = new WordDefinition() { CountInDocu = 1 };
    }
}
/// <summary>
/// Stores, replaces, extends or reduces the set of searchable words of a document and marks
/// the document as changed so that its words get (re)indexed.
/// Does nothing when the table name is empty, the document id is null, Append/Remove is called
/// without text, or the operation would not change the stored word set.
/// </summary>
/// <param name="tran">Transaction used to open the three text-search tables</param>
/// <param name="tableName">Search document space/physical dbreeze table, that's why must be synchronized</param>
/// <param name="documentId">External document id</param>
/// <param name="searchables">Text whose space-separated words are the searchables</param>
/// <param name="opt">Storage options; DeferredIndexing selects where the changed document id is registered</param>
/// <param name="iMode">Insert replaces the word set, Append adds words to it, Remove takes words out of it</param>
public void InsertDocumentText(Transaction tran, string tableName, byte[] documentId, string searchables, TextSearchStorageOptions opt, eInsertMode iMode)
{
    if (String.IsNullOrEmpty(tableName) || documentId == null)
    {
        return;
    }

    if ((iMode == eInsertMode.Append || iMode == eInsertMode.Remove) && String.IsNullOrEmpty(searchables))
    {
        return;
    }

    SortedDictionary<string, WordDefinition> pST = this.GetWordsDefinitionFromText(searchables, opt); //flattend searchables

    //Registering all tables for text-search in current transaction
    ITS its = null;
    if (!itbls.TryGetValue(tableName, out its))
    {
        its = new ITS()
        {
            e2i = tran.InsertTable<byte>(tableName, 1, 0),  //external document id -> internal id
            i2e = tran.InsertTable<byte>(tableName, 2, 0),  //internal id -> external document id
            srch = tran.InsertTable<byte>(tableName, 3, 0), //searchables per internal id
        };

        its.e2i.ValuesLazyLoadingIsOn = false;
        its.i2e.ValuesLazyLoadingIsOn = false;
        its.srch.ValuesLazyLoadingIsOn = false;

        itbls.Add(tableName, its);
    }

    //Internal document ID
    int iId = 0;

    //Words that will be written as the searchables of the document
    IEnumerable<string> wordsToStore = null;

    //Searching document by externalID
    var r1 = its.e2i.Select<byte[], int>(documentId);

    if (r1.Exists)
    {
        //DOCUMENT EXISTS
        iId = r1.Value;

        //Getting old searchables for this document (stored under the key "internal id + 0")
        byte[] oldSrch = its.srch.Select<byte[], byte[]>(iId.To_4_bytes_array_BigEndian().Concat(new byte[] { 0 }), true).Value;
        HashSet<string> oldSearchables = GetSearchablesFromByteArray_AsHashSet(oldSrch); //always instantiated hashset

        switch (iMode)
        {
            case eInsertMode.Insert:
                //Insert replaces the word set, so there is nothing to do only when both sets are identical.
                //Comparing the intersection with the old count alone also skipped new texts that
                //contained extra words (and every text once the stored set was empty).
                if (oldSearchables.SetEquals(pST.Keys))
                {
                    return; //Going out, nothing to insert
                }

                wordsToStore = pST.Keys;
                break;

            case eInsertMode.Append:
                //Nothing to append only when every new word is already stored
                if (oldSearchables.IsSupersetOf(pST.Keys))
                {
                    return; //Going out, nothing to insert
                }

                oldSearchables.UnionWith(pST.Keys);
                wordsToStore = oldSearchables;
                break;

            case eInsertMode.Remove:
                oldSearchables.ExceptWith(pST.Keys);
                wordsToStore = oldSearchables;
                break;
        }
    }
    else
    {
        //DOCUMENT NEW
        if (pST.Count < 1)
        {
            return; //Going out, nothing to insert
        }

        if (iMode == eInsertMode.Append)
        {
            //Appending to a not existing document is a plain insert
            iMode = eInsertMode.Insert;
        }
        else if (iMode == eInsertMode.Remove)
        {
            return; //Going out, nothing to remove from
        }

        //Next internal id follows the biggest one in use
        iId = its.i2e.Max<int, byte[]>().Key;
        iId++;

        its.e2i.Insert<byte[], int>(documentId, iId);
        its.i2e.Insert<int, byte[]>(iId, documentId);

        wordsToStore = pST.Keys;
    }

    this.InsertWasPerformed = true;

    //Registering the document as changed
    if (!opt.DeferredIndexing)
    {
        its.ChangedDocIds.Add(iId);
    }
    else
    {
        if (!defferedDocIds.ContainsKey(tableName))
        {
            defferedDocIds[tableName] = new HashSet<uint>();
        }

        defferedDocIds[tableName].Add((uint)iId);
    }

    //Flattening the words; every word is followed by a single space
    StringBuilder sbPs = new StringBuilder();
    if (wordsToStore != null)
    {
        foreach (string wordToStore in wordsToStore)
        {
            sbPs.Append(wordToStore);
            sbPs.Append(" ");
        }
    }

    //Inserting searchables to be indexed (stored under the key "internal id + 1")
    its.srch.Insert<byte[], byte[]>(iId.To_4_bytes_array_BigEndian().Concat(new byte[] { 1 }), GetByteArrayFromSearchbles(sbPs.ToString()));
}