Exemple #1
0
        /// <summary>
        /// Resolves every not-yet-processed entry of <c>PureWords</c> against the word table,
        /// registers the matches in <c>RealWords</c> and loads the bitmap (<c>wahArray</c>) of every
        /// not-yet-processed real word from the blocks table.
        /// Does nothing unless <c>toComputeWordsOrigin</c> is set; the flag is cleared on completion.
        /// </summary>
        /// <exception cref="Exception">
        /// The word table has to be opened, but the transaction is null or the table name is empty.
        /// </exception>
        internal void ComputeWordsOrigin()
        {
            if (!toComputeWordsOrigin)
            {
                return;
            }

            //Lazily opening the nested tables (indexes 20, 10, 2 and 1 under _tableName)
            if (this.tbWords == null)
            {
                if (this._tran == null || String.IsNullOrEmpty(this._tableName))
                {
                    throw new Exception("DBreeze.TextSearch.TextSearchTable.ComputeWordsOrigin: transaction is not initialzed");
                }

                this.tbWords = this._tran.SelectTable<byte>(this._tableName, 20, 0);
                this.tbWords.ValuesLazyLoadingIsOn = false;
            }

            if (this.tbBlocks == null)
            {
                this.tbBlocks = this._tran.SelectTable<byte>(this._tableName, 10, 0);
                this.tbBlocks.ValuesLazyLoadingIsOn = false;
            }

            if (this.i2e == null)
            {
                this.i2e = this._tran.SelectTable<byte>(this._tableName, 2, 0);
                this.i2e.ValuesLazyLoadingIsOn = false;
            }

            //e2i is only opened when an external document id range was supplied
            if (this.e2i == null && (ExternalDocumentIdStart != null || ExternalDocumentIdStop != null))
            {
                this.e2i = this._tran.SelectTable<byte>(this._tableName, 1, 0);
                this.e2i.ValuesLazyLoadingIsOn = false;
            }

            //Resolving pure words (possibly to move all RealWords to Pure)
            foreach (var wrd in this.PureWords.Where(r => !r.Value.Processed).OrderBy(r => r.Key))
            {
                if (wrd.Value.FullMatch)
                {
                    //Already resolved word; note that this path leaves the pure word not marked as Processed
                    if (this.RealWords.ContainsKey(wrd.Key))
                    {
                        continue;
                    }

                    var row2 = this.tbWords.Select<string, byte[]>(wrd.Key);
                    if (row2.Exists)
                    {
                        //Row value: BlockId followed by NumberInBlock, both read as big-endian UInt32
                        this.RealWords[wrd.Key] = new TextSearchHandler.WordInDocs()
                        {
                            BlockId = row2.Value.Substring(0, 4).To_UInt32_BigEndian(),
                            NumberInBlock = row2.Value.Substring(4, 4).To_UInt32_BigEndian()
                        };
                    }
                }
                else
                {
                    //Contains: taking at most NoisyQuantity index words starting with the pure word
                    int containsFound = 0;
                    HashSet<string> startsWithEchoes = new HashSet<string>();

                    foreach (var row1 in this.tbWords.SelectForwardStartsWith<string, byte[]>(wrd.Key).Take(this.NoisyQuantity))
                    {
                        containsFound++;

                        //Every match except the pure word itself is remembered as its echo
                        if (wrd.Key != row1.Key)
                        {
                            startsWithEchoes.Add(row1.Key);
                        }

                        if (this.RealWords.ContainsKey(row1.Key))
                        {
                            continue;
                        }

                        this.RealWords.Add(row1.Key, new TextSearchHandler.WordInDocs()
                        {
                            BlockId = row1.Value.Substring(0, 4).To_UInt32_BigEndian(),
                            NumberInBlock = row1.Value.Substring(4, 4).To_UInt32_BigEndian()
                        });
                    }

                    if (startsWithEchoes.Count > 0)
                    {
                        wrd.Value.StartsWith = startsWithEchoes;
                    }

                    //The Take limit was reached, the search criteria is flagged as noisy
                    if (containsFound == this.NoisyQuantity)
                    {
                        this.SearchCriteriaIsNoisy = true;
                    }
                }

                wrd.Value.Processed = true;
            }

            //Getting bitmaps for the non-processed RealWords.
            //Words are ordered by BlockId, so every block is selected and decoded once per run of words.
            uint currentBlockId = 0;
            Dictionary<uint, byte[]> block = null;
            byte[] btBlock = null;

            foreach (var wrd in this.RealWords.Where(r => !r.Value.Processed).OrderBy(r => r.Value.BlockId))
            {
                //block == null covers the very first word: without it a word stored in block 0 would
                //match the initial currentBlockId, skip the load and dereference a null dictionary
                if (block == null || currentBlockId != wrd.Value.BlockId)
                {
                    currentBlockId = wrd.Value.BlockId;
                    block = new Dictionary<uint, byte[]>();
                    btBlock = this.tbBlocks.Select<uint, byte[]>(wrd.Value.BlockId).Value;
                    //First 4 bytes hold the payload length (big-endian Int32), the payload follows
                    btBlock = btBlock.Substring(4, btBlock.Substring(0, 4).To_Int32_BigEndian());
                    btBlock.Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                }

                wrd.Value.wahArray = new WABI(block[wrd.Value.NumberInBlock]).GetUncompressedByteArray();
                wrd.Value.Processed = true;
            }

            toComputeWordsOrigin = false;
        }
        /// <summary>
        /// Executes the PL/SQL definitions exposed by the CswUpdateSchemaPLSQL* catalogs in this order:
        /// functions, views, procedures, types (drop nested tables, drop type headers, create type
        /// headers, create nested tables), package headers and package bodies.
        /// Each statement is wrapped in <c>_acceptBlame</c> / <c>_resetBlame</c> using the
        /// developer and case number carried by the catalog entry.
        /// </summary>
        public override void update()
        {
            #region Synonyms

            //Add Synonyms here

            #endregion Synonyms

            #region Triggers

            //Add Triggers here

            #endregion Triggers

            #region Functions

            foreach (CswUpdateSchemaPLSQLFunctions.Functions plsqlFunction in CswUpdateSchemaPLSQLFunctions.Functions._All)
            {
                _acceptBlame(plsqlFunction._Dev, plsqlFunction._CaseNo);
                string functionSql = plsqlFunction.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(functionSql);
                _resetBlame();
            }

            #endregion Functions

            #region Views

            foreach (CswUpdateSchemaPLSQLViews.Views plsqlView in CswUpdateSchemaPLSQLViews.Views._All)
            {
                _acceptBlame(plsqlView._Dev, plsqlView._CaseNo);
                string viewSql = plsqlView.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(viewSql);
                _resetBlame();
            }

            #endregion Views

            #region Procedures

            foreach (CswUpdateSchemaPLSQLProcedures.Procedures plsqlProcedure in CswUpdateSchemaPLSQLProcedures.Procedures._All)
            {
                _acceptBlame(plsqlProcedure._Dev, plsqlProcedure._CaseNo);
                string procedureSql = plsqlProcedure.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(procedureSql);
                _resetBlame();
            }

            #endregion Procedures

            #region Types

            //Nested tables depend on types, so both are dropped before being recompiled.
            //Dropping in dependency order cannot be done generically, hence "force",
            //which avoids ORA-02303.

            #region Drop Types

            foreach (CswUpdateSchemaPLSQLTypes.NestedTables nestedTable in CswUpdateSchemaPLSQLTypes.NestedTables._All)
            {
                _acceptBlame(nestedTable._Dev, nestedTable._CaseNo);

                //The anonymous block ignores error -04043, so a type that does not exist is skipped
                string dropNestedTableSql =
                    @"declare
                      object_not_exists EXCEPTION;
                      PRAGMA EXCEPTION_INIT(object_not_exists, -04043);
                    begin
                      execute immediate 'drop type " + nestedTable._Title + @" force';
                    exception
                      when object_not_exists then null;
                    end;";

                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(dropNestedTableSql);
                _resetBlame();
            }

            foreach (CswUpdateSchemaPLSQLTypes.TypeHeaders typeHeader in CswUpdateSchemaPLSQLTypes.TypeHeaders._All)
            {
                _acceptBlame(typeHeader._Dev, typeHeader._CaseNo);

                //Same tolerant drop as for the nested tables above
                string dropTypeHeaderSql =
                    @"declare
                      object_not_exists EXCEPTION;
                      PRAGMA EXCEPTION_INIT(object_not_exists, -04043);
                    begin
                      execute immediate 'drop type " + typeHeader._Title + @" force';
                    exception
                      when object_not_exists then null;
                    end;";

                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(dropTypeHeaderSql);
                _resetBlame();
            }

            #endregion Drop Types

            #region Type Headers

            foreach (CswUpdateSchemaPLSQLTypes.TypeHeaders typeHeader in CswUpdateSchemaPLSQLTypes.TypeHeaders._All)
            {
                _acceptBlame(typeHeader._Dev, typeHeader._CaseNo);
                string typeHeaderSql = typeHeader.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(typeHeaderSql);
                _resetBlame();
            }

            #endregion Type Headers

            #region Type Bodies

            //Add Type Bodies here

            #endregion Type Bodies

            #region Nested Tables

            foreach (CswUpdateSchemaPLSQLTypes.NestedTables nestedTable in CswUpdateSchemaPLSQLTypes.NestedTables._All)
            {
                _acceptBlame(nestedTable._Dev, nestedTable._CaseNo);
                string nestedTableSql = nestedTable.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(nestedTableSql);
                _resetBlame();
            }

            #endregion Nested Tables

            #endregion Types

            #region Package Headers

            foreach (CswUpdateSchemaPLSQLPackages.PackageHeaders packageHeader in CswUpdateSchemaPLSQLPackages.PackageHeaders._All)
            {
                _acceptBlame(packageHeader._Dev, packageHeader._CaseNo);
                string packageHeaderSql = packageHeader.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(packageHeaderSql);
                _resetBlame();
            }

            #endregion Package Headers

            #region Package Bodies

            foreach (CswUpdateSchemaPLSQLPackages.PackageBodies packageBody in CswUpdateSchemaPLSQLPackages.PackageBodies._All)
            {
                _acceptBlame(packageBody._Dev, packageBody._CaseNo);
                string packageBodySql = packageBody.ToString();
                _CswNbtSchemaModTrnsctn.execArbitraryPlatformNeutralSql(packageBodySql);
                _resetBlame();
            }

            #endregion Package Bodies
        } //update()
Exemple #3
0
        /// <summary>
        /// Resolves every not-yet-processed entry of <c>PureWords</c> against the word table,
        /// registers the matches in <c>RealWords</c> and loads the bitmap (<c>wahArray</c>) of every
        /// not-yet-processed real word from the blocks table.
        /// Does nothing unless <c>toComputeWordsOrigin</c> is set; the flag is cleared on completion.
        /// </summary>
        /// <exception cref="Exception">
        /// The word table has to be opened, but the transaction is null or the table name is empty.
        /// </exception>
        internal void ComputeWordsOrigin()
        {
            if (!toComputeWordsOrigin)
            {
                return;
            }

            //Lazily opening the nested tables (indexes 20, 10, 2 and 1 under _tableName)
            if (this.tbWords == null)
            {
                if (this._tran == null || String.IsNullOrEmpty(this._tableName))
                {
                    throw new Exception("DBreeze.TextSearch.TextSearchTable.ComputeWordsOrigin: transaction is not initialzed");
                }

                this.tbWords = this._tran.SelectTable<byte>(this._tableName, 20, 0);
                this.tbWords.ValuesLazyLoadingIsOn = false;
            }

            if (this.tbBlocks == null)
            {
                this.tbBlocks = this._tran.SelectTable<byte>(this._tableName, 10, 0);
                this.tbBlocks.ValuesLazyLoadingIsOn = false;
            }

            if (this.i2e == null)
            {
                this.i2e = this._tran.SelectTable<byte>(this._tableName, 2, 0);
                this.i2e.ValuesLazyLoadingIsOn = false;
            }

            //e2i is only opened when an external document id range was supplied
            if (this.e2i == null && (ExternalDocumentIdStart != null || ExternalDocumentIdStop != null))
            {
                this.e2i = this._tran.SelectTable<byte>(this._tableName, 1, 0);
                this.e2i.ValuesLazyLoadingIsOn = false;
            }

            //Resolving pure words (possibly to move all RealWords to Pure)
            foreach (var wrd in this.PureWords.Where(r => !r.Value.Processed).OrderBy(r => r.Key))
            {
                if (wrd.Value.FullMatch)
                {
                    //Already resolved word; note that this path leaves the pure word not marked as Processed
                    if (this.RealWords.ContainsKey(wrd.Key))
                    {
                        continue;
                    }

                    var row2 = this.tbWords.Select<string, byte[]>(wrd.Key);
                    if (row2.Exists)
                    {
                        //Row value: BlockId followed by NumberInBlock, both read as big-endian UInt32
                        this.RealWords[wrd.Key] = new TextSearchHandler.WordInDocs()
                        {
                            BlockId = row2.Value.Substring(0, 4).To_UInt32_BigEndian(),
                            NumberInBlock = row2.Value.Substring(4, 4).To_UInt32_BigEndian()
                        };
                    }
                }
                else
                {
                    //Contains: taking at most NoisyQuantity index words starting with the pure word
                    int containsFound = 0;
                    HashSet<string> startsWithEchoes = new HashSet<string>();

                    foreach (var row1 in this.tbWords.SelectForwardStartsWith<string, byte[]>(wrd.Key).Take(this.NoisyQuantity))
                    {
                        containsFound++;

                        //Every match except the pure word itself is remembered as its echo
                        if (wrd.Key != row1.Key)
                        {
                            startsWithEchoes.Add(row1.Key);
                        }

                        if (this.RealWords.ContainsKey(row1.Key))
                        {
                            continue;
                        }

                        this.RealWords.Add(row1.Key, new TextSearchHandler.WordInDocs()
                        {
                            BlockId = row1.Value.Substring(0, 4).To_UInt32_BigEndian(),
                            NumberInBlock = row1.Value.Substring(4, 4).To_UInt32_BigEndian()
                        });
                    }

                    if (startsWithEchoes.Count > 0)
                    {
                        wrd.Value.StartsWith = startsWithEchoes;
                    }

                    //The Take limit was reached, the search criteria is flagged as noisy
                    if (containsFound == this.NoisyQuantity)
                    {
                        this.SearchCriteriaIsNoisy = true;
                    }
                }

                wrd.Value.Processed = true;
            }

            //Getting bitmaps for the non-processed RealWords.
            //Words are ordered by BlockId, so every block is selected and decoded once per run of words.
            uint currentBlockId = 0;
            Dictionary<uint, byte[]> block = null;
            byte[] btBlock = null;

            foreach (var wrd in this.RealWords.Where(r => !r.Value.Processed).OrderBy(r => r.Value.BlockId))
            {
                //block == null covers the very first word: without it a word stored in block 0 would
                //match the initial currentBlockId, skip the load and dereference a null dictionary
                if (block == null || currentBlockId != wrd.Value.BlockId)
                {
                    currentBlockId = wrd.Value.BlockId;
                    block = new Dictionary<uint, byte[]>();
                    btBlock = this.tbBlocks.Select<uint, byte[]>(wrd.Value.BlockId).Value;
                    //First 4 bytes hold the payload length (big-endian Int32), the payload follows
                    btBlock = btBlock.Substring(4, btBlock.Substring(0, 4).To_Int32_BigEndian());
                    btBlock.Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                }

                wrd.Value.wahArray = new WABI(block[wrd.Value.NumberInBlock]).GetUncompressedByteArray();
                wrd.Value.Processed = true;
            }

            toComputeWordsOrigin = false;
        }
Exemple #4
0
        /// <summary>
        /// Searches the text index stored under <paramref name="tableName"/>: every prepared search word
        /// is expanded to the indexed words starting with it, their bitmaps are merged per search word
        /// and combined with AND / OR logic, and the resulting internal ids are mapped through the
        /// id table (nested table 2) into <c>FoundDocumentIDs</c>.
        /// </summary>
        /// <param name="tableName">Table holding the nested index tables (20 words, 10 blocks, 2 ids).</param>
        /// <param name="req">Request supplying SearchWords, SearchLogicType, Quantity and NoisyQuantity.</param>
        /// <returns>
        /// The response with the found document ids; it is returned empty when nothing matched or when,
        /// under AND logic, one of the search words has no match in the index at all.
        /// </returns>
        public TextSearchResponse SearchTextInDocuments(string tableName, TextSearchRequest req)
        {
            TextSearchResponse resp = new TextSearchResponse();

            //[string,byte[]] BlockId[int] + NumberInBlock[int]
            NestedTable tbWords = tran.SelectTable<byte>(tableName, 20, 0);
            tbWords.ValuesLazyLoadingIsOn = false;

            var searchWords = this.PrepareSearchKeyWords(req.SearchWords);
            //Counted once instead of on every index row
            int searchWordsCount = searchWords.Count();

            List<byte[]> foundArrays = new List<byte[]>();
            List<byte[]> oneWordFoundArrays = new List<byte[]>();

            bool anyWordFound = false;
            int totalFoundWords = 0;
            int foundOrigin = 1;

            //A list instead of a dictionary keyed by the indexed word: two search words may expand to the
            //same indexed word (e.g. "test" and "testing" both reach "testing"), which made the keyed
            //Add throw ArgumentException. Each expansion keeps its own entry and its own foundOrigin.
            List<WordInDocs> words = new List<WordInDocs>();
            List<WordInDocs> perWord = null;
            List<WordInDocs> firstHighOccuranceWord = new List<WordInDocs>();

            //Currently these word parts are only collected; nothing below filters the result with them
            List<string> highOccuranceWordParts = new List<string>();

            int maxSearchWords = tran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.MaxQuantityOfWordsToBeSearched;

            foreach (var word in searchWords.Take(maxSearchWords))
            {
                anyWordFound = false;
                totalFoundWords = 0;
                perWord = new List<WordInDocs>();

                foreach (var row1 in tbWords.SelectForwardStartsWith<string, byte[]>(word))
                {
                    anyWordFound = true;
                    totalFoundWords++;

                    if (searchWordsCount == 1 && totalFoundWords > req.Quantity)
                    {
                        //Only one search word, no comparison between words is needed
                        break;
                    }
                    else if (totalFoundWords >= req.NoisyQuantity)
                    {
                        //Too many indexed words contain this word part, better to enhance the search.
                        //Only the first high-occurrence expansion is kept; it is used later when all
                        //search words turn out to be of high occurrence.
                        if (firstHighOccuranceWord.Count == 0)
                        {
                            firstHighOccuranceWord = new List<WordInDocs>(perWord);
                        }

                        perWord.Clear();
                        highOccuranceWordParts.Add(word);
                        break;
                    }

                    perWord.Add(new WordInDocs()
                    {
                        BlockId = row1.Value.Substring(0, 4).To_UInt32_BigEndian(),
                        NumberInBlock = row1.Value.Substring(4, 4).To_UInt32_BigEndian(),
                        foundOrigin = foundOrigin
                    });
                }

                //Repacking occurrences
                words.AddRange(perWord);
                foundOrigin++;

                if (req.SearchLogicType == TextSearchRequest.eSearchLogicType.AND && !anyWordFound)
                {
                    //One of the words is not in the index at all, AND logic can't be satisfied
                    return resp;
                }
            }

            if (words.Count == 0)
            {
                //Multiple search words, each of them of high occurrence:
                //the result is formed only from the first high-occurrence list
                words.AddRange(firstHighOccuranceWord.Take(req.Quantity));

                //The returned result is very approximate in this case
                highOccuranceWordParts.Clear();
            }

            //Nested table with blocks: [uint,byte[]] where K is BlockID[uint]
            NestedTable tbBlocks = tran.SelectTable<byte>(tableName, 10, 0);
            tbBlocks.ValuesLazyLoadingIsOn = false;

            Dictionary<uint, byte[]> block = null;
            byte[] btBlock = null;
            uint currentBlockId = 0;

            //Ordered by BlockId, so each block is selected and decoded once per run of words
            foreach (var wrd in words.OrderBy(r => r.BlockId))
            {
                //block == null forces the load for the first word even when its BlockId equals the
                //initial currentBlockId (0); previously such a word was looked up in an empty dictionary
                if (block == null || currentBlockId != wrd.BlockId)
                {
                    currentBlockId = wrd.BlockId;
                    block = new Dictionary<uint, byte[]>();

                    btBlock = tbBlocks.Select<uint, byte[]>(wrd.BlockId).Value;
                    //First 4 bytes hold the payload length (big-endian Int32), the payload follows
                    btBlock = btBlock.Substring(4, btBlock.Substring(0, 4).To_Int32_BigEndian());
                    btBlock.Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                }

                wrd.wah = new WAH2(block[wrd.NumberInBlock]);
            }

            //Merging the bitmaps of all expansions belonging to one search word into one array
            foundOrigin = 0;

            foreach (var wrd in words.OrderBy(r => r.foundOrigin))
            {
                if (foundOrigin != wrd.foundOrigin)
                {
                    if (oneWordFoundArrays.Count > 0)
                    {
                        foundArrays.Add(WAH2.MergeAllUncompressedIntoOne(oneWordFoundArrays));
                        oneWordFoundArrays = new List<byte[]>();
                    }

                    foundOrigin = wrd.foundOrigin;
                }

                oneWordFoundArrays.Add(wrd.wah.GetUncompressedByteArray());
            }

            //The last search word
            if (oneWordFoundArrays.Count > 0)
            {
                foundArrays.Add(WAH2.MergeAllUncompressedIntoOne(oneWordFoundArrays));
            }

            //Final results
            if (foundArrays.Count > 0)
            {
                var q = WAH2.TextSearch_OR_logic(foundArrays, req.Quantity);

                if (req.SearchLogicType == TextSearchRequest.eSearchLogicType.AND)
                {
                    q = WAH2.TextSearch_AND_logic(foundArrays).Take(req.Quantity);
                }

                //Key int, Value byte[]
                NestedTable i2e = tran.SelectTable<byte>(tableName, 2, 0);
                i2e.ValuesLazyLoadingIsOn = false;

                int qOutput = 0;
                DBreeze.DataTypes.Row<int, byte[]> docRow = null;

                foreach (var el in q)
                {
                    //Getting document external ID
                    docRow = i2e.Select<int, byte[]>((int)el);
                    if (docRow.Exists)
                    {
                        resp.FoundDocumentIDs.Add(docRow.Value);
                    }

                    qOutput++;

                    //">=" so that no more than Quantity ids are visited (">" allowed one extra)
                    if (qOutput >= req.Quantity)
                    {
                        break;
                    }
                }
            }

            return resp;
        }