Пример #1
0
        /// <summary>
        /// Runs the n-gram pipeline for one sentence: builds the word combinations,
        /// filters them through the dictionary, builds the n-gram variants, loads
        /// n-gram data for the variants that have more than one candidate, scores
        /// every variant and lets the connector assemble the resulting fragments.
        /// </summary>
        /// <param name="sentence">Sentence whose <c>Text</c> is analysed and whose <c>EndMarks</c> are appended to the last fragment.</param>
        /// <param name="type">N-gram type; its integer value is used as the n-gram length.</param>
        /// <returns>
        /// The fragments returned by the connector, with the end marks appended to the
        /// last one. An empty list is returned when the connector yields nothing.
        /// </returns>
        private List <string> AnalyzeSentence(Sentence sentence, NgramType type)
        {
            var length        = (int)type;
            var result        = new List<string>();
            var ngramVariants = new List<NGramVariants>();

            var combinationWords = CreateCombinationsWordList(sentence.Text);
            combinationWords = _dictionary.CheckWords(combinationWords);

            ngramVariants.AddRange(CreateNgramVariantsList(sentence.Text, combinationWords, length));

            // Only variants with more than one candidate are looked up in the n-gram data.
            var ambiguousVariants = ngramVariants.Where(x => x.NgramVariants.Count > 1).ToList();
            var ngrams            = GetAllData(ambiguousVariants, type).ToList();

            foreach (var variant in ngramVariants)
            {
                variant.UpdateNGramsVariants(ngrams);
                variant.RestoreUpperLettersInVariants();
                // The flag tells the variant whether it was part of the looked-up set.
                variant.CountProbability(ambiguousVariants.Contains(variant));
            }

            result.AddRange(_ngramConnector.AnalyzeNgramsVariants(ngramVariants, length, sentence.Text.Count));

            // Guard: indexing Count - 1 on an empty result would throw ArgumentOutOfRangeException.
            if (result.Count > 0)
            {
                result[result.Count - 1] += sentence.EndMarks;
            }

            return result;
        }
Пример #2
0
        /// <inheritdoc />
        /// <summary>
        /// Builds a SELECT statement that compares every word position of an n-gram
        /// with the corresponding entry of <paramref name="wordList"/>.
        /// </summary>
        /// <param name="ngramType">N-gram type; its integer value is the number of word positions compared.</param>
        /// <param name="wordList">Words to match; must hold at least as many entries as the integer value of <paramref name="ngramType"/>.</param>
        /// <returns>The query text, without a terminating semicolon.</returns>
        /// <exception cref="ArgumentException">'wordList' is null or shorter than the n-gram size.</exception>
        public string GetTheSameNgramsFromTable(NgramType ngramType, List <string> wordList)
        {
            var wordCount = (int)ngramType;

            var hasEnoughWords = wordList != null && wordList.Count >= wordCount;
            if (!hasEnoughWords)
            {
                throw new ArgumentException("List<string> 'wordList' has wrong size");
            }

            // The table suffix is chosen from the first word through GetIndexOfNames.
            var tableIndex = GetIndexOfNames(wordList[0]);
            var select     = $"SELECT * FROM `{_dbTableDbTableName[wordCount - 1]}[{Names[tableIndex]}]` WHERE";

            // One " WordN='value'" condition per position; joined with " AND" below.
            var conditions = new List<string>();
            for (var position = 1; position <= wordCount; ++position)
            {
                conditions.Add(" Word" + position + "='" + wordList[position - 1].ChangeSpecialCharacters() + "'");
            }

            return select + string.Join(" AND", conditions);
        }
Пример #3
0
        /// <summary>
        /// Collects the distinct n-grams of all given variants, queries the database
        /// for each of them and returns the rows that could be converted to n-grams.
        /// </summary>
        /// <param name="wordLists">Variants whose candidate n-grams are looked up.</param>
        /// <param name="type">N-gram type passed to the query provider.</param>
        /// <returns>The n-grams read from the database for all looked-up candidates.</returns>
        private IEnumerable <NGram> GetAllData(List <NGramVariants> wordLists, NgramType type)
        {
            var ngramsList    = new List<NGram>();
            var ngramVariants = new List<NGram>();

            foreach (var item in wordLists)
            {
                foreach (var elem in item.NgramVariants)
                {
                    ngramVariants.Add(elem.Ngram);
                }
            }

            ngramVariants = ngramVariants.Distinct().ToList();

            // Drops the first n-gram that contains an empty word.
            // NOTE(review): later n-grams containing "" are kept - confirm this is intended.
            var index = ngramVariants.FindIndex(x => x.WordsList.Contains(""));
            if (index != -1)
            {
                ngramVariants.RemoveAt(index);
            }

            _db.ConnectToDb();
            try
            {
                foreach (var elem in ngramVariants)
                {
                    // Work on a copy so the ignorer does not alter the original candidate.
                    var tmp = new NGram(elem);

                    if (_iManager != null)
                    {
                        tmp = _iManager.Remove(tmp);
                    }

                    var data = _db.ExecuteSqlCommand(_queryProvider.GetTheSameNgramsFromTable(type, tmp.WordsList));
                    var rows = data.Tables[0].Rows;

                    for (var i = 0; i < rows.Count; ++i)
                    {
                        var dataRow = rows[i].ItemArray;
                        var ngram   = StringArrayToNgram(dataRow.Select(a => a.ToString()).ToArray());
                        if (ngram == null)
                        {
                            // Row could not be converted; skip it.
                            continue;
                        }

                        var n = (NGram)ngram;
                        if (_iManager != null)
                        {
                            // Re-apply to the row what the ignorer removed from the original candidate.
                            n = _iManager.Restore(elem, n);
                        }
                        ngramsList.Add(n);
                    }
                }
            }
            finally
            {
                // Always release the connection, even when a query or conversion throws.
                _db.Disconnect();
            }

            return ngramsList;
        }
        /// <inheritdoc />
        /// <summary>
        /// Generates the query that fetches similar n-grams from the n-grams table.
        /// After validating the list size, the query text is produced by <c>QueryCreator</c>.
        /// </summary>
        /// <param name="ngramType">Type of the n-gram; its integer value is the n-gram size.</param>
        /// <param name="wordList">List of words; must hold at least (n-gram size - 1) entries.</param>
        /// <returns>The query string produced by <c>QueryCreator</c>.</returns>
        /// <exception cref="T:System.ArgumentException">
        /// 'wordList' is null or has fewer entries than required.
        /// NOTE(review): no Unigram check is made in this method; if one exists it
        /// lives in <c>QueryCreator</c> - confirm.
        /// </exception>
        public string GetSimilarNgramsFromTable(NgramType ngramType, List <string> wordList)
        {
            var ngramSize     = (int)ngramType;
            var comparedWords = ngramSize - 1;

            if (wordList != null && wordList.Count >= comparedWords)
            {
                return QueryCreator(ngramSize, comparedWords, wordList);
            }

            throw new ArgumentException("List<string> 'wordList' has wrong size");
        }
Пример #5
0
        /// <summary>
        /// Trims the left context to the size usable by the given n-gram type and
        /// returns it as an array. The passed list itself is shortened in place.
        /// </summary>
        /// <param name="context">Context words; modified when trimming applies.</param>
        /// <param name="type">N-gram type that decides how many leading words are dropped.</param>
        /// <returns>The (possibly shortened) context as an array.</returns>
        private string[] GetLeftContext(List<string> context, NgramType type)
        {
            // Number of leading words to drop; stays 0 when no rule matches.
            var surplus = 0;

            if (type == NgramType.Digram)
            {
                // Three words -> keep the last two.
                surplus = context.Count == 3 ? 1 : 0;
            }
            else if (type == NgramType.Unigram)
            {
                // Three or two words -> keep only the last one.
                if (context.Count == 3)
                {
                    surplus = 2;
                }
                else if (context.Count == 2)
                {
                    surplus = 1;
                }
            }

            if (surplus > 0)
            {
                context.RemoveRange(0, surplus);
            }

            return context.ToArray();
        }
Пример #6
0
        // public List<string> WordsCombinations { get; private set; }
        #endregion

        #region CONSTRUCTORS

        /// <summary>
        /// Initializes a new instance of the <see cref="DiacriticMarksRestorer"/> class.
        /// </summary>
        /// <param name="diacriticAdder">The diacritic adder; a <c>DiacriticMarksAdder</c> is created when null.</param>
        /// <param name="dictionary">
        /// The dictionary; when null, a <c>Dict</c> is built from the lines of the
        /// <c>Resources/dictionary</c> file (resolved against the current directory),
        /// each line mapped to 0.
        /// </param>
        /// <param name="splitter">The fragments splitter; stored as passed, so it stays null when omitted.</param>
        /// <param name="iManager">The characters ignorer; stored as passed, so it stays null when omitted.</param>
        /// <param name="ngramConnector">The n-grams connector; a <c>Hierarchy</c> is created when null.</param>
        public DiacriticMarksRestorer(ILetterChanger diacriticAdder = null, IDictionary dictionary = null, IFragmentsSplitter splitter = null, ICharactersIgnorer iManager = null, INgramsConnector ngramConnector = null)
        {
            _diacriticAdder = diacriticAdder ?? new DiacriticMarksAdder();

            if (dictionary == null)
            {
                var words = new Dictionary<string, int>();

                // Path.Combine keeps the default location valid on non-Windows systems too.
                foreach (var line in File.ReadLines(Path.Combine("Resources", "dictionary")))
                {
                    // Indexer assignment tolerates duplicate lines; Add would throw on them.
                    words[line] = 0;
                }

                dictionary = new Dict(words);
            }

            _dictionary     = dictionary;
            _splitter       = splitter;
            _ngramConnector = ngramConnector ?? new Hierarchy();
            _iManager       = iManager;
            _ngramType      = NgramType.Bigram;
        }
Пример #7
0
        /// <summary>
        /// Generates a query that fixes the leading words of an n-gram and accepts
        /// any of the given candidates for the last word.
        /// </summary>
        /// <param name="ngramType">Type of the n-gram; its integer value is the n-gram size.</param>
        /// <param name="wordList">The leading words; must hold exactly (n-gram size - 1) entries.</param>
        /// <param name="combinations">Possible last words; must hold at least one entry.</param>
        /// <returns>The query text, terminated with a semicolon.</returns>
        /// <exception cref="ArgumentException">
        /// List(string) 'wordList' has wrong size
        /// or
        /// List(string) 'combinations' has wrong size
        /// or
        /// NgramType 'ngramType' cannot be an Unigram
        /// </exception>
        public string GetMultiNgramsFromTable(NgramType ngramType, List <string> wordList, List <string> combinations)
        {
            var number = (int)ngramType;

            if (wordList == null || wordList.Count != number - 1)
            {
                throw new ArgumentException("List<string> 'wordList' has wrong size");
            }
            if (combinations == null || combinations.Count < 1)
            {
                throw new ArgumentException("List<string> 'combinations' has wrong size");
            }
            // A one-word n-gram has no leading word, so wordList[0] below would fail
            // with an index error instead of the documented ArgumentException.
            if (number < 2)
            {
                throw new ArgumentException("NgramType 'ngramType' cannot be an Unigram");
            }

            // The table suffix is chosen from the first word through GetIndexOfNames.
            var index  = GetIndexOfNames(wordList[0]);
            var select = $"SELECT * FROM `{_dbTableDbTableName[number - 1]}[{Names[index]}]` WHERE ";

            // Values are passed through ChangeSpecialCharacters before being embedded.
            // NOTE(review): presumably this escapes quotes - confirm, since the query is built as text.
            var fixedWords = new List<string>();
            for (var i = 0; i < number - 1; ++i)
            {
                fixedWords.Add("Word" + (i + 1) + "='" + wordList[i].ChangeSpecialCharacters() + "' ");
            }

            var lastWordOptions = new List<string>();
            foreach (var candidate in combinations)
            {
                lastWordOptions.Add("Word" + number + "='" + candidate.ChangeSpecialCharacters() + "' ");
            }

            return select
                   + string.Join("AND ", fixedWords)
                   + "AND ( "
                   + string.Join("OR ", lastWordOptions)
                   + ");";
        }
        /// <summary>
        /// Generates one query against a single n-gram table that matches any of the
        /// given n-grams, where every word position may have several alternatives.
        /// </summary>
        /// <param name="ngramType">Type of the n-gram; its integer value is the required number of word positions.</param>
        /// <param name="wordLists">
        /// Outer list: n-grams; middle list: word positions (must equal the n-gram
        /// size); inner list: alternatives for that position (at least one).
        /// </param>
        /// <returns>The query text, terminated with a semicolon.</returns>
        /// <exception cref="ArgumentException">Any of the three list levels has a wrong size.</exception>
        public string GetAllNecessaryNgramsFromTable(NgramType ngramType, List <List <List <string> > > wordLists)
        {
            var ngramSize = (int)ngramType;

            if (wordLists == null || wordLists.Count < 1)
            {
                throw new ArgumentException("List<string> 'wordLists' has wrong size");
            }

            foreach (var ngram in wordLists)
            {
                if (ngram.Count != ngramSize)
                {
                    throw new ArgumentException("List<string> middle list has wrong size");
                }
                if (ngram.Exists(alternatives => alternatives.Count < 1))
                {
                    throw new ArgumentException("List<string> inside list has wrong size");
                }
            }

            var select = "SELECT * FROM " + _dbTableDbTableName[ngramSize - 1] + " WHERE ";

            // One parenthesised clause per n-gram; clauses are OR-ed together.
            var ngramClauses = new List<string>();
            foreach (var ngram in wordLists)
            {
                // One parenthesised clause per word position; positions are AND-ed.
                var positionClauses = new List<string>();
                for (var position = 1; position <= ngram.Count; ++position)
                {
                    var options = new List<string>();
                    foreach (var word in ngram[position - 1])
                    {
                        options.Add("Word" + position + "='" + word.ChangeSpecialCharacters() + "' ");
                    }
                    positionClauses.Add("( " + string.Join("OR ", options) + ")");
                }
                ngramClauses.Add("( " + string.Join(" AND ", positionClauses) + " )");
            }

            return select + string.Join(" OR ", ngramClauses) + ";";
        }
Пример #9
0
 /// <summary>
 /// Registers <paramref name="collection"/> under <paramref name="type"/> in the
 /// <c>ngrams</c> container by calling its <c>Add</c> method.
 /// </summary>
 /// <param name="type">Key under which the collection is stored.</param>
 /// <param name="collection">Collection to store.</param>
 internal void AddNgramCollection(NgramType type, NgramCollection collection)
     => ngrams.Add(type, collection);
Пример #10
0
 /// <summary>
 /// Tells whether a collection has been stored for <paramref name="type"/>.
 /// </summary>
 /// <param name="type">N-gram type to look for.</param>
 /// <returns>The result of <c>ContainsKey</c> on the <c>ngrams</c> container.</returns>
 public bool IsAvailableNgramCollection(NgramType type)
     => this.ngrams.ContainsKey(type);
Пример #11
0
 /// <summary>
 /// Returns the collection stored for <paramref name="type"/> via the indexer of
 /// the <c>ngrams</c> container.
 /// </summary>
 /// <param name="type">N-gram type whose collection is requested.</param>
 /// <returns>The stored collection.</returns>
 /// <remarks>
 /// NOTE(review): behaviour for a missing key depends on the container type,
 /// which is not visible here; presumably it throws - confirm.
 /// </remarks>
 public NgramCollection GetNgramCollection(NgramType type)
     => this.ngrams[type];
Пример #12
0
        /// <summary>
        /// Generates a query that matches any of the given n-grams, where every word
        /// position may have several alternatives. N-grams are grouped per table
        /// (table chosen through <c>GetIndexOfNames</c> from the first alternative of
        /// the first word) and the per-table SELECTs are combined with UNION ALL.
        /// </summary>
        /// <param name="ngramType">Type of the n-gram; its integer value is the required number of word positions.</param>
        /// <param name="wordLists">
        /// Outer list: n-grams; middle list: word positions (must equal the n-gram
        /// size); inner list: alternatives for that position (at least one).
        /// </param>
        /// <returns>The combined query text, terminated with a semicolon.</returns>
        /// <exception cref="ArgumentException">Any of the three list levels has a wrong size.</exception>
        public string GetAllNecessaryNgramsFromTable(NgramType ngramType, List <List <List <string> > > wordLists)
        {
            var ngramSize = (int)ngramType;

            if (wordLists == null || wordLists.Count < 1)
            {
                throw new ArgumentException("List<string> 'wordLists' has wrong size");
            }

            foreach (var ngram in wordLists)
            {
                if (ngram.Count != ngramSize)
                {
                    throw new ArgumentException("List<string> middle list has wrong size");
                }
                if (ngram.Exists(alternatives => alternatives.Count < 1))
                {
                    throw new ArgumentException("List<string> inside list has wrong size");
                }
            }

            // One SELECT per table; a null slot means no n-gram was routed to that table.
            var selects = new string[Names.Length];

            foreach (var ngram in wordLists)
            {
                // NOTE(review): only the first alternative of the first word picks the
                // table; confirm other alternatives always map to the same table.
                var tableIndex = GetIndexOfNames(ngram[0][0]);

                var prefix = selects[tableIndex] == null
                    ? $"SELECT * FROM `{_dbTableDbTableName[ngramSize - 1]}[{Names[tableIndex]}]` WHERE "
                    : selects[tableIndex] + " OR ";

                // One parenthesised clause per word position; positions are AND-ed.
                var positionClauses = new List<string>();
                for (var position = 1; position <= ngram.Count; ++position)
                {
                    var options = new List<string>();
                    foreach (var word in ngram[position - 1])
                    {
                        options.Add("Word" + position + "='" + word.ChangeSpecialCharacters() + "' ");
                    }
                    positionClauses.Add("( " + string.Join("OR ", options) + ")");
                }

                selects[tableIndex] = prefix + "( " + string.Join(" AND ", positionClauses) + " )";
            }

            var usedSelects = new List<string>();
            foreach (var select in selects)
            {
                if (select != null)
                {
                    usedSelects.Add(select);
                }
            }

            return string.Join(" UNION ALL ", usedSelects) + ";";
        }
Пример #13
0
 /// <inheritdoc />
 /// <summary>
 /// Sets the n-gram type stored in <c>_ngramType</c>.
 /// </summary>
 /// <param name="type">The n-gram type to store.</param>
 public void SetNgram(NgramType type) => _ngramType = type;
Пример #14
0
        /// <summary>
        /// Instrumented variant of the sentence analysis: performs the same pipeline
        /// steps and accumulates per-step durations and element counts.
        /// </summary>
        /// <param name="sentence">Sentence whose <c>Text</c> is analysed and whose <c>EndMarks</c> are appended to the last fragment.</param>
        /// <param name="type">N-gram type; its integer value is used as the n-gram length.</param>
        /// <param name="times">
        /// Accumulated durations; indices 1-7 are incremented: 1 word combinations,
        /// 2 dictionary check, 3 variant creation, 4 filtering of multi-candidate
        /// variants, 5 data loading, 6 variant scoring, 7 connecting the result.
        /// </param>
        /// <param name="counts">
        /// Accumulated counts; indices 2-8 are incremented: 2 word combinations,
        /// 3 combinations after the dictionary check, 4 variants, 5 multi-candidate
        /// variants, 6 candidates inside those variants, 7 loaded n-grams, 8 result fragments.
        /// </param>
        /// <returns>
        /// The fragments returned by the connector, with the end marks appended to the
        /// last one. An empty list is returned when the connector yields nothing.
        /// </returns>
        private List <string> AnalyzeSentence2(Sentence sentence, NgramType type, ref List <TimeSpan> times, ref List <int> counts)
        {
            var length        = (int)type;
            var result        = new List<string>();
            var ngramVariants = new List<NGramVariants>();

            // Stopwatch is monotonic and high-resolution; DateTime.Now differences are
            // coarse and can jump when the system clock is adjusted.
            var watch = System.Diagnostics.Stopwatch.StartNew();
            var combinationWords = CreateCombinationsWordList(sentence.Text);
            counts[2] += combinationWords.Count;
            watch.Stop();
            times[1] += watch.Elapsed;

            watch.Restart();
            combinationWords = _dictionary.CheckWords(combinationWords);
            watch.Stop();
            counts[3] += combinationWords.Count;
            times[2]  += watch.Elapsed;

            watch.Restart();
            ngramVariants.AddRange(CreateNgramVariantsList(sentence.Text, combinationWords, length));
            watch.Stop();
            counts[4] += ngramVariants.Count;
            times[3]  += watch.Elapsed;

            watch.Restart();
            // Only variants with more than one candidate are looked up in the n-gram data.
            var ambiguousVariants = ngramVariants.Where(x => x.NgramVariants.Count > 1).ToList();
            watch.Stop();
            counts[5] += ambiguousVariants.Count;
            times[4]  += watch.Elapsed;

            // Not timed, as in the original measurement layout.
            foreach (var item in ambiguousVariants)
            {
                counts[6] += item.NgramVariants.Count;
            }

            watch.Restart();
            var ngrams = GetAllData(ambiguousVariants, type).ToList();
            watch.Stop();
            counts[7] += ngrams.Count;
            times[5]  += watch.Elapsed;

            watch.Restart();
            foreach (var variant in ngramVariants)
            {
                variant.UpdateNGramsVariants(ngrams);
                variant.RestoreUpperLettersInVariants();
                // The flag tells the variant whether it was part of the looked-up set.
                variant.CountProbability(ambiguousVariants.Contains(variant));
            }
            watch.Stop();
            times[6] += watch.Elapsed;

            watch.Restart();
            result.AddRange(_ngramConnector.AnalyzeNgramsVariants(ngramVariants, length, sentence.Text.Count));

            // Guard: indexing Count - 1 on an empty result would throw ArgumentOutOfRangeException.
            if (result.Count > 0)
            {
                result[result.Count - 1] += sentence.EndMarks;
            }
            counts[8] += result.Count;
            watch.Stop();
            times[7] += watch.Elapsed;

            return result;
        }