Пример #1
0
 /// <summary>
 /// Passes <paramref name="text"/> to the single-argument <c>Add</c> overload and then,
 /// if no identification is stored for that text yet, stores
 /// <paramref name="splitWithIdentification"/> for it.
 /// </summary>
 /// <remarks>
 /// The single-argument overload is declared elsewhere; presumably it adds the text
 /// and increases its count (per the original comment) - confirm against that overload.
 /// </remarks>
 /// <param name="text">Text to be added; also used as the key of the identification dictionary.</param>
 /// <param name="splitWithIdentification">Identification to associate with the text when none is stored yet.</param>
 internal void Add(string text, SplitWithIdentification splitWithIdentification)
 {
     Add(text);

     // The first identification stored for a text is kept; later ones are ignored.
     bool alreadyIdentified = _identificationDictionary.ContainsKey(text);
     if (alreadyIdentified)
     {
         return;
     }

     _identificationDictionary.Add(text, splitWithIdentification);
 }
Пример #2
0
 /// <summary>
 /// For every merged token, asks the token dictionary for its identification and files
 /// each indexer file recorded under that token accordingly; afterwards the merged
 /// token list is emptied.
 /// </summary>
 /// <param name="tokenDictionary">Token dictionary queried for the identification of each merged token.</param>
 internal void UpdateFromMergeToken(ITokenDictionary tokenDictionary)
 {
     foreach (KeyValuePair<string, List<IndexerFile>> mergedEntry in _mergedTokenList)
     {
         string mergedToken = mergedEntry.Key;
         List<IndexerFile> filesForToken = mergedEntry.Value;

         // One lookup per merged token; the result is shared by all of its files.
         SplitWithIdentification resolved = tokenDictionary.GetIdentificationSplitForMergedToken(mergedToken);

         foreach (IndexerFile file in filesForToken)
         {
             AddIdentificationToList(resolved, file);
         }
     }

     // Every merged token has been re-filed above, so nothing is left pending.
     _mergedTokenList.Clear();
 }
Пример #3
0
        /// <summary>
        /// Resolves the pending stemmed entries against the token dictionary. For each key
        /// that gets a verdict, the returned stem is stored on the entry and every indexer
        /// file of the entry is filed under that stem, either as a token or as an identified
        /// word. Keys without a verdict are left untouched.
        /// </summary>
        /// <param name="tokenDictionary">Token dictionary queried for the stem of each key.</param>
        internal void UpdateFromStemmed(ITokenDictionary tokenDictionary)
        {
            // Enumerate a snapshot of the keys, not the live key collection.
            foreach (string stemmedKey in _stemmedDictionary.Keys.ToList())
            {
                bool? isToken;
                string stem = tokenDictionary.GetStemmedForText(stemmedKey, out isToken);
                if (isToken == null)
                {
                    // No verdict for this key; skip it.
                    continue;
                }

                SplitIdentification category;
                if (isToken.Value)
                {
                    category = SplitIdentification.Token;
                }
                else
                {
                    category = SplitIdentification.Identified;
                }

                var entry = _stemmedDictionary[stemmedKey];
                entry.Word = stem;

                SplitWithIdentification resolved = new SplitWithIdentification(stem, category);
                foreach (IndexerFile indexerFile in entry.IndexerFiles)
                {
                    AddIdentificationToList(resolved, indexerFile);
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Resolves the pending misspelled entries against the token dictionary. For each key
        /// that gets a verdict, the returned correction is stored on the entry and every
        /// indexer file of the entry is filed under that correction, either as a token or as
        /// an identified word. Keys without a verdict are left untouched.
        /// </summary>
        /// <param name="tokenDictionary">Token dictionary queried for the correction of each key.</param>
        internal void UpdateFromMisspelled(ITokenDictionary tokenDictionary)
        {
            // Enumerate a snapshot of the keys, not the live key collection.
            foreach (string misspelledKey in _correctedDictionary.Keys.ToList())
            {
                bool? isToken;
                string correction = tokenDictionary.GetCorrectionForMisspelled(misspelledKey, out isToken);
                if (isToken == null)
                {
                    // No verdict for this key; skip it.
                    continue;
                }

                SplitIdentification category;
                if (isToken.Value)
                {
                    category = SplitIdentification.Token;
                }
                else
                {
                    category = SplitIdentification.Identified;
                }

                var entry = _correctedDictionary[misspelledKey];
                entry.Word = correction;

                SplitWithIdentification resolved = new SplitWithIdentification(correction, category);
                foreach (IndexerFile indexerFile in entry.IndexerFiles)
                {
                    AddIdentificationToList(resolved, indexerFile);
                }
            }
        }
Пример #5
0
 /// <summary>
 /// Adds an identification result to the split collection held by this instance.
 /// </summary>
 /// <param name="splitWithIdentification">Identification result to add.</param>
 public void Add(SplitWithIdentification splitWithIdentification) => _splits.Add(splitWithIdentification);
Пример #6
0
        /// <summary>
        /// Files <paramref name="file"/> under the lower-cased split text in the collection
        /// that corresponds to the split's identification category.
        /// </summary>
        /// <remarks>
        /// Identified, token / single-letter, unidentified and merged-token splits go to
        /// their respective file lists. Misspelled and stemmed splits go to the corrected
        /// and stemmed dictionaries, where a new entry is created with a <c>null</c> word.
        /// A file is never added twice under the same key.
        /// </remarks>
        /// <param name="splitWithIdentification">Split text together with its identification category.</param>
        /// <param name="file">File the split belongs to.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the identification category is not one of the handled values.
        /// </exception>
        private void AddIdentificationToList(SplitWithIdentification splitWithIdentification, IndexerFile file)
        {
            // Keys are normalized with the invariant culture so that the same identifier
            // always maps to the same key, whatever the current thread culture is
            // (culture-sensitive ToLower() maps 'I' differently under e.g. Turkish).
            string splitToAdd = splitWithIdentification.Split.ToLowerInvariant();

            switch (splitWithIdentification.SplitIdentification)
            {
                case SplitIdentification.Identified:
                    AddFileUnderKey(_dictionaryWordList, splitToAdd, file);
                    break;

                case SplitIdentification.Token:
                case SplitIdentification.SingleLetterIdentifier:
                    AddFileUnderKey(_tokenList, splitToAdd, file);
                    break;

                case SplitIdentification.Unidentified:
                    AddFileUnderKey(_unidentifiedList, splitToAdd, file);
                    break;

                case SplitIdentification.MergedToken:
                    AddFileUnderKey(_mergedTokenList, splitToAdd, file);
                    break;

                case SplitIdentification.WordMisspelled:
                case SplitIdentification.TokenMisspelled:
                    AddFileUnderWordKey(_correctedDictionary, splitToAdd, file);
                    break;

                case SplitIdentification.TokenStemmed:
                case SplitIdentification.WordStemmed:
                    AddFileUnderWordKey(_stemmedDictionary, splitToAdd, file);
                    break;

                default:
                    throw new NotImplementedException("SplitIdentification of type " + splitWithIdentification.SplitIdentification + " is not implemented");
            }
        }

        /// <summary>
        /// Adds <paramref name="file"/> to the file list stored under <paramref name="key"/>,
        /// creating the list when the key is new and skipping files already present.
        /// </summary>
        /// <param name="filesByKey">Dictionary mapping a key to the files associated with it.</param>
        /// <param name="key">Key to file under.</param>
        /// <param name="file">File to associate with the key.</param>
        private static void AddFileUnderKey(IDictionary<string, List<IndexerFile>> filesByKey, string key, IndexerFile file)
        {
            List<IndexerFile> files;
            if (!filesByKey.TryGetValue(key, out files))
            {
                filesByKey.Add(key, new List<IndexerFile> { file });
            }
            else if (!files.Contains(file))
            {
                files.Add(file);
            }
        }

        /// <summary>
        /// Adds <paramref name="file"/> to the entry stored under <paramref name="key"/>,
        /// creating an entry with a <c>null</c> word when the key is new and skipping files
        /// already present on an existing entry.
        /// </summary>
        /// <param name="wordsByKey">Dictionary mapping a key to its word-with-files entry.</param>
        /// <param name="key">Key to file under.</param>
        /// <param name="file">File to associate with the key.</param>
        private static void AddFileUnderWordKey(IDictionary<string, WordWithFiles> wordsByKey, string key, IndexerFile file)
        {
            WordWithFiles entry;
            if (!wordsByKey.TryGetValue(key, out entry))
            {
                // The word is not known yet at this point, so the entry starts with null.
                wordsByKey.Add(key, new WordWithFiles(null, new List<IndexerFile> { file }));
            }
            else if (!entry.IndexerFiles.Contains(file))
            {
                entry.IndexerFiles.Add(file);
            }
        }