/// <summary>
/// Passes <paramref name="text"/> to the single-argument <c>Add</c> overload and, the first
/// time a given text is seen, stores the identification result that belongs to it.
/// </summary>
/// <param name="text">Text to be added</param>
/// <param name="splitWithIdentification">Identification result recorded for <paramref name="text"/> when none is stored yet</param>
internal void Add(string text, SplitWithIdentification splitWithIdentification)
{
    Add(text);

    // An identification already stored for this text wins; later ones are ignored.
    if (_identificationDictionary.ContainsKey(text))
    {
        return;
    }

    _identificationDictionary.Add(text, splitWithIdentification);
}
/// <summary>
/// Re-files every pending merged token under the identification the token dictionary
/// reports for it, then empties the merged token list.
/// </summary>
/// <param name="tokenDictionary">Token dictionary to get result</param>
internal void UpdateFromMergeToken(ITokenDictionary tokenDictionary)
{
    foreach (KeyValuePair<string, List<IndexerFile>> pending in _mergedTokenList)
    {
        SplitWithIdentification resolved = tokenDictionary.GetIdentificationSplitForMergedToken(pending.Key);

        // NOTE(review): AddIdentificationToList inserts into _mergedTokenList when the resolved
        // split is itself a MergedToken under a key that is not present yet. That would modify
        // the collection being enumerated here - confirm the token dictionary never returns
        // such a result.
        foreach (IndexerFile owner in pending.Value)
        {
            AddIdentificationToList(resolved, owner);
        }
    }

    // Everything pending has been re-filed, so nothing stays in the merged list.
    _mergedTokenList.Clear();
}
/// <summary>
/// Resolves every entry of the stemmed dictionary through the token dictionary: the returned
/// stem is stored on the entry and registered, for each file of that entry, as a token or as
/// an identified word. Entries for which the token dictionary reports no decision are skipped.
/// </summary>
/// <param name="tokenDictionary">Token dictionary to get result</param>
internal void UpdateFromStemmed(ITokenDictionary tokenDictionary)
{
    // Iterate over a snapshot of the keys rather than the live key collection.
    foreach (string pendingKey in _stemmedDictionary.Keys.ToList())
    {
        bool? isToken;
        string stem = tokenDictionary.GetStemmedForText(pendingKey, out isToken);
        if (isToken == null)
        {
            // No decision available for this key; leave the entry untouched.
            continue;
        }

        var entry = _stemmedDictionary[pendingKey];
        entry.Word = stem;

        SplitIdentification kind;
        if (isToken.Value)
        {
            kind = SplitIdentification.Token;
        }
        else
        {
            kind = SplitIdentification.Identified;
        }

        SplitWithIdentification resolved = new SplitWithIdentification(stem, kind);
        foreach (IndexerFile owner in entry.IndexerFiles)
        {
            AddIdentificationToList(resolved, owner);
        }
    }
}
/// <summary>
/// Resolves every entry of the corrected dictionary through the token dictionary: the returned
/// correction is stored on the entry and registered, for each file of that entry, as a token
/// or as an identified word. Entries for which the token dictionary reports no decision are
/// skipped.
/// </summary>
/// <param name="tokenDictionary">Token dictionary to get result</param>
internal void UpdateFromMisspelled(ITokenDictionary tokenDictionary)
{
    // Iterate over a snapshot of the keys rather than the live key collection.
    foreach (string pendingKey in _correctedDictionary.Keys.ToList())
    {
        bool? isToken;
        string correction = tokenDictionary.GetCorrectionForMisspelled(pendingKey, out isToken);
        if (isToken == null)
        {
            // No decision available for this key; leave the entry untouched.
            continue;
        }

        var entry = _correctedDictionary[pendingKey];
        entry.Word = correction;

        SplitIdentification kind;
        if (isToken.Value)
        {
            kind = SplitIdentification.Token;
        }
        else
        {
            kind = SplitIdentification.Identified;
        }

        SplitWithIdentification resolved = new SplitWithIdentification(correction, kind);
        foreach (IndexerFile owner in entry.IndexerFiles)
        {
            AddIdentificationToList(resolved, owner);
        }
    }
}
/// <summary>
/// Stores an identification result in the split collection.
/// </summary>
/// <param name="splitWithIdentification">Identification result to be added</param>
public void Add(SplitWithIdentification splitWithIdentification)
{
    _splits.Add(splitWithIdentification);
}
/// <summary>
/// Files a split under the collection that matches its identification kind and associates
/// the given indexer file with it. The lower-cased split text is the key; a file is recorded
/// at most once per key.
/// </summary>
/// <param name="splitWithIdentification">Split information</param>
/// <param name="file">File the split belongs to</param>
/// <exception cref="NotImplementedException">The identification kind has no matching collection.</exception>
private void AddIdentificationToList(SplitWithIdentification splitWithIdentification, IndexerFile file)
{
    // NOTE(review): ToLower() follows the current culture; confirm whether these keys are
    // meant to be culture-invariant before changing it, since other code may build keys the same way.
    string key = splitWithIdentification.Split.ToLower();

    switch (splitWithIdentification.SplitIdentification)
    {
        case SplitIdentification.Identified:
            if (!_dictionaryWordList.ContainsKey(key))
            {
                _dictionaryWordList.Add(key, new List<IndexerFile>() { file });
            }
            else if (!_dictionaryWordList[key].Contains(file))
            {
                _dictionaryWordList[key].Add(file);
            }
            break;

        // Single-letter identifiers are filed together with tokens.
        case SplitIdentification.Token:
        case SplitIdentification.SingleLetterIdentifier:
            if (!_tokenList.ContainsKey(key))
            {
                _tokenList.Add(key, new List<IndexerFile>() { file });
            }
            else if (!_tokenList[key].Contains(file))
            {
                _tokenList[key].Add(file);
            }
            break;

        case SplitIdentification.Unidentified:
            if (!_unidentifiedList.ContainsKey(key))
            {
                _unidentifiedList.Add(key, new List<IndexerFile>() { file });
            }
            else if (!_unidentifiedList[key].Contains(file))
            {
                _unidentifiedList[key].Add(file);
            }
            break;

        case SplitIdentification.MergedToken:
            if (!_mergedTokenList.ContainsKey(key))
            {
                _mergedTokenList.Add(key, new List<IndexerFile>() { file });
            }
            else if (!_mergedTokenList[key].Contains(file))
            {
                _mergedTokenList[key].Add(file);
            }
            break;

        // Misspelled words and tokens share one dictionary; a new entry starts with a null word.
        case SplitIdentification.WordMisspelled:
        case SplitIdentification.TokenMisspelled:
            if (_correctedDictionary.ContainsKey(key))
            {
                if (!_correctedDictionary[key].IndexerFiles.Contains(file))
                {
                    _correctedDictionary[key].IndexerFiles.Add(file);
                }
            }
            else
            {
                _correctedDictionary.Add(key, new WordWithFiles(null, new List<IndexerFile>() { file }));
            }
            break;

        // Stemmed words and tokens share one dictionary; a new entry starts with a null word.
        case SplitIdentification.TokenStemmed:
        case SplitIdentification.WordStemmed:
            if (_stemmedDictionary.ContainsKey(key))
            {
                if (!_stemmedDictionary[key].IndexerFiles.Contains(file))
                {
                    _stemmedDictionary[key].IndexerFiles.Add(file);
                }
            }
            else
            {
                _stemmedDictionary.Add(key, new WordWithFiles(null, new List<IndexerFile>() { file }));
            }
            break;

        default:
            throw new NotImplementedException("SplitIdentification of type " + splitWithIdentification.SplitIdentification + " is not implemented");
    }
}