/// <summary>
/// Replaces every text in <paramref name="AllWords"/> with its stemmed form, in place.
/// </summary>
/// <remarks>
/// Each text is split on single spaces, every piece is passed through
/// <c>Stemming.TransformingWord</c>, and the pieces are glued back together with a
/// space appended after each one (so a non-empty result always ends with a space).
/// </remarks>
/// <param name="AllWords">List of texts to stem; its elements are overwritten.</param>
static void StemmAllText(ref List<string> AllWords)
{
    for (int textIndex = 0; textIndex < AllWords.Count; textIndex++)
    {
        string[] tokens = AllWords[textIndex].Split(' ');

        // Every stem is followed by a space, including the last one.
        AllWords[textIndex] = string.Concat(
            tokens.Select(token => Stemming.TransformingWord(token) + " "));
    }
}
/// <summary>
/// Runs a boolean AND search over <c>startDocs</c>: prints the inverted index built
/// from the stemmed documents, then prints every original document that contains
/// all terms of the request.
/// </summary>
/// <param name="request">
/// Space-separated query. It is lower-cased, stemmed word by word and stripped of
/// stop words before lookup. Repeated terms are counted once.
/// </param>
public static void BoolSearch(string request)
{
    // Stem a copy so the original texts in startDocs can still be printed verbatim.
    List<string> document = new List<string>(startDocs);
    StemmAllText(ref document);

    // Build the list of individual words.
    List<string> wordList = CreateWordList(ref document);
    DelStopWordsText(ref wordList);
    ListCreate(ref wordList);

    // Inverted index: term -> indices of the documents whose stemmed text contains it.
    Dictionary<string, List<int>> invIndex = new Dictionary<string, List<int>>();
    foreach (var term in wordList)
    {
        // Guard against a repeated term; Dictionary.Add would throw on it.
        if (invIndex.ContainsKey(term))
        {
            continue;
        }

        List<int> postings = new List<int>();
        for (int i = 0; i < document.Count; i++)
        {
            // NOTE(review): this is a substring test, so a term also matches inside
            // a longer word. Confirm whether whole-word matching is intended.
            if (document[i].Contains(term))
            {
                postings.Add(i);
            }
        }
        invIndex.Add(term, postings);
    }

    Console.WriteLine("Инвертированный индекс:");
    foreach (var entry in invIndex)
    {
        Console.Write(entry.Key + "-");
        foreach (var docIndex in entry.Value)
        {
            Console.Write(docIndex + " ");
        }
        Console.WriteLine();
    }
    Console.WriteLine();

    // Normalise the request the same way the documents were normalised.
    List<string> requestWords = request.ToLower().Split(' ').ToList();
    for (int i = 0; i < requestWords.Count; i++)
    {
        requestWords[i] = Stemming.TransformingWord(requestWords[i]);
    }
    DelStopWordsText(ref requestWords);

    // Postings for each distinct query term; an unknown term gets an empty list,
    // which makes the overall AND result empty.
    Dictionary<string, List<int>> invIndexGood = new Dictionary<string, List<int>>();
    foreach (var requestWord in requestWords)
    {
        // A term repeated in the query adds nothing to an AND search; skipping it
        // also avoids the duplicate-key ArgumentException from Dictionary.Add.
        if (invIndexGood.ContainsKey(requestWord))
        {
            continue;
        }

        List<int> postings;
        if (!invIndex.TryGetValue(requestWord, out postings))
        {
            postings = new List<int>();
        }
        invIndexGood.Add(requestWord, postings);
    }

    Console.WriteLine("Булев поиск:");

    // A document in the intersection must appear in the first postings list, so it
    // is enough to filter that list against all the others.
    List<int> result = new List<int>();
    List<List<int>> postingLists = invIndexGood.Values.ToList();
    if (postingLists.Count > 0)
    {
        foreach (int docIndex in postingLists[0])
        {
            if (postingLists.All(list => list.Contains(docIndex)))
            {
                result.Add(docIndex);
            }
        }
    }

    foreach (var docIndex in result)
    {
        Console.WriteLine(startDocs[docIndex]);
    }
    Console.WriteLine();
}
/// <summary>
/// Scores every document in <c>startDocs</c> against the request and prints the
/// documents ordered from highest to lowest score.
/// </summary>
/// <remarks>
/// For each stemmed document the score is the sum of:
/// <list type="bullet">
/// <item><c>CountWords(term, text)</c> for every query term;</item>
/// <item><c>CountWords(pair, text)</c> for every two adjacent query terms joined by a space;</item>
/// <item>1 if the text contains every query term;</item>
/// <item>1/350 if the text contains the whole stemmed query.</item>
/// </list>
/// <c>startDocs</c> itself is not modified; ranking is done on a copy.
/// </remarks>
/// <param name="request">
/// Space-separated query. It is lower-cased, stemmed word by word and stripped of
/// stop words before scoring.
/// </param>
static void CalcScore(string request)
{
    // Stem a copy so the original texts can be printed verbatim.
    List<string> document = new List<string>(startDocs);
    StemmAllText(ref document);

    List<string> requestWords = request.ToLower().Split(' ').ToList();
    for (int i = 0; i < requestWords.Count; i++)
    {
        requestWords[i] = Stemming.TransformingWord(requestWords[i]);
    }
    DelStopWordsText(ref requestWords);

    // The whole query with a space after every term, the same layout that
    // StemmAllText produces for the documents.
    string phrase = string.Concat(requestWords.Select(term => term + " "));

    List<double> scores = new List<double>(document.Count);
    for (int i = 0; i < document.Count; i++)
    {
        string text = document[i];
        double score = 0;

        // Single terms.
        foreach (var term in requestWords)
        {
            score += CountWords(term, text);
        }

        // Adjacent pairs of terms.
        for (int j = 0; j < requestWords.Count - 1; j++)
        {
            score += CountWords(requestWords[j] + " " + requestWords[j + 1], text);
        }

        // Bonus when every term occurs somewhere in the text.
        if (requestWords.All(term => text.Contains(term)))
        {
            score++;
        }

        // Small tie-breaking bonus for the complete query appearing as one phrase.
        if (text.Contains(phrase))
        {
            score += 1f / 350f;
        }

        scores.Add(score);
    }

    // Rank a copy: sorting startDocs itself would permanently reorder the shared
    // document list and change what later searches see.
    List<string> result = new List<string>(startDocs);

    // Hand-rolled exchange sort kept on purpose: it is not stable, so switching
    // to a library sort could change the printed order of equally scored documents.
    for (int i = 0; i < scores.Count - 1; i++)
    {
        for (int j = i + 1; j < scores.Count; j++)
        {
            if (scores[i] < scores[j])
            {
                string tempDoc = result[i];
                result[i] = result[j];
                result[j] = tempDoc;

                double tempScore = scores[i];
                scores[i] = scores[j];
                scores[j] = tempScore;
            }
        }
    }

    Console.WriteLine("Ранжирование с помощью Score:");
    for (int i = 0; i < scores.Count; i++)
    {
        Console.WriteLine((i + 1) + "." + result[i]);
    }
}