/// <summary>
/// Runs the dictionary check on every line of <paramref name="text"/> and merges the
/// per-line results into a single page-level result.
/// </summary>
/// <param name="text">Input text; lines are separated by '\n'.</param>
/// <returns>
/// A result whose text is the checked lines re-joined with '\n', whose word_count is the
/// total of the per-line word counts, and whose similarity is the word-count-weighted mean
/// of the line similarities (0 when no line contributed any word).
/// </returns>
private static DictResult checkMultiLines(string text)
{
    DictResult dictRPage = new DictResult();
    dictRPage.similarity = 0;
    dictRPage.text = "";
    dictRPage.word_count = 0;

    string[] lines = text.Split('\n');
    string[] checkedLines = new string[lines.Length];

    for (int i = 0; i < lines.Length; i++)
    {
        DictResult lineResult = checkOneLine(lines[i]);
        checkedLines[i] = lineResult.text;

        // A line without accepted words carries no weight. Skipping it also keeps an
        // undefined (NaN) line score from poisoning the page average.
        if (lineResult.word_count > 0)
        {
            dictRPage.similarity += lineResult.similarity * (double)lineResult.word_count;
            // Accumulate: the previous "= +" assigned the last line's count instead of summing.
            dictRPage.word_count += lineResult.word_count;
        }
    }

    dictRPage.text = string.Join("\n", checkedLines);

    // Avoid 0/0 when nothing was accepted on the whole page.
    if (dictRPage.word_count > 0)
    {
        dictRPage.similarity /= (double)dictRPage.word_count;
    }

    return dictRPage;
}
/// <summary>
/// Checks every space-separated word of one line with checkOneWord and keeps the words
/// whose similarity exceeds _minWordSimilarity.
/// </summary>
/// <param name="text">A single line; words are separated by ' '.</param>
/// <returns>
/// A result whose text is the kept words joined by single spaces, whose word_count is
/// incremented once per kept word, and whose similarity is the mean similarity of the
/// kept words (0 when no word was kept).
/// </returns>
private static DictResult checkOneLine(string text)
{
    DictResult dictRLine = new DictResult();
    dictRLine.similarity = 0;
    dictRLine.text = "";

    List<string> acceptedWords = new List<string>();
    string[] fragments = text.Split(' ');

    for (int k = 0; k < fragments.Length; k++)
    {
        DictResult wordResult = checkOneWord(fragments[k]);
        if (wordResult.similarity > _minWordSimilarity)
        {
            acceptedWords.Add(wordResult.text);
            dictRLine.similarity += wordResult.similarity;
            dictRLine.word_count++;
        }
    }

    // Join only the accepted words so a rejected final word no longer leaves a trailing space.
    dictRLine.text = string.Join(" ", acceptedWords.ToArray());

    // Avoid 0/0 (NaN) when every word on the line was rejected.
    if (acceptedWords.Count > 0)
    {
        dictRLine.similarity /= (double)(dictRLine.word_count);
    }

    return dictRLine;
}
/// <summary>
/// Checks every space-separated word of one line with the flag-aware checkOneWord overload
/// and keeps the words whose similarity exceeds _minWordSimilarity.
/// </summary>
/// <param name="text">A single line; words are separated by ' '.</param>
/// <param name="f">When true, the first word of the line is checked with its front flag set.</param>
/// <param name="b">
/// When true, the last word of the line is checked with its back flag set, unless that word
/// already received the front flag.
/// </param>
/// <returns>
/// A result whose text is the kept words joined by single spaces and trimmed, whose
/// word_count is incremented once per kept word, and whose similarity is the mean
/// similarity of the kept words (0 when no word was kept).
/// </returns>
private static DictResult checkOneLine(string text, bool f, bool b)
{
    DictResult dictRLine = new DictResult();
    dictRLine.similarity = 0;
    dictRLine.text = "";

    List<string> acceptedWords = new List<string>();
    string[] fragments = text.Split(' ');

    for (int k = 0; k < fragments.Length; k++)
    {
        // The two flags stay mutually exclusive, exactly as before: a word that gets the
        // front flag never gets the back flag, even on a one-word line.
        // NOTE(review): confirm a one-word line with both f and b set should not pass both.
        bool wordFront = k == 0 && f;
        bool wordBack = !wordFront && k == fragments.Length - 1 && b;

        DictResult wordResult = checkOneWord(fragments[k], wordFront, wordBack);
        if (wordResult.similarity > _minWordSimilarity)
        {
            acceptedWords.Add(wordResult.text);
            dictRLine.similarity += wordResult.similarity;
            dictRLine.word_count++;
        }
    }

    // Trim() returns a new string; the result has to be stored to have any effect.
    dictRLine.text = string.Join(" ", acceptedWords.ToArray()).Trim();

    // Avoid 0/0 (NaN) when every word on the line was rejected.
    if (acceptedWords.Count > 0)
    {
        dictRLine.similarity /= (double)(dictRLine.word_count);
    }

    return dictRLine;
}
/// <summary>
/// Runs the flag-aware dictionary check on every line of <paramref name="text"/> and merges
/// the per-line results into a single page-level result.
/// </summary>
/// <param name="text">Input text; lines are separated by '\n'.</param>
/// <param name="front">When true, the first line is checked with its front flag set.</param>
/// <param name="back">
/// When true, the last line is checked with its back flag set, unless that line already
/// received the front flag.
/// </param>
/// <returns>
/// A result whose text is the checked lines re-joined with '\n' and trimmed, whose
/// word_count is the total of the per-line word counts, and whose similarity is the
/// word-count-weighted mean of the line similarities (0 when no line contributed any word).
/// </returns>
private static DictResult checkMultiLines(string text, bool front, bool back)
{
    DictResult dictRPage = new DictResult();
    dictRPage.similarity = 0;
    dictRPage.text = "";
    dictRPage.word_count = 0;

    string[] lines = text.Split('\n');
    string[] checkedLines = new string[lines.Length];

    for (int i = 0; i < lines.Length; i++)
    {
        // The two flags stay mutually exclusive, exactly as before: a line that gets the
        // front flag never gets the back flag, even when the input has a single line.
        // NOTE(review): confirm a single-line input with both flags set should not pass both.
        bool lineFront = i == 0 && front;
        bool lineBack = !lineFront && i == lines.Length - 1 && back;

        DictResult lineResult = checkOneLine(lines[i], lineFront, lineBack);
        checkedLines[i] = lineResult.text;

        // A line without accepted words carries no weight. Skipping it also keeps an
        // undefined (NaN) line score from poisoning the page average.
        if (lineResult.word_count > 0)
        {
            dictRPage.similarity += lineResult.similarity * (double)lineResult.word_count;
            // Accumulate: the previous "= +" assigned the last line's count instead of summing.
            dictRPage.word_count += lineResult.word_count;
        }
    }

    // Trim() returns a new string; the result has to be stored to have any effect.
    dictRPage.text = string.Join("\n", checkedLines).Trim();

    // Avoid 0/0 when nothing was accepted on the whole page.
    if (dictRPage.word_count > 0)
    {
        dictRPage.similarity /= (double)dictRPage.word_count;
    }

    return dictRPage;
}
/// <summary>
/// Looks up the dictionary word most similar to <paramref name="text"/> using
/// findSimilarDictionaryWord.
/// </summary>
/// <param name="text">The word to check.</param>
/// <returns>
/// A result holding the first collected candidate and its similarity, or an empty text with
/// similarity 0 when the word is rejected.
/// </returns>
private static DictResult checkOneWord(string text)
{
    DictResult outcome = new DictResult();
    List<string> candidates = new List<string>();
    double bestScore = 0;
    bool rejected;

    if (text.Length == _dictionaryExactMatchStringLength)
    {
        // Short strings are looking for the exact match: a single lookup with 0 as the
        // third argument, accepted only when the score is exactly 1.
        bestScore = findSimilarDictionaryWord(text, bestScore, 0, candidates);
        rejected = bestScore != 1;
    }
    else
    {
        // One lookup per value from 0 up to (but excluding) _maxWordLength, each call
        // receiving the best score found so far.
        int round = 0;
        while (round < _maxWordLength)
        {
            bestScore = findSimilarDictionaryWord(text, bestScore, round, candidates);
            round++;
        }

        // Dictionary word not found (most similar is 1), e.g. hill vs hall = 0.333333.
        rejected = bestScore < _minWordSimilarity;
    }

    if (rejected)
    {
        outcome.similarity = 0;
        outcome.text = "";
        return outcome;
    }

    outcome.similarity = bestScore;
    outcome.text = candidates[0];
    return outcome;
}
/// <summary>
/// Checks one word against the "frequency" search index: first by exact term match on
/// word_name and, for words longer than _dictionaryExactMatchStringLength that have no
/// exact match, by a fuzzy query whose candidates are resolved with NeedlemanWunschTiebreaker.
/// </summary>
/// <param name="text">The word to check.</param>
/// <param name="front">Forwarded unchanged to NeedlemanWunschTiebreaker.</param>
/// <param name="back">Forwarded unchanged to NeedlemanWunschTiebreaker.</param>
/// <returns>
/// A result whose text is the accepted word with '\n', '\r' and ' ' trimmed from both ends,
/// or empty when the word is rejected. Similarity is 1 for an exact index hit and 0 for a
/// rejected word.
/// </returns>
private static DictResult checkOneWord(string text, bool front, bool back)
{
    DictResult dictR = new DictResult();

    // Words below the exact-match length are always rejected, so answer before spending a
    // search round trip on them.
    if (text.Length < _dictionaryExactMatchStringLength)
    {
        dictR.similarity = 0;
        dictR.text = "";
        return dictR;
    }

    var checkExact = client.Search<Frequency>(q => q
        .From(0)
        .Size(100)
        .Index("frequency")
        .Type("frequency")
        .Query(fq => fq
            .Filtered(fqq => fqq
                .Query(qq => qq.MatchAll())
                .Filter(ff => ff
                    .Bool(b => b
                        .Must(m1 => m1.Term("word_name", text.ToLower()))
                    )
                )
            )
        )
    );

    if (checkExact.Documents.Any())
    {
        // Exact dictionary hit. The score used to come from a variable that was never
        // updated (always 0), so exact matches were reported with similarity 0.
        dictR.similarity = 1;
        dictR.text = text;
    }
    else if (text.Length == _dictionaryExactMatchStringLength)
    {
        // Short strings are looking for the exact match only.
        dictR.similarity = 0;
        dictR.text = "";
    }
    else
    {
        // The query text is the lower-cased word padded with a trailing space; each pad
        // raises the allowed fuzziness by 2.
        string temp = text.ToLower() + " ";
        int fuzziness = 3;

        // A word that starts with an upper-case letter followed by a lower-case letter is
        // not padded at the front. The length guard prevents an out-of-range read of text[1].
        bool capitalised = text.Length > 1 && char.IsUpper(text[0]) && char.IsLower(text[1]);
        if (!capitalised)
        {
            temp = " " + temp;
            fuzziness += 2;
        }

        if (text.Length > 3)
        {
            fuzziness++;
        }
        if (text.Length > 4)
        {
            fuzziness++;
        }

        double minsim = 1 - (double)fuzziness / (double)temp.Length - .001;

        var searchResults = client.Search<Frequency>(s => s
            .From(0)
            .Size(10000)
            .Index("frequency")
            .Type("frequency")
            .Query(q => q
                .FuzzyLikeThis(fz => fz.OnFields(w => w.word_name).LikeText(temp).MaxQueryTerms(20000).MinimumSimilarity(minsim))
            )
        );
        Log.WriteLine("EDIT DISTANCE: " + fuzziness + ". \nMINSIM:" + minsim);

        var wordList = searchResults.Documents;
        if (wordList.Count() == 1)
        {
            dictR.text = wordList.First().word_name;
        }
        else
        {
            dictR.text = NeedlemanWunschTiebreaker(wordList, text, front, back);
        }

        // NOTE(review): no similarity is assigned for a fuzzy match, so callers that filter
        // on similarity will discard these corrections - confirm what score belongs here.
    }

    // Guard against a null candidate text before trimming.
    dictR.text = (dictR.text ?? "").Trim('\n', '\r', ' ');
    return dictR;
}
/// <summary>
/// Checks every space-separated word of one line with the flag-aware checkOneWord overload
/// and keeps the words whose similarity exceeds _minWordSimilarity.
/// </summary>
/// <param name="text">A single line; words are separated by ' '.</param>
/// <param name="f">When true, the first word of the line is checked with its front flag set.</param>
/// <param name="b">
/// When true, the last word of the line is checked with its back flag set, unless that word
/// already received the front flag.
/// </param>
/// <returns>
/// A result whose text is the kept words joined by single spaces and trimmed, whose
/// word_count is incremented once per kept word, and whose similarity is the mean
/// similarity of the kept words (0 when no word was kept).
/// </returns>
private static DictResult checkOneLine(string text, bool f, bool b)
{
    DictResult dictRLine = new DictResult();
    dictRLine.similarity = 0;
    dictRLine.text = "";

    List<string> acceptedWords = new List<string>();
    string[] fragments = text.Split(' ');

    for (int k = 0; k < fragments.Length; k++)
    {
        // The two flags stay mutually exclusive, exactly as before: a word that gets the
        // front flag never gets the back flag, even on a one-word line.
        // NOTE(review): confirm a one-word line with both f and b set should not pass both.
        bool wordFront = k == 0 && f;
        bool wordBack = !wordFront && k == fragments.Length - 1 && b;

        DictResult wordResult = checkOneWord(fragments[k], wordFront, wordBack);
        if (wordResult.similarity > _minWordSimilarity)
        {
            acceptedWords.Add(wordResult.text);
            dictRLine.similarity += wordResult.similarity;
            dictRLine.word_count++;
        }
    }

    // Trim() returns a new string; the result has to be stored to have any effect.
    dictRLine.text = string.Join(" ", acceptedWords.ToArray()).Trim();

    // Avoid 0/0 (NaN) when every word on the line was rejected.
    if (acceptedWords.Count > 0)
    {
        dictRLine.similarity /= (double)(dictRLine.word_count);
    }

    return dictRLine;
}
/// <summary>
/// Runs the flag-aware dictionary check on every line of <paramref name="text"/> and merges
/// the per-line results into a single page-level result.
/// </summary>
/// <param name="text">Input text; lines are separated by '\n'.</param>
/// <param name="front">When true, the first line is checked with its front flag set.</param>
/// <param name="back">
/// When true, the last line is checked with its back flag set, unless that line already
/// received the front flag.
/// </param>
/// <returns>
/// A result whose text is the checked lines re-joined with '\n' and trimmed, whose
/// word_count is the total of the per-line word counts, and whose similarity is the
/// word-count-weighted mean of the line similarities (0 when no line contributed any word).
/// </returns>
private static DictResult checkMultiLines(string text, bool front, bool back)
{
    DictResult dictRPage = new DictResult();
    dictRPage.similarity = 0;
    dictRPage.text = "";
    dictRPage.word_count = 0;

    string[] lines = text.Split('\n');
    string[] checkedLines = new string[lines.Length];

    for (int i = 0; i < lines.Length; i++)
    {
        // The two flags stay mutually exclusive, exactly as before: a line that gets the
        // front flag never gets the back flag, even when the input has a single line.
        // NOTE(review): confirm a single-line input with both flags set should not pass both.
        bool lineFront = i == 0 && front;
        bool lineBack = !lineFront && i == lines.Length - 1 && back;

        DictResult lineResult = checkOneLine(lines[i], lineFront, lineBack);
        checkedLines[i] = lineResult.text;

        // A line without accepted words carries no weight. Skipping it also keeps an
        // undefined (NaN) line score from poisoning the page average.
        if (lineResult.word_count > 0)
        {
            dictRPage.similarity += lineResult.similarity * (double)lineResult.word_count;
            // Accumulate: the previous "= +" assigned the last line's count instead of summing.
            dictRPage.word_count += lineResult.word_count;
        }
    }

    // Trim() returns a new string; the result has to be stored to have any effect.
    dictRPage.text = string.Join("\n", checkedLines).Trim();

    // Avoid 0/0 when nothing was accepted on the whole page.
    if (dictRPage.word_count > 0)
    {
        dictRPage.similarity /= (double)dictRPage.word_count;
    }

    return dictRPage;
}
/// <summary>
/// Looks up the dictionary word most similar to <paramref name="text"/> using
/// findSimilarDictionaryWord.
/// </summary>
/// <param name="text">The word to check.</param>
/// <returns>
/// A result holding the first collected candidate and its similarity, or an empty text with
/// similarity 0 when the word is rejected.
/// </returns>
private static DictResult checkOneWord(string text)
{
    DictResult outcome = new DictResult();
    List<string> candidates = new List<string>();
    double bestScore = 0;
    bool rejected;

    if (text.Length == _dictionaryExactMatchStringLength)
    {
        // Short strings are looking for the exact match: a single lookup with 0 as the
        // third argument, accepted only when the score is exactly 1.
        bestScore = findSimilarDictionaryWord(text, bestScore, 0, candidates);
        rejected = bestScore != 1;
    }
    else
    {
        // One lookup per value from 0 up to (but excluding) _maxWordLength, each call
        // receiving the best score found so far.
        int round = 0;
        while (round < _maxWordLength)
        {
            bestScore = findSimilarDictionaryWord(text, bestScore, round, candidates);
            round++;
        }

        // Dictionary word not found (most similar is 1), e.g. hill vs hall = 0.333333.
        rejected = bestScore < _minWordSimilarity;
    }

    if (rejected)
    {
        outcome.similarity = 0;
        outcome.text = "";
        return outcome;
    }

    outcome.similarity = bestScore;
    outcome.text = candidates[0];
    return outcome;
}
/// <summary>
/// Checks every space-separated word of one line with checkOneWord and keeps the words
/// whose similarity exceeds _minWordSimilarity.
/// </summary>
/// <param name="text">A single line; words are separated by ' '.</param>
/// <returns>
/// A result whose text is the kept words joined by single spaces, whose word_count is
/// incremented once per kept word, and whose similarity is the mean similarity of the
/// kept words (0 when no word was kept).
/// </returns>
private static DictResult checkOneLine(string text)
{
    DictResult dictRLine = new DictResult();
    dictRLine.similarity = 0;
    dictRLine.text = "";

    List<string> acceptedWords = new List<string>();
    string[] fragments = text.Split(' ');

    for (int k = 0; k < fragments.Length; k++)
    {
        DictResult wordResult = checkOneWord(fragments[k]);
        if (wordResult.similarity > _minWordSimilarity)
        {
            acceptedWords.Add(wordResult.text);
            dictRLine.similarity += wordResult.similarity;
            dictRLine.word_count++;
        }
    }

    // Join only the accepted words so a rejected final word no longer leaves a trailing space.
    dictRLine.text = string.Join(" ", acceptedWords.ToArray());

    // Avoid 0/0 (NaN) when every word on the line was rejected.
    if (acceptedWords.Count > 0)
    {
        dictRLine.similarity /= (double)(dictRLine.word_count);
    }

    return dictRLine;
}
/// <summary>
/// Runs the dictionary check on every line of <paramref name="text"/> and merges the
/// per-line results into a single page-level result.
/// </summary>
/// <param name="text">Input text; lines are separated by '\n'.</param>
/// <returns>
/// A result whose text is the checked lines re-joined with '\n', whose word_count is the
/// total of the per-line word counts, and whose similarity is the word-count-weighted mean
/// of the line similarities (0 when no line contributed any word).
/// </returns>
private static DictResult checkMultiLines(string text)
{
    DictResult dictRPage = new DictResult();
    dictRPage.similarity = 0;
    dictRPage.text = "";
    dictRPage.word_count = 0;

    string[] lines = text.Split('\n');
    string[] checkedLines = new string[lines.Length];

    for (int i = 0; i < lines.Length; i++)
    {
        DictResult lineResult = checkOneLine(lines[i]);
        checkedLines[i] = lineResult.text;

        // A line without accepted words carries no weight. Skipping it also keeps an
        // undefined (NaN) line score from poisoning the page average.
        if (lineResult.word_count > 0)
        {
            dictRPage.similarity += lineResult.similarity * (double)lineResult.word_count;
            // Accumulate: the previous "= +" assigned the last line's count instead of summing.
            dictRPage.word_count += lineResult.word_count;
        }
    }

    dictRPage.text = string.Join("\n", checkedLines);

    // Avoid 0/0 when nothing was accepted on the whole page.
    if (dictRPage.word_count > 0)
    {
        dictRPage.similarity /= (double)dictRPage.word_count;
    }

    return dictRPage;
}
/// <summary>
/// Checks one word against the "frequency" search index: first by exact term match on
/// word_name and, for words longer than _dictionaryExactMatchStringLength that have no
/// exact match, by a fuzzy query whose candidates are resolved with NeedlemanWunschTiebreaker.
/// </summary>
/// <param name="text">The word to check.</param>
/// <param name="front">Forwarded unchanged to NeedlemanWunschTiebreaker.</param>
/// <param name="back">Forwarded unchanged to NeedlemanWunschTiebreaker.</param>
/// <returns>
/// A result whose text is the accepted word with '\n', '\r' and ' ' trimmed from both ends,
/// or empty when the word is rejected. Similarity is 1 for an exact index hit and 0 for a
/// rejected word.
/// </returns>
private static DictResult checkOneWord(string text, bool front, bool back)
{
    DictResult dictR = new DictResult();

    // Words below the exact-match length are always rejected, so answer before spending a
    // search round trip on them.
    if (text.Length < _dictionaryExactMatchStringLength)
    {
        dictR.similarity = 0;
        dictR.text = "";
        return dictR;
    }

    var checkExact = client.Search<Frequency>(q => q
        .From(0)
        .Size(100)
        .Index("frequency")
        .Type("frequency")
        .Query(fq => fq
            .Filtered(fqq => fqq
                .Query(qq => qq.MatchAll())
                .Filter(ff => ff
                    .Bool(b => b
                        .Must(m1 => m1.Term("word_name", text.ToLower()))
                    )
                )
            )
        )
    );

    if (checkExact.Documents.Any())
    {
        // Exact dictionary hit. The score used to come from a variable that was never
        // updated (always 0), so exact matches were reported with similarity 0.
        dictR.similarity = 1;
        dictR.text = text;
    }
    else if (text.Length == _dictionaryExactMatchStringLength)
    {
        // Short strings are looking for the exact match only.
        dictR.similarity = 0;
        dictR.text = "";
    }
    else
    {
        // The query text is the lower-cased word padded with a trailing space; each pad
        // raises the allowed fuzziness by 2.
        string temp = text.ToLower() + " ";
        int fuzziness = 3;

        // A word that starts with an upper-case letter followed by a lower-case letter is
        // not padded at the front. The length guard prevents an out-of-range read of text[1].
        bool capitalised = text.Length > 1 && char.IsUpper(text[0]) && char.IsLower(text[1]);
        if (!capitalised)
        {
            temp = " " + temp;
            fuzziness += 2;
        }

        if (text.Length > 3)
        {
            fuzziness++;
        }
        if (text.Length > 4)
        {
            fuzziness++;
        }

        double minsim = 1 - (double)fuzziness / (double)temp.Length - .001;

        var searchResults = client.Search<Frequency>(s => s
            .From(0)
            .Size(10000)
            .Index("frequency")
            .Type("frequency")
            .Query(q => q
                .FuzzyLikeThis(fz => fz.OnFields(w => w.word_name).LikeText(temp).MaxQueryTerms(20000).MinimumSimilarity(minsim))
            )
        );
        Log.WriteLine("EDIT DISTANCE: " + fuzziness + ". \nMINSIM:" + minsim);

        var wordList = searchResults.Documents;
        if (wordList.Count() == 1)
        {
            dictR.text = wordList.First().word_name;
        }
        else
        {
            dictR.text = NeedlemanWunschTiebreaker(wordList, text, front, back);
        }

        // NOTE(review): no similarity is assigned for a fuzzy match, so callers that filter
        // on similarity will discard these corrections - confirm what score belongs here.
    }

    // Guard against a null candidate text before trimming.
    dictR.text = (dictR.text ?? "").Trim('\n', '\r', ' ');
    return dictR;
}