C# (CSharp) SearchEngine Stemmer.stemTerm Exemples

Langage de programmation: C# (CSharp)

Espace de nommage/Pack: SearchEngine

Class/Type: Stemmer

Méthode/Fonction: stemTerm

Exemples sur hotexamples.com : 2

C# (CSharp) SearchEngine Stemmer.stemTerm - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de SearchEngine.Stemmer.stemTerm extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

stemTerm(2)

add(1)

getResultBuffer(1)

getResultLength(1)

stem(1)

Méthodes fréquemment utilisées

stemTerm (2)

add (1)

getResultBuffer (1)

getResultLength (1)

stem (1)

Associées

UsuarioRepositor

RoleManage

ResponseCoinAddressTableModel

OkCancelDialogFragment

ShooterHerd

WordDoc

Base_Collider

RandomAnimation

SWRemainingTime

Associées dans d'autres langages

Pyrus\PackageFile\v2 (PHP)

Google_Verifier_Pem (PHP)

inputdevice_compa_clear (C++)

num_par_ids (C++)

PixbufFromFile (Go)

Timed (Go)

ServiceResponse (Java)

const (Python)

italic (Python)

Exemple #1

0

Afficher le fichier

Fichier : Parse.cs Projet : leraShtotland/SEARCH-ENGINE

/// <summary>
/// Parses the text of one document into a term -> occurrence-count dictionary.
/// </summary>
/// <remarks>
/// The content is split on spaces, line breaks and "--". Each token is cleaned
/// (leading/trailing/unwanted characters removed) and matched against an ordered
/// chain of rules: markup to skip, dates, "between X and Y" ranges, runs of
/// capitalised words, dollar amounts, hyphenated words, fractions, numbers, plain
/// words and stop words. The first matching rule consumes one or more tokens and
/// advances the cursor. Side effects: resets <c>parserList</c>, <c>termsDic</c>,
/// <c>maxFrecInt</c> and <c>splited</c>, registers <paramref name="docData"/> in
/// <c>docInfo</c>, and stores the document's maximum term frequency in
/// <c>docInfo[docData.Name].max_tf</c>.
/// </remarks>
/// <param name="docData">Metadata of the document being parsed; its Name keys <c>docInfo</c>.</param>
/// <param name="content">Raw text of the document.</param>
/// <returns>
/// The stemmed dictionary (counts of terms sharing a stem are summed) when
/// <c>stemBool</c> is set, otherwise the raw terms dictionary.
/// </returns>
public Dictionary<string, int> parseFile(DocumentData docData, string content)
{
    parserList = new List<string>();
    // Register the document's metadata under its name.
    docInfo[docData.Name] = docData;
    termsDic = new Dictionary<string, int>();
    maxFrecInt = 0;

    string[] delimiters = { " ", "\r\n", "\n", "--" };
    // Split the document's content into tokens; empty tokens are dropped, so
    // every element of splited has at least one character.
    splited = content.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

    string word;
    string termStr = "";
    // True when trailing punctuation was chopped off the current token.
    bool chopedLast;
    int i = 0;

    // Find the matching rule for each token.
    while (i < splited.Length)
    {
        chopedLast = false;

        // Chop unwanted characters from the start, then from the end, of the token.
        word = chopStart(splited[i]);
        while (word != "" && toRemoveLast(word))
        {
            chopedLast = true;
            word = word.Substring(0, word.Length - 1);
        }
        // Delete unwanted characters anywhere in the token.
        word = toRemoveAny(word);

        if (word == "" || !word.Any(char.IsLetterOrDigit))
        {
            // Nothing left that contains a letter or a digit: skip.
            i++;
        }
        else if (word == "Language")
        {
            // The word "Language" is deliberately not indexed.
            i++;
        }
        else if (splited[i] == "<F")
        {
            // Skip everything up to and including the closing "</F>" token.
            while (i < splited.Length - 1 && splited[i] != "</F>")
            {
                i++;
            }
            i++;
        }
        else if (word == "Article" && i + 1 < splited.Length && splited[i + 1].StartsWith("Type"))
        {
            // "Article Type ..." header: skip it. The bounds check prevents an
            // IndexOutOfRangeException when "Article" is the last token.
            i = i + 3;
        }
        else if (splited[i] == "[Text]")
        {
            i++;
        }
        else if (splited[i][0] == '<' || splited[i][splited[i].Length - 1] == '>')
        {
            // Markup tag fragment: skip.
            i++;
        }
        else if (checkAndParseDate(ref i, docData.Name) == true)
        {
            // The helper takes the cursor by reference and is given the document
            // name; nothing more is done here when it reports a match.
        }
        else if (!chopedLast && i < splited.Length - 3
                 && (word == "Between" || word == "between")
                 && (splited[i + 2] == "And" || splited[i + 2] == "and")
                 && (isNumber(splited[i + 1]) || isAfrac(splited[i + 1]))
                 && (isNumberWithAfter(splited[i + 3]) || isAfracWithAfter(splited[i + 3])))
        {
            // "between <number> and <number>" is stored as a single term.
            string upperBound = splited[i + 3];
            // Chop unwanted trailing characters from the second number; guarded
            // against the empty string like the main chopping loop above.
            while (upperBound != "" && toRemoveLast(upperBound))
            {
                upperBound = upperBound.Substring(0, upperBound.Length - 1);
            }
            termStr = "between " + parseNumber(splited[i + 1]) + " and " + parseNumber(upperBound);
            i = i + 4;
            addToDic(termStr, docData.Name);
        }
        else if (!chopedLast && i < splited.Length && word.Length > 1 && word.All(char.IsUpper))
        {
            // All-capital word: delegated to the capital-letters helper, which
            // receives the word and the cursor by reference.
            handleCapitalLetters(docData, ref word, ref i);
        }
        else if (word[0] == '$')
        {
            handleDollarAtStart(docData, ref word, ref i, ref termStr);
        }
        else if (word.Contains('-') && !word.Contains("--") && word[0] != '-' && word[word.Length - 1] != '-')
        {
            // Hyphenated expression with the hyphen strictly inside the word.
            termStr = handleHyphen(word, ref i, ref chopedLast);
            addToDic(termStr.ToLower(), docData.Name);
        }
        else if (isAfrac(word))
        {
            termStr = word;
            i++;
            addToDic(termStr, docData.Name);
        }
        else if (isNumber(word))
        {
            termStr = handleNumber(docData, word, ref i, ref chopedLast);
        }
        else if (word.All(char.IsLetter))
        {
            // Plain word: index it in lower case unless it is a stop word.
            word = word.ToLower();
            if (!stopWords.Contains(word))
            {
                termStr = word;
                addToDic(termStr, docData.Name);
            }
            i++;
        }
        else if (stopWords.Contains(word.ToLower()))
        {
            i++;
        }
        else
        {
            // No rule matched: index the cleaned token as-is (lower-cased).
            termStr = word;
            addToDic(termStr.ToLower(), docData.Name);
            i++;
        }
    }

    if (stemBool == true)
    {
        // Merge the counts of all terms that share a stem.
        Dictionary<string, int> termsDicStemer = new Dictionary<string, int>();
        // Merging can only raise counts, so start from the unstemmed maximum.
        int stemmedMax = maxFrecInt;
        foreach (KeyValuePair<string, int> entry in termsDic)
        {
            string stem = stemmer.stemTerm(entry.Key);
            int merged;
            termsDicStemer.TryGetValue(stem, out merged); // merged stays 0 when the stem is new
            merged += entry.Value;
            termsDicStemer[stem] = merged;
            if (merged > stemmedMax)
            {
                stemmedMax = merged;
            }
        }

        // Record the maximum term frequency for the stemmed path too; the
        // original returned before assigning it.
        docInfo[docData.Name].max_tf = stemmedMax;
        return termsDicStemer;
    }

    docInfo[docData.Name].max_tf = maxFrecInt;
    return termsDic;
}

Exemple #2

0

Afficher le fichier

/// <summary>
/// Normalises a raw token and records one occurrence of the resulting term in
/// <c>termPerDoc</c>, updating <c>maxtf</c>/<c>maxterm</c> when a repeated term
/// becomes the most frequent one.
/// </summary>
/// <remarks>
/// Normalisation: lower-case, strip surrounding '-' and spaces, remove every '/'.
/// Stop words are dropped. A token containing '.' that parses as a number is
/// rounded to two decimals and indexed by its numeric text. A token still
/// containing '-' is handed to <c>splitslash</c>. Otherwise the token is mapped
/// through <c>prefix</c>, optionally stemmed (stems are cached in
/// <c>stemmers</c>), and counted.
/// Numbers are parsed and formatted with the invariant culture so the index
/// does not depend on the machine's regional settings.
/// </remarks>
/// <param name="s">Raw token text; null or empty input is ignored.</param>
/// <param name="fa">Value passed through to <c>termInfo</c> and <c>splitslash</c> when a new term is created.</param>
public void strToLowerCase(string s, int fa)
{
    if (string.IsNullOrEmpty(s))
    {
        return;
    }

    // Trim again after removing '/', because the removal can expose new
    // leading/trailing spaces (e.g. "x /" -> "x "). Checking emptiness only
    // after this last trim keeps an empty term out of the index.
    string str = s.ToLowerInvariant().Trim('-').Trim(' ').Replace("/", "").Trim(' ');
    if (str.Length == 0)
    {
        return;
    }

    if (stop_words.Contains(str))
    {
        return;
    }

    // Decimal numbers: the check is made on the original token, as before.
    double doub;
    if (s.Contains(".")
        && Double.TryParse(
            s,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out doub))
    {
        string numberTerm = System.Math.Round(doub, 2)
            .ToString(System.Globalization.CultureInfo.InvariantCulture);
        recordTermOccurrence(numberTerm, fa);
        return;
    }

    if (str.Contains("-"))
    {
        splitslash(str, fa);
        return;
    }

    // Replace the token with its mapped value from the prefix table, if any.
    string mapped;
    if (prefix.TryGetValue(str, out mapped))
    {
        str = mapped;
    }

    if (toStem)
    {
        // Stem each distinct term only once; later occurrences hit the cache.
        string stemmed;
        if (!stemmers.TryGetValue(str, out stemmed))
        {
            stemmed = stem.stemTerm(str);
            stemmers.Add(str, stemmed);
        }
        str = stemmed;
    }

    recordTermOccurrence(str, fa);
}

// Adds one occurrence of term to termPerDoc; on a repeat occurrence, also
// updates maxtf/maxterm when the term's count exceeds the current maximum.
private void recordTermOccurrence(string term, int fa)
{
    termInfo info;
    if (!termPerDoc.TryGetValue(term, out info))
    {
        termPerDoc.Add(term, new termInfo(1, fa));
        return;
    }

    info.tf++;
    if (info.tf > maxtf)
    {
        maxtf = info.tf;
        maxterm = term;
    }
}