/// <summary>
/// Folds a list of morph entries into one aggregated <c>WordInfo</c>.
/// </summary>
/// <param name="morphinfos">List of <c>WordInfo</c> items. Each item is read at
/// <c>senseCounts[1..4]</c> and <c>texts[0]</c>.</param>
/// <returns>
/// A new <c>WordInfo</c> with a five-slot <c>senseCounts</c> array holding the per-slot
/// sums, and a <c>texts</c> list holding the first text of every entry without duplicates.
/// </returns>
public static WordInfo Compine(ArrayList morphinfos)
{
    WordInfo combined = new WordInfo();
    combined.senseCounts = new int[5];
    combined.texts = new ArrayList();

    foreach (WordInfo entry in morphinfos)
    {
        // Slot 0 is never accumulated (presumably the "Unknown" part of speech -- confirm).
        int slot = 1;
        while (slot < 5)
        {
            combined.senseCounts[slot] += entry.senseCounts[slot];
            slot++;
        }

        // Remember the entry's primary text, once.
        object primaryText = entry.texts[0];
        if (combined.texts.Contains(primaryText))
        {
            continue;
        }

        combined.texts.Add(primaryText);
    }

    return combined;
}
/// <summary>
/// Returns the entry of <paramref name="morphinfos"/> whose <c>text</c> equals
/// <paramref name="morph"/>, appending a fresh entry when none exists.
/// </summary>
/// <param name="morphinfos">Working list of <c>WordInfo</c> entries; may be extended.</param>
/// <param name="morph">Morph string to look up.</param>
/// <returns>The existing or newly appended entry.</returns>
private static WordInfo GetMorphInfo(ArrayList morphinfos, string morph)
{
    // The list is expected to stay small, so a plain linear scan is sufficient.
    for (int n = 0; n < morphinfos.Count; n++)
    {
        WordInfo candidate = (WordInfo)morphinfos[n];
        if (candidate.text == morph)
        {
            return candidate;
        }
    }

    // Not present yet: register a new entry with one counter per part of speech.
    WordInfo created = new WordInfo();
    created.text = morph;
    created.SenseCounts = new int[Enums.Length];

    int position = morphinfos.Add(created);
    return (WordInfo)morphinfos[position];
}
/// <summary>
/// Merges the lookup result of a word with the lookup result of its morph.
/// </summary>
/// <param name="word">Result for the literal word; returned unchanged when
/// <paramref name="morph"/> has a <c>Strength</c> of zero.</param>
/// <param name="morph">Result for the morph; returned unchanged when
/// <paramref name="word"/> has a <c>Strength</c> of zero.</param>
/// <returns>
/// One of the inputs when the other has zero strength. Otherwise a new <c>WordInfo</c>
/// whose <c>texts</c> are the morph texts plus the word's first text, whose
/// <c>senseCounts</c> are the element-wise sums, and whose <c>partOfSpeech</c> is the
/// enum value at the slot with the largest sum (<c>Unknown</c> when all sums are zero).
/// </returns>
public static WordInfo Compine(WordInfo word, WordInfo morph)
{
    if (word.Strength == 0)
    {
        return morph;
    }

    if (morph.Strength == 0)
    {
        return word;
    }

    WordInfo result = new WordInfo();

    result.texts = new ArrayList();
    result.texts.AddRange(morph.texts);
    if (!result.texts.Contains(word.texts[0]))
    {
        result.texts.Add(word.texts[0]);
    }

    int[] wordCounts = word.senseCounts;
    int[] morphCounts = morph.senseCounts;
    int maxLen = Math.Max(wordCounts.Length, morphCounts.Length);
    result.senseCounts = new int[maxLen];
    for (int i = 0; i < maxLen; i++)
    {
        // The arrays may differ in length; a missing slot contributes zero instead of
        // throwing IndexOutOfRangeException.
        int fromWord = i < wordCounts.Length ? wordCounts[i] : 0;
        int fromMorph = i < morphCounts.Length ? morphCounts[i] : 0;
        result.senseCounts[i] = fromWord + fromMorph;
    }

    Wnlib.PartsOfSpeech[] enums = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

    // Only scan slots that exist in both the enum table and the summed counts.
    int scanLimit = Math.Min(enums.Length, maxLen);
    int maxCount = 0;
    for (int i = 0; i < scanLimit; i++)
    {
        if (result.senseCounts[i] > maxCount)
        {
            maxCount = result.senseCounts[i];
            result.partOfSpeech = enums[i];
        }
    }

    return result;
}
/// <summary>
/// Finds the <c>WordInfo</c> for <paramref name="morph"/> in
/// <paramref name="morphinfos"/>, creating and appending one if it is missing.
/// </summary>
/// <param name="morphinfos">List of <c>WordInfo</c> entries collected so far.</param>
/// <param name="morph">Morph text used as the lookup key.</param>
/// <returns>The matching entry, or the entry that was just appended.</returns>
private static WordInfo getMorphInfo(ArrayList morphinfos, string morph)
{
    // A sequential search is adequate because the list is not expected to grow large.
    int total = morphinfos.Count;
    for (int at = 0; at < total; at++)
    {
        WordInfo existing = (WordInfo)morphinfos[at];
        if (existing.text == morph)
        {
            return existing;
        }
    }

    // No entry yet: build one with a zeroed counter per part of speech and store it.
    WordInfo fresh = new WordInfo();
    fresh.text = morph;
    fresh.senseCounts = new int[enums.Length];

    int storedAt = morphinfos.Add(fresh);
    return (WordInfo)morphinfos[storedAt];
}
/// <summary>
/// Looks up every morph of <paramref name="word"/> under every known part of speech
/// and returns the morph with the highest total sense count.
/// </summary>
/// <remarks>
/// Intended for words that had no exact match. Each candidate's <c>partOfSpeech</c> is
/// set to the part of speech with its largest sense count as a side effect, including
/// candidates that are not returned.
/// </remarks>
/// <param name="word">Word whose morphs are generated and looked up.</param>
/// <returns>The strongest candidate, or an empty <c>WordInfo</c> when no morph has any
/// sense.</returns>
private static WordInfo lookupWordMorphs(string word)
{
    ArrayList candidates = new ArrayList();

    // Phase 1: collect sense counts for every morph, per part of speech.
    for (int slot = 0; slot < enums.Length; slot++)
    {
        Wnlib.PartsOfSpeech pos = enums[slot];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        for (string morph = morphs.next(); morph != null; morph = morphs.next())
        {
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
            if (index == null)
            {
                // This morph has no entry for the current part of speech.
                continue;
            }

            WordInfo entry = getMorphInfo(candidates, morph);
            entry.senseCounts[slot] = index.sense_cnt;
        }
    }

    // Phase 2: pick the candidate with the greatest summed sense count.
    WordInfo best = new WordInfo();
    int bestStrength = 0;
    foreach (WordInfo candidate in candidates)
    {
        int strength = 0;
        int peak = 0;
        for (int slot = 0; slot < enums.Length; slot++)
        {
            Wnlib.PartsOfSpeech pos = enums[slot];
            if (pos == Wnlib.PartsOfSpeech.Unknown)
            {
                continue;
            }

            int count = candidate.senseCounts[slot];
            strength += count;
            if (count > peak)
            {
                peak = count;
                candidate.partOfSpeech = pos;
            }
        }

        // Strictly greater: on a tie the earlier candidate wins.
        if (strength > bestStrength)
        {
            bestStrength = strength;
            best = candidate;
        }
    }

    return best;
}
/// <summary>
/// Looks up <paramref name="word"/> under each part of speech and records the sense
/// count found for each one.
/// </summary>
/// <remarks>
/// The part of speech with the highest sense count is taken as the most probable one.
/// Known limitation: meaning depends on context, and this lookup considers the word in
/// isolation. Passing the surrounding sentence as context would be better but is
/// deliberately not attempted here.
/// </remarks>
/// <param name="word">Word to look up verbatim.</param>
/// <returns>A <c>WordInfo</c> with per-part-of-speech sense counts and the winning
/// <c>partOfSpeech</c> (<c>Unknown</c> when nothing was found).</returns>
private static WordInfo lookupWord(string word)
{
    Wnlib.PartsOfSpeech[] enums = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    WordInfo wordinfo = new WordInfo();
    wordinfo.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
    wordinfo.senseCounts = new int[enums.Length];

    int highest = 0;
    for (int slot = 0; slot < enums.Length; slot++)
    {
        Wnlib.PartsOfSpeech pos = enums[slot];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));
        if (index == null)
        {
            // The word does not exist under this part of speech.
            continue;
        }

        wordinfo.senseCounts[slot] = index.sense_cnt;

        // Keep the part of speech with the most senses seen so far.
        if (wordinfo.senseCounts[slot] > highest)
        {
            highest = wordinfo.senseCounts[slot];
            wordinfo.partOfSpeech = pos;
        }
    }

    return wordinfo;
}
/// <summary>
/// Fallback lookup for a word without an exact match: tries every morph of
/// <paramref name="word"/> under every part of speech and returns the strongest one.
/// </summary>
/// <remarks>
/// "Strength" is the sum of a morph's sense counts over all parts of speech other than
/// <c>Unknown</c>. While scoring, each candidate's <c>partOfSpeech</c> is updated to the
/// part of speech with its highest individual sense count.
/// </remarks>
/// <param name="word">Word whose morphs are enumerated.</param>
/// <returns>The highest-scoring candidate, or a default <c>WordInfo</c> if none scored
/// above zero.</returns>
private static WordInfo LookupWordMorphs(string word)
{
    ArrayList found = new ArrayList();

    // Gather sense counts per morph and part of speech.
    for (int posIndex = 0; posIndex < Enums.Length; posIndex++)
    {
        Wnlib.PartsOfSpeech pos = Enums[posIndex];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        string morph = morphs.next();
        while (morph != null)
        {
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
            if (index != null)
            {
                WordInfo info = GetMorphInfo(found, morph);
                info.SenseCounts[posIndex] = index.SenseCnt;
            }

            morph = morphs.next();
        }
    }

    // Score every gathered morph and remember the strongest.
    WordInfo winner = new WordInfo();
    int winnerStrength = 0;
    foreach (WordInfo info in found)
    {
        int strength = 0;
        int topSenseCount = 0;
        for (int posIndex = 0; posIndex < Enums.Length; posIndex++)
        {
            Wnlib.PartsOfSpeech pos = Enums[posIndex];
            if (pos == Wnlib.PartsOfSpeech.Unknown)
            {
                continue;
            }

            int senses = info.SenseCounts[posIndex];
            strength += senses;
            if (senses > topSenseCount)
            {
                topSenseCount = senses;
                info.partOfSpeech = pos;
            }
        }

        // Ties keep the earlier candidate because the comparison is strict.
        if (strength > winnerStrength)
        {
            winnerStrength = strength;
            winner = info;
        }
    }

    return winner;
}
/// <summary>
/// Classifies <paramref name="word"/> as an adjective form relative to the candidate
/// base forms in <paramref name="texts"/>.
/// </summary>
/// <remarks>
/// For each base form, in order:
/// 1. If the upper-cased base form is a key of <c>Adj</c>, its entry lists the
///    comma-separated forms per degree; index 0 yields "PADJ", 1 "CADJ", 2 "SADJ".
///    A single-element entry always yields "PADJ".
/// 2. Otherwise the regular suffixes are tried: base + "ER" / "EST", and for bases
///    ending in "E" the base without that "E" + "ER" / "EST".
/// 3. Otherwise the word is looked up with <c>FindWordInfo</c> and "PADJ" is returned
///    when <c>senseCounts[3]</c> is positive (presumably the adjective slot -- confirm).
/// The tags presumably mean positive / comparative / superlative adjective.
/// </remarks>
/// <param name="word">Word to classify; compared case-sensitively against upper-cased
/// forms, so it is expected to be upper case.</param>
/// <param name="texts">Candidate base forms (strings).</param>
/// <returns>"PADJ", "CADJ", "SADJ", or <c>null</c> when no rule matched.</returns>
public static string findadjinfo(string word, ArrayList texts)
{
    foreach (string s in texts)
    {
        string upper = s.ToUpper();

        if (Adj.Contains(upper))
        {
            ArrayList forms = (ArrayList)Adj[upper];
            if (forms.Count == 1)
            {
                return "PADJ";
            }

            for (int i = 0; i < forms.Count; i++)
            {
                foreach (string r in ((string)forms[i]).Split(','))
                {
                    if (word == r)
                    {
                        switch (i)
                        {
                            case 0: return "PADJ";
                            case 1: return "CADJ";
                            case 2: return "SADJ";
                        }
                    }
                }
            }

            // Listed in the table but the word is none of its forms: try the next text.
            continue;
        }

        if (word == upper + "ER")
        {
            return "CADJ";
        }

        if (word == upper + "EST")
        {
            return "SADJ";
        }

        // Dropping the final letter is only valid for bases that end in "E".
        // Previously the "EST" comparison escaped this guard through && / || precedence,
        // which stripped the last letter of any base (false "SADJ" matches) and threw
        // ArgumentOutOfRangeException for an empty base.
        if (upper.EndsWith("E"))
        {
            string stem = upper.Substring(0, upper.Length - 1);
            if (word == stem + "ER")
            {
                return "CADJ";
            }

            if (word == stem + "EST")
            {
                return "SADJ";
            }
        }

        WordInfo wi = FindWordInfo(word, tagged_only, false);
        if (wi.senseCounts[3] > 0)
        {
            return "PADJ";
        }
    }

    return null;
}
/// <summary>
/// Collects the verb-form tags that <paramref name="word"/> can carry relative to the
/// candidate base forms in <paramref name="texts"/>.
/// </summary>
/// <remarks>
/// Tags produced: "VINF", "V", "VPSP", "VING". Going by the suffix rules below, "VPSP"
/// goes with "-ED" forms, "VING" with "-ING" forms and "V" with "-S"/"-ES"/"-IES" forms;
/// the exact linguistic meaning of each tag is not defined here.
/// For every base form two passes run:
/// 1. If the upper-cased base is a key of <c>Verbs</c>, the word is matched against the
///    comma-separated forms of that entry (index 0 "VINF", 1 and 4 "V", 2 "VPSP",
///    3 "VING") and against the regular "-ING" / "-S" / "-ES" / "-IES" spellings.
/// 2. Regardless of pass 1, the first matching regular spelling ("-ED", "-ING",
///    "-S"/"-ES", E-dropping "-ING"/"-ES"/"-ED", "Y" to "IES") adds its tag. If none
///    matches, the word is looked up with <c>FindWordInfo</c> and "VINF" is added when
///    <c>senseCounts[2]</c> is positive (presumably the verb slot -- confirm).
/// Finally "V" is added whenever "VINF" or "VPSP" is present without it.
/// </remarks>
/// <param name="word">Word to classify; compared case-sensitively against upper-cased
/// forms, so it is expected to be upper case.</param>
/// <param name="texts">Candidate base forms (strings).</param>
/// <returns>List of distinct tag strings; empty when nothing matched.</returns>
private static ArrayList findverbinfo(string word, ArrayList texts)
{
    ArrayList verbinfo = new ArrayList();

    foreach (string s in texts)
    {
        string upper = s.ToUpper();
        bool endsWithE = upper.EndsWith("E");
        bool endsWithY = upper.EndsWith("Y");

        // Base without its final letter; only meaningful (and only computed) for bases
        // ending in "E" or "Y", which also guarantees the base is not empty.
        string stem = (endsWithE || endsWithY) ? upper.Substring(0, upper.Length - 1) : null;

        // Pass 1: forms listed in the verb table, plus regular spellings of a listed verb.
        if (Verbs.Contains(upper))
        {
            ArrayList verbs = (ArrayList)Verbs[upper];
            for (int i = 0; i < verbs.Count; i++)
            {
                foreach (string r in ((string)verbs[i]).Split(','))
                {
                    if (word != r)
                    {
                        continue;
                    }

                    switch (i)
                    {
                        case 0:
                            addVerbTag(verbinfo, "VINF");
                            break;
                        case 1:
                        case 4:
                            addVerbTag(verbinfo, "V");
                            break;
                        case 2:
                            addVerbTag(verbinfo, "VPSP");
                            break;
                        case 3:
                            addVerbTag(verbinfo, "VING");
                            break;
                    }
                }
            }

            if (word == upper + "ING")
            {
                addVerbTag(verbinfo, "VING");
            }

            // E-dropping "-ING". The matching E-dropping "-ES" case used to sit in an
            // unreachable else-branch; it is the same spelling as base + "S", which the
            // next test already covers, so no separate check is needed.
            if (endsWithE && word == stem + "ING")
            {
                addVerbTag(verbinfo, "VING");
            }

            if (word == upper + "S" || word == upper + "ES")
            {
                addVerbTag(verbinfo, "V");
            }

            if (endsWithY && word == stem + "IES")
            {
                addVerbTag(verbinfo, "V");
            }
        }

        // Pass 2: regular spellings, first match wins; runs for every base form.
        if (word == upper + "ED")
        {
            addVerbTag(verbinfo, "VPSP");
        }
        else if (word == upper + "ING")
        {
            addVerbTag(verbinfo, "VING");
        }
        else if (word == upper + "S" || word == upper + "ES")
        {
            addVerbTag(verbinfo, "V");
        }
        else if (endsWithE && (word == stem + "ING" || word == stem + "ES" || word == stem + "ED"))
        {
            if (word == stem + "ING")
            {
                addVerbTag(verbinfo, "VING");
            }
            else if (word == stem + "ES")
            {
                addVerbTag(verbinfo, "V");
            }
            else
            {
                addVerbTag(verbinfo, "VPSP");
            }
        }
        else if (endsWithY && word == stem + "IES")
        {
            addVerbTag(verbinfo, "V");
        }
        else
        {
            WordInfo wi = FindWordInfo(word, tagged_only, false);
            if (wi.senseCounts[2] > 0)
            {
                addVerbTag(verbinfo, "VINF");
            }
        }
    }

    // An infinitive or "VPSP" form always implies the generic verb tag as well.
    if (!verbinfo.Contains("V") && (verbinfo.Contains("VINF") || verbinfo.Contains("VPSP")))
    {
        verbinfo.Add("V");
    }

    return verbinfo;
}

// Appends tag to tags unless it is already present.
private static void addVerbTag(ArrayList tags, string tag)
{
    if (!tags.Contains(tag))
    {
        tags.Add(tag);
    }
}
/// <summary>
/// Decides whether <paramref name="Word"/> is a plural noun.
/// </summary>
/// <remarks>
/// A word equal to the upper-cased form of any key of <c>Noun</c> is reported as plural
/// immediately (presumably a table of irregular plurals -- confirm). Otherwise the first
/// matching plural ending is replaced by its singular counterpart and the result is
/// looked up with <c>FindWordInfo</c>; the word is plural when <c>senseCounts[1]</c> of
/// that lookup is positive (presumably the noun slot -- confirm).
/// </remarks>
/// <param name="Word">Word to test; endings are matched in upper case.</param>
/// <returns><c>true</c> when the word is considered plural, otherwise <c>false</c>.</returns>
public static bool IsPlural(string Word)
{
    foreach (string key in Noun.Keys)
    {
        if (key.ToUpper() == Word)
        {
            return true;
        }
    }

    // Plural endings and the singular endings that replace them, in matching priority
    // (longer, more specific endings are tried before the bare "S").
    string[] pluralEndings = { "CHES", "SHES", "SES", "XES", "ZES", "MEN", "IES", "S" };
    string[] singularEndings = { "CH", "SH", "S", "X", "Z", "MAN", "Y", "" };

    string singular = null;
    for (int k = 0; k < pluralEndings.Length; k++)
    {
        string ending = pluralEndings[k];
        if (!Word.EndsWith(ending))
        {
            continue;
        }

        singular = Word.Remove(Word.Length - ending.Length, ending.Length) + singularEndings[k];
        break;
    }

    if (singular == null)
    {
        // No plural ending at all.
        return false;
    }

    WordInfo wi = FindWordInfo(singular, tagged_only, false);
    return wi.senseCounts[1] > 0;
}
/// <summary>
/// Fallback lookup for a word without an exact match: gathers the sense counts of every
/// morph of <paramref name="word"/> under every part of speech and combines them.
/// </summary>
/// <remarks>
/// An earlier revision picked the single strongest morph here; the result is now the
/// aggregate produced by <c>WordInfo.Compine</c> over all morphs found.
/// NOTE(review): entries are created through <c>getMorphInfo</c>, which fills
/// <c>text</c>, while the list overload of <c>Compine</c> reads <c>texts[0]</c> --
/// confirm that <c>WordInfo</c> keeps the two in sync.
/// </remarks>
/// <param name="word">Word whose morphs are enumerated.</param>
/// <param name="tagged_only">When <c>true</c>, only tagged senses count: morphs without
/// tagged senses are skipped and <c>tagsense_cnt</c> is recorded instead of
/// <c>sense_cnt</c>.</param>
/// <returns>The combined <c>WordInfo</c> for all morphs found.</returns>
private static WordInfo lookupWordMorphs(string word, bool tagged_only)
{
    ArrayList collected = new ArrayList();

    for (int slot = 0; slot < enums.Length; slot++)
    {
        Wnlib.PartsOfSpeech pos = enums[slot];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        for (string morph = morphs.next(); morph != null; morph = morphs.next())
        {
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
            if (index == null)
            {
                // Morph unknown under this part of speech.
                continue;
            }

            if (tagged_only)
            {
                if (index.tagsense_cnt == 0)
                {
                    // Nothing tagged for this morph; do not even register it.
                    continue;
                }

                getMorphInfo(collected, morph).senseCounts[slot] = index.tagsense_cnt;
            }
            else
            {
                getMorphInfo(collected, morph).senseCounts[slot] = index.sense_cnt;
            }
        }
    }

    return WordInfo.Compine(collected);
}
/// <summary>
/// Looks up <paramref name="word"/> verbatim under each part of speech and records the
/// (optionally tagged-only) sense count for each one.
/// </summary>
/// <remarks>
/// The part of speech with the highest recorded count becomes <c>partOfSpeech</c>.
/// Known limitation: word meaning is context dependent, yet the word is judged in
/// isolation here. Taking the surrounding sentence into account would be more accurate
/// but is intentionally left out to keep the lookup simple.
/// </remarks>
/// <param name="word">Word to look up; also stored as the only element of
/// <c>texts</c>.</param>
/// <param name="tagged_only">When <c>true</c>, parts of speech without tagged senses are
/// ignored and <c>tagsense_cnt</c> is recorded instead of <c>sense_cnt</c>.</param>
/// <returns>A <c>WordInfo</c> with per-part-of-speech counts and the most probable
/// <c>partOfSpeech</c> (<c>Unknown</c> when nothing was found).</returns>
private static WordInfo lookupWord(string word, bool tagged_only)
{
    WordInfo wordinfo = new WordInfo();
    wordinfo.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
    wordinfo.texts = new ArrayList();
    wordinfo.texts.Add(word);

    Wnlib.PartsOfSpeech[] enums = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
    wordinfo.senseCounts = new int[enums.Length];

    int highest = 0;
    for (int slot = 0; slot < enums.Length; slot++)
    {
        Wnlib.PartsOfSpeech pos = enums[slot];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));
        if (index == null)
        {
            // Word unknown under this part of speech.
            continue;
        }

        if (tagged_only)
        {
            if (index.tagsense_cnt == 0)
            {
                // No tagged senses: treat as not found.
                continue;
            }

            wordinfo.senseCounts[slot] = index.tagsense_cnt;
        }
        else
        {
            wordinfo.senseCounts[slot] = index.sense_cnt;
        }

        // Track the part of speech with the most senses so far.
        if (wordinfo.senseCounts[slot] > highest)
        {
            highest = wordinfo.senseCounts[slot];
            wordinfo.partOfSpeech = pos;
        }
    }

    return wordinfo;
}