/// <summary>
/// Probes the WordNet index for <paramref name="word"/> under the given part of
/// speech, falling back to its morphological variants. Returns 0 when an index
/// entry exists, -1 when nothing is found (no actual index value is exposed).
/// </summary>
public static int GetSynsetIndex(string word, PartsOfSpeech pos)
{
    // WordNet index keys are lower case.
    word = word.ToLower();
    Index index = Wnlib.Index.lookup(word, PartOfSpeech.of(pos));
    if (index == null)
    {
        // Direct lookup failed: walk the morphological variants and stop at
        // the first one that resolves to an index entry.
        MorphStr morphs = new MorphStr(word, PartOfSpeech.of(pos));
        for (string morph = morphs.next(); morph != null; morph = morphs.next())
        {
            index = Index.lookup(morph, PartOfSpeech.of(pos));
            if (index != null)
            {
                break;
            }
        }
    }
    return index == null ? -1 : 0;
}
/// <summary>
/// Runs an overview search for term <paramref name="t"/> under part of speech
/// tag <paramref name="p"/>. Sets <paramref name="b"/> to whether anything was
/// defined and <paramref name="obj"/> to the accumulated search set; search
/// results are appended to <paramref name="list"/>.
/// </summary>
public void OverviewFor(string t, string p, ref bool b, ref SearchSet obj, ArrayList list)
{
    // Resolve the part of speech and collect the definition set for the literal term.
    PartOfSpeech partOfSpeech = PartOfSpeech.of(p);
    SearchSet resultSet = WNDB.is_defined(t, partOfSpeech);
    MorphStr morphSource = new MorphStr(t, partOfSpeech);

    // A direct search hit marks the whole lookup as matched.
    if (AddSearchFor(t, partOfSpeech, list))
    {
        HasMatch = true;
    }

    // Only when nothing has matched so far do we fall back to morphological variants.
    if (!HasMatch)
    {
        for (string variant = morphSource.next(); variant != null; variant = morphSource.next())
        {
            if (variant == t)
            {
                continue; // skip the variant identical to the original term
            }
            resultSet = resultSet + WNDB.is_defined(variant, partOfSpeech);
            AddSearchFor(variant, partOfSpeech, list);
        }
    }

    b = resultSet.NonEmpty;
    obj = resultSet;
}
/// <summary>
/// Finds synonym candidates for the word/POS pair in <paramref name="pos"/>.
/// When the literal word has no index entry and <paramref name="includeMorphs"/>
/// is true, its morphological variants are tried; <paramref name="pos"/>.Word is
/// updated to each variant as it is attempted, so the caller sees the form that
/// matched (or the last one tried). Returns null when nothing resolves.
/// </summary>
public MyWordInfo[] FindSynonyms(ref MyWordInfo pos, bool includeMorphs)
{
    // WordNet index keys are lower case.
    pos.Word = pos.Word.ToLower();
    Index index = Index.lookup(pos.Word, PartOfSpeech.of(pos.Pos));

    if (index == null)
    {
        if (!includeMorphs)
        {
            return null;
        }

        // Fall back to morphological variants of the (already lowered) word.
        var morphSource = new MorphStr(pos.Word, PartOfSpeech.of(pos.Pos));
        for (string variant = morphSource.next(); variant != null; variant = morphSource.next())
        {
            index = Index.lookup(variant, PartOfSpeech.of(pos.Pos));
            pos.Word = variant;
            if (index != null)
            {
                break;
            }
        }
    }

    if (index == null)
    {
        return null;
    }
    return LookupCandidates(index, pos);
}
/// <summary>
/// Classifies a sense key of the form "word#pos#senseNumber":
/// returns 1 when the sense is judged human, 2 when non-human, and
/// 0 when the input is empty/malformed or no senses are found.
/// </summary>
public static int GetType(string strSense)
{
    // Also guards against null, which the original empty-string check missed.
    if (string.IsNullOrEmpty(strSense))
    {
        return 0;
    }

    string[] strarr = strSense.Split('#');
    // BUG FIX: a well-formed key needs word, POS tag AND sense number. The
    // previous code checked only Length > 1 but then read strarr[2],
    // throwing IndexOutOfRangeException on inputs like "man#n".
    if (strarr.Length < 3)
    {
        return 0;
    }

    PartOfSpeech pos = null;
    switch (strarr[1])
    {
        case "n":
            pos = PartOfSpeech.of(PartsOfSpeech.Noun);
            break;
        default:
            // only noun senses are supported; anything else is an error
            break;
    }
    // BUG FIX: previously a null pos was passed straight into Search.
    if (pos == null)
    {
        return 0;
    }

    // BUG FIX: TryParse instead of Parse so a garbage sense number does not throw.
    int senseNumber;
    if (!int.TryParse(strarr[2], out senseNumber))
    {
        return 0;
    }

    SearchType sty = new SearchType(true, "HYPERPTR");
    Search se = new Search(strarr[0], true, pos, sty, senseNumber);
    SynSetList slist = se.senses;
    if (slist.Count > 0)
    {
        return IsHumanHelper(slist) ? 1 : 2;
    }
    return 0;
}
/// <summary>
/// Lazily yields the base (morphed) forms of <paramref name="str"/> for the
/// given part of speech. When <paramref name="pos"/> is null the lookup is
/// repeated for noun, verb and adjective.
/// NOTE(review): the null-pos fan-out covers "n", "v", "a" only — adverbs
/// ("r") are skipped; confirm whether that is intentional.
/// </summary>
public static IEnumerable<string> GetMorphs(string str, PartOfSpeech pos, WordNetContext context)
{
    if (pos == null)
    {
        // No part of speech given: recurse once per supported POS.
        var poses = (new string[] { "n", "v", "a" }).Select(s => PartOfSpeech.of(s));
        foreach (var p in poses)
        {
            foreach (var s in GetMorphs(str, p, context))
            {
                yield return s;
            }
        }
        yield break;
    }
    // Adjective satellites are morphed as plain adjectives.
    if (pos.clss == "SATELLITE")
        pos = PartOfSpeech.of("adj");
    var parts = str.Split(' ');
    int cnt = parts.Length;
    string tmp = null;
    /* first try exception list */
    var e = GetExceptions(str, pos, context);
    if (e.Any())
    {
        foreach (var s in e)
            yield return s;
    }
    else if (pos.name == "verb" && cnt > 1 && HasPreposition(parts))
    {
        // Multi-word verb phrase containing a preposition is handled as a unit.
        // NOTE(review): MorphPreposition may return null, which is yielded as-is
        // — confirm callers tolerate a null element.
        yield return MorphPreposition(str, parts, pos, context);
        yield break;
    }
    else /* then try simply morph on original string */
    if (pos.name != "verb" && MorphWord(str, pos, morph => tmp = morph, context))
    {
        yield return tmp;
    }
    else
    {
        // Fall back to morphing the phrase word by word (and, for hyphenated
        // words, segment by segment), then validate the reassembled phrase.
        bool isChanged = false;
        for (int i = 0; i < parts.Length; i++)
        {
            string word = parts[i];
            if (word.Contains('-'))
            {
                bool isSubChanged = false;
                var subs = word.Split('-');
                for (int j = 0; j < subs.Length; j++)
                {
                    // The callback writes the morphed segment back in place
                    // (relies on MorphWord invoking it synchronously).
                    isSubChanged |= MorphWord(subs[j], pos, morph => subs[j] = morph, context);
                }
                if (isSubChanged)
                {
                    parts[i] = string.Join("-", subs);
                    isChanged = true;
                }
            }
            else
            {
                isChanged |= MorphWord(word, pos, morph => parts[i] = morph, context);
            }
        }
        if (isChanged)
        {
            var s = string.Join(" ", parts);
            // Only yield the rebuilt phrase if WordNet actually defines it.
            if (IsDefinded(s, pos, context))
                yield return s;
        }
    }
}
/// <summary>
/// Morphs a verb phrase that contains a preposition ("standing down" ->
/// "stand down"). Assumes the verb is the first word: strips it off and tries
/// exception-list forms and standard suffix detachments of the verb with the
/// rest of the phrase re-attached, returning the first form WordNet defines,
/// or null.
/// </summary>
private static string MorphPreposition(string str, string[] parts, PartOfSpeech pos, WordNetContext context)
{
    string retval;
    /* Assume that the verb is the first word in the phrase. Strip it
       off, check for validity, then try various morphs with the
       rest of the phrase tacked on, trying to find a match. */
    // Everything after the leading verb (begins with the separating space).
    string rest = str.Substring(parts.First().Length);
    string end = null;
    if (parts.Length > 2) // more than 2 words
    {
        // BUG FIX: build "middle words + morphed last word". The previous code
        // used rest.Substring(0, parts.Last().Length) — the FIRST
        // last-word-length characters of the remainder — which mangled the
        // phrase. Per WordNet morph_prep, the remainder minus the last word
        // is kept and the morphed last word is appended.
        MorphWord(parts.Last(), pos,
                  morph => end = rest.Substring(0, rest.Length - parts.Last().Length) + morph,
                  context);
    }
    string word = parts.First();
    // The candidate verb must be purely alphanumeric.
    if (!word.All(c => char.IsLetterOrDigit(c)))
        return null;

    /* First try to find the verb in the exception list */
    var e = GetExceptions(word, PartOfSpeech.of("verb"), context);
    foreach (string excWord in e)
    {
        retval = excWord + rest;
        if (IsDefinded(retval, PartOfSpeech.of("verb"), context))
            return retval;
        if (end != null)
        {
            retval = excWord + end;
            if (IsDefinded(retval, PartOfSpeech.of("verb"), context))
                return retval;
        }
    }

    // Then try the standard suffix-detachment rules for verbs.
    var psufx = sufx[PartOfSpeech.of("verb").ident];
    for (int i = 0; i < psufx.Length; i++)
    {
        string suffix = psufx[i];
        if (word.EndsWith(suffix)) // ending is different
        {
            string excWord = word.Substring(0, word.Length - suffix.Length) + addr[PartOfSpeech.of("verb").ident][i];
            retval = excWord + rest;
            if (IsDefinded(retval, PartOfSpeech.of("verb"), context))
                return retval;
            if (end != null)
            {
                retval = excWord + end;
                if (IsDefinded(retval, PartOfSpeech.of("verb"), context))
                    return retval;
            }
        }
    }
    // Last resort: unmorphed verb with the morphed-tail remainder.
    if (end != null)
    {
        return word + end;
    }
    return null;
}
/// <summary>
/// Recursively walks the parse tree and attaches a WordNet sense (definition
/// text and sense number) to each leaf node. Leaf senses are consumed from the
/// NodesSenses / SensesNos queues in order; <paramref name="j"/> tracks the
/// current position in DisambRes and is advanced whenever a sense is consumed.
/// NOTE(review): wordinfoArr is threaded through the recursion but never used
/// in this body — confirm whether it is still needed.
/// </summary>
private void FillSenses(ParseTree parsetree, ParseNode node, ref ArrayList wordinfoArr, ref int j)
{
    if (node.Children != null)
    {
        // Interior node: recurse into every child; only leaves get senses.
        for (int i = 0; i < node.Children.Count; i++)
        {
            ParseNode pn = (ParseNode)node.Children[i];
            FillSenses(parsetree, pn, ref wordinfoArr, ref j);
        }
    }
    else
    {
        SentenceParser dummysp = new SentenceParser();
        string str = node.Goal;
        if (str == "PPN")
        {
            // Proper-noun leaf: disambiguate against a placeholder concept —
            // "Number" when the token parses as a number, otherwise "proper_name".
            WordSenseDisambiguator wsd = new WordSenseDisambiguator();
            double score;
            double num;
            if (double.TryParse(node.Text, out num))
            {
                MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("Number", PartsOfSpeech.Noun) }, out score);
                PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);
                Index index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                SynSet sense = new SynSet(index, ret[0].Sense, null);
                node.Sense = sense.defn;
                node.SenseNo = ret[0].Sense;
            }
            else
            {
                MyWordInfo[] ret = wsd.MMG_Disambiguate(new MyWordInfo[] { new MyWordInfo("proper_name", PartsOfSpeech.Noun) }, out score);
                PartOfSpeech pos = PartOfSpeech.of(PartsOfSpeech.Noun);
                Index index = Wnlib.Index.lookup(ret[0].Word.ToLower(), pos);
                SynSet sense = new SynSet(index, ret[0].Sense, null);
                node.Sense = sense.defn;
                node.SenseNo = ret[0].Sense;
            }
        }
        else if (str == "N" || str.Contains("NPP") || (str == "VING") || str.Contains("PPJ") || (str == "VPSP") || (str == "BE1") || (str == "BE2") || (str == "V") || str.Contains("CPADJ") || str.Contains("ADJ") || str.Contains("PADV") || str.Contains("ADV") || str == "VINF")
        {
            ///ta3deelat 5/7/////////
            // DisambRes[j] holds "word(s):..." for the current leaf; the word
            // part is compared against the node's surface form below.
            string [] spltstr = DisambRes[j].ToString().Split(':');
            VerbSense VS = new VerbSense();
            string[] spltstr2 = new string[10];
            string[] spltstr3 = new string[10];
            if (node.Senses != null && node.Goal.Contains("V"))
            {
                // Verb leaf with precomputed senses: normalize its infinitive
                // and consume the queued sense only when it matches DisambRes.
                VS = (VerbSense)node.Senses[0];
                spltstr2 = VS.Sense.ToString().Split('#');
                spltstr3 = spltstr[0].Split(' ');
                ArrayList results = dummysp.GetINFOfVerb(spltstr2[0]);
                if (results.Count > 0)
                {
                    spltstr2[0] = (string)results[0];
                }
                if (spltstr3[0] == spltstr2[0])
                {
                    if (NodesSenses.Count > 0)
                    {
                        node.Sense = (string)NodesSenses[0];
                        node.SenseNo = (int)SensesNos[0];
                        SensesNos.RemoveAt(0);
                        NodesSenses.RemoveAt(0);
                        j++;
                    }
                }
            }
            if (str == "N")
            {
                // NOTE(review): unlike every other branch, this one reads
                // NodesSenses[0]/SensesNos[0] WITHOUT a Count > 0 guard —
                // an empty queue throws here; confirm this is impossible.
                node.Sense = (string)NodesSenses[0];
                node.SenseNo = (int)SensesNos[0];
                SensesNos.RemoveAt(0);
                NodesSenses.RemoveAt(0);
                j++;
            }
            else
            {
                string NodeWord = SyntacticAnalyzer.SentenceParser.GetWordString(parsetree, node);
                spltstr3 = spltstr[0].Split(' ');
                string nodeWord = NodeWord.ToLower();
                if (node.Goal == "V" || node.Goal == "BE1" || node.Goal == "VINF" || node.Goal == "VPSP" || node.Goal == "VING")
                {
                    // Reduce verb forms to the infinitive before comparing.
                    ArrayList results = dummysp.GetINFOfVerb(nodeWord);
                    if (results.Count > 0)
                    {
                        nodeWord = (string)results[0];
                    }
                }
                // Hand-rolled inflection variants used to match the node word
                // against the disambiguation result: -ies plural, -ing gerund,
                // -ings, and drop-final-letter + -ing (e.g. "make" -> "making").
                string dummy = spltstr3[0];
                dummy = dummy.Remove(dummy.Length - 1);
                dummy = dummy + "ies";
                string dummy2 = nodeWord + "ing";
                string dummy3 = dummy2 + "s";
                string dummy4 = nodeWord.Remove(nodeWord.Length - 1);
                dummy4 = dummy4 + "ing";
                if (spltstr3[0] == nodeWord || spltstr3[0] + 's' == nodeWord || spltstr3[0] + "es" == nodeWord || dummy == nodeWord || spltstr3[0] == dummy2 || spltstr3[0] == dummy3 || spltstr3[0] == dummy4 || spltstr3[0] == dummy4 + 's')
                {
                    if (NodesSenses.Count > 0)
                    {
                        node.Sense = (string)NodesSenses[0];
                        node.SenseNo = (int)SensesNos[0];
                        SensesNos.RemoveAt(0);
                        NodesSenses.RemoveAt(0);
                        j++;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Builds the ontology-to-WordNet mapping: reads every concept name from
/// AllConcepts.txt, maps its ENGLISH1 (map-lex) fillers to WordNet senses via
/// the disambiguator — or, when a concept has no map-lex entry, maps the
/// concept name directly for each part of speech — and serializes the
/// accumulated WordOlogy list to wordology.txt with BinaryFormatter.
/// NOTE(review): BinaryFormatter is insecure/obsolete — consider a safer
/// serializer.
/// NOTE(review): a SINGLE WordOlogy instance `WO` is mutated and re-added for
/// every sense, so wordologyArr ends up holding many references to the same
/// object carrying only the last-written values — almost certainly a bug
/// (compare MapConceptsWithOutMapLex).
/// </summary>
public void ConstructMapping()
{
    string concept = "";
    string word = ""; // never assigned after this — see note at its use below
    int ID = -1;
    string senseNo = "";
    string Sense = "";
    string Pos = "";
    WordOlogy WO = new WordOlogy();
    ArrayList wordologyArr = new ArrayList();
    int conceptcounter = 0; // concepts for which at least one sense mapped
    LoadOntology();
    FileStream allConceptsFile = new FileStream(_ontologyDirectoryPath + @"\AllConcepts.txt", FileMode.Open);
    StreamReader allConceptsFileReader = new StreamReader(allConceptsFile);
    string _wordologyDirectoryPath = @"..\..\..\wordology\";
    BinaryFormatter bf = new BinaryFormatter();
    FileStream fs = new FileStream( _wordologyDirectoryPath + "\\wordology.txt", FileMode.Create);
    int indxWatcherconceptCounter = 0;
    int NoMapLexConcepts = 0;      // concepts without an ENGLISH1 property
    int CannotGetSenseExeption = 0; // senses whose lookup threw
    int AllSensesMapped = 0;
    while ((concept = allConceptsFileReader.ReadLine()) != null)
    {
        indxWatcherconceptCounter++;
        string Conceptpath = _ontologyDirectoryPath + @"\" + concept[0] + @"\" + concept;
        Concept C = (Concept)Onto[concept];
        Property maplexProperty = C.FullProperties["ENGLISH1"];
        List <MyWordInfo> maplexsenses = new List <MyWordInfo>();
        MyWordInfo mwi = new MyWordInfo();
        int NoOfSensesSucceeded = 0;
        if (maplexProperty != null)
        {
            // Parse each map-lex filler "word-word-pos" into a MyWordInfo:
            // all-but-last segments form the word, the last 2-char segment is
            // the POS tag ('v'/'n'/'a'/'r').
            for (int i = 0; i < maplexProperty.Fillers.Count; i++)
            {
                string tmp = maplexProperty.Fillers[i].ScalarFiller;
                char[] charr = new char[] { '-', '_' };
                string[] splt = tmp.Split(charr);
                //there r fillers with no type & a-bomb masalan
                if (splt.Length > 1)
                {
                    mwi = new MyWordInfo();
                    for (int k = 0; k < splt.Length - 2; k++)
                    {
                        mwi.Word += splt[k] + " ";
                    }
                    mwi.Word += splt[splt.Length - 2];
                    if (splt[splt.Length - 1].Length == 2)
                    {
                        if (splt[splt.Length - 1][0] == 'v') { mwi.Pos = Wnlib.PartsOfSpeech.Verb; }
                        else if (splt[splt.Length - 1][0] == 'n') { mwi.Pos = Wnlib.PartsOfSpeech.Noun; }
                        else if (splt[splt.Length - 1][0] == 'a') { mwi.Pos = Wnlib.PartsOfSpeech.Adj; }
                        else if (splt[splt.Length - 1][0] == 'r') { mwi.Pos = Wnlib.PartsOfSpeech.Adv; }
                        else { mwi.Pos = Wnlib.PartsOfSpeech.Unknown; }
                    }
                    else
                    {
                        // No recognizable POS suffix: keep it as part of the word.
                        mwi.Pos = Wnlib.PartsOfSpeech.Unknown;
                        mwi.Word += " " + splt[splt.Length - 1];
                    }
                    // De-duplicate consecutive identical (word, pos) fillers.
                    if (i == 0 || (maplexsenses.Count > 0 && (mwi.Word != maplexsenses[maplexsenses.Count - 1].Word || mwi.Pos != maplexsenses[maplexsenses.Count - 1].Pos)))
                    {
                        maplexsenses.Add(mwi);
                    }
                }
                //ne loop 3al ontology kolaha
            }
            if (maplexsenses.Count > 0)
            {
                MyWordInfo[] maplexArray = new MyWordInfo[maplexsenses.Count];
                for (int j = 0; j < maplexsenses.Count; j++)
                {
                    maplexArray[j] = maplexsenses[j];
                }
                WordSenseDisambiguator wsd = new WordSenseDisambiguator();
                MyWordInfo[] res = new MyWordInfo[maplexArray.Length];
                res = wsd.Disambiguate(maplexArray);
                int i = 0;
                foreach (MyWordInfo wi in res)
                {
                    // Re-derive the raw POS tag from the original filler text.
                    string tmp = maplexProperty.Fillers[i].ScalarFiller;
                    char[] charr = new char[] { '-', '_' };
                    string[] splt = tmp.Split(charr);
                    if (splt.Length > 1 && splt[splt.Length - 1].Length == 2)
                    {
                        WO.SenseNo = splt[splt.Length - 1];
                    }
                    else
                    {
                        // "sense doesn't have POS";
                    }
                    Wnlib.PartOfSpeech p = Wnlib.PartOfSpeech.of((Wnlib.PartsOfSpeech)wi.Pos);
                    try
                    {
                        Wnlib.Index index = Wnlib.Index.lookup(wi.Word.ToLower(), p);
                        SynSet sense = new SynSet(index, res[i].Sense, null);
                        WO.Sense = sense.defn;
                        AllSensesMapped++;
                        NoOfSensesSucceeded++;
                        try { WO.Pos = p.name; }
                        catch { WO.Pos = wi.Pos.ToString(); }
                        ID++;
                        WO.Word = wi.Word;
                        WO.ID = ID;
                        WO.Concept = concept;
                        // NOTE(review): this overwrites the WO.Word just set
                        // from wi.Word with the always-empty local `word` —
                        // looks like a bug; confirm intent.
                        WO.Word = word;
                    }
                    catch { }; // NOTE(review): swallows lookup failures silently
                    if (NoOfSensesSucceeded == 0)
                    {
                        CannotGetSenseExeption++;
                    }
                    i++;
                    // bf.Serialize(fs, "\n" + WO);
                    wordologyArr.Add(WO); // see shared-WO note in the summary
                }
                conceptcounter++;
            }
        }
        else
        {
            // No map-lex entry: try the concept name itself under each POS.
            NoMapLexConcepts++;
            //new part
            Wnlib.Index index;
            Wnlib.PartOfSpeech p;
            Search se;
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Noun));
                if (index != null)
                {
                    WO.Pos = "noun";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("noun"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.Sense = sense.defn;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            //bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Verb));
                if (index != null)
                {
                    WO.Pos = "verb";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Verb);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("verb"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            //bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adj));
                if (index != null)
                {
                    WO.Pos = "adj";
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Adj);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("adj"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            //bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            try
            {
                index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(PartsOfSpeech.Adv));
                if (index != null)
                {
                    WO.Pos = "adv";
                    // NOTE(review): adverb branch asks for NOUN relatedness —
                    // likely a copy-paste slip; every other branch uses its own POS.
                    Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(PartsOfSpeech.Noun);
                    foreach (Opt o in relatedness)
                    {
                        for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                        {
                            se = new Search(concept, true, PartOfSpeech.of("adv"), o.sch, senseNumber);
                            SynSet sense = new SynSet(index, senseNumber, se);
                            WO.Sense = sense.defn;
                            WO.Concept = concept;
                            WO.Word = concept;
                            WO.ID = ID;
                            ID++;
                            NoOfSensesSucceeded++;
                            AllSensesMapped++;
                            //bf.Serialize(fs, "\n" + WO);
                            wordologyArr.Add(WO);
                        }
                    }
                }
            }
            catch { }
            if (NoOfSensesSucceeded != 0)
            {
                conceptcounter++;
            }
        }
    }//end while
    allConceptsFileReader.Close();
    allConceptsFile.Close();
    bf.Serialize(fs, wordologyArr);
    fs.Close();
    MessageBox.Show("no map-lex concepts number = " + NoMapLexConcepts.ToString());
    MessageBox.Show("can't getsense pos number = " + CannotGetSenseExeption.ToString());
    MessageBox.Show(conceptcounter.ToString());
}
/// <summary>
/// Convert method creates a BSON db file from wndb data files.
/// dictPack - path to wndb data files
/// jsonFile - destination file written via BSON serialization
/// Builds lemma -> { PosSymbol, Synsets = { synonims, definitions, examples } }
/// plus an inflected-form exception table, then serializes both.
/// </summary>
public static void Convert(string dictPack, string jsonFile)
{
    WNDB wndb = new WNDB(dictPack);
    var poses = (new[] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s));
    // Convert to Dictionary
    // lemma -> { SynSetGroup: PosSymbol, Synsets = { synset: synonims, definitions, examples } }
    var dict = new Dictionary <string, List <ExpSynSetGroup> >();
    foreach (var pos in poses)
    {
        Console.WriteLine("Process Data of {0}", pos.name);
        foreach (var data in wndb.GetData(pos))
        {
            //data.adj includes 'a' & 's' pos symbols
            char posSymbol = pos.symbol.First();
            // A synset with exactly one all-lowercase word needs no synonym
            // list (the lemma itself is the only member).
            bool singleWord = false;
            if (data.origWords.Count() == 1)
            {
                var w = data.origWords.First().word;
                singleWord = w == w.ToLower();
            }
            var synSet = new SynSet
            {
                // Skip synonims if where is a single lowercase word
                Synonims = (singleWord) ? null : data.origWords.Select(ow => ow.word).ToArray(),
                // Singular vs plural fields keep the serialized form compact.
                Definition = (data.definitions.Count() == 1) ? data.definitions.First() : null,
                Definitions = (data.definitions.Count() > 1) ? data.definitions : null,
                Example = (data.examples?.Count() == 1) ? data.examples.First() : null,
                Examples = (data.examples?.Count() > 1) ? data.examples : null
            };
            // Register the synset under every member lemma, grouped by POS symbol.
            foreach (var lemma in data.origWords.Select(ow => ow.word.ToLower()))
            {
                var synGrps = dict.GetValue(lemma);
                if (synGrps != null)
                {
                    var grp = synGrps.FirstOrDefault(g => g.PosSymbol == posSymbol);
                    if (grp == null)
                    {
                        synGrps.Add(new ExpSynSetGroup(posSymbol, synSet));
                    }
                    else
                    {
                        grp.Synsets.Add(synSet);
                    }
                }
                else
                {
                    dict.Add(lemma, new List <ExpSynSetGroup> { new ExpSynSetGroup(posSymbol, synSet) });
                }
            }
        }
    }
    // exceptions
    //TODO: remove morphes, ...
    var excepts = new Dictionary <string, List <DictException> >();
    foreach (var pos in poses)
    {
        Console.WriteLine("Process Exceptions of {0}", pos.name);
        foreach (var exwords in wndb.GetExceptions(pos))
        {
            // exwords[0] is the inflected form; exwords[1..] its base forms.
            // Skip base forms the regular morphology already derives.
            var morph = Morph.GetBasicForm(exwords[0], pos);
            for (int i = 1; i < exwords.Length; i++)
            {
                var baseForm = exwords[i];
                if (baseForm == exwords[0] || baseForm == morph)
                {
                    //Console.WriteLine($"Skip: {(exwords[0])} -> {baseForm}/{morph}");
                    continue;
                }
                List <ExpSynSetGroup> synGrps = dict.GetValue(baseForm);
                // Hyphenated base forms may be stored space-separated.
                if (synGrps == null && baseForm.Contains('-'))
                {
                    baseForm = baseForm.Replace('-', ' ');
                    dict.TryGetValue(baseForm, out synGrps);
                }
                if (synGrps != null)
                {
                    var posSymbols = string.Join("", synGrps.Select(sg => sg.PosSymbol));
                    var except = new DictException { BasicForm = baseForm, PosSymbols = posSymbols };
                    List <DictException> baseForms;
                    if (excepts.TryGetValue(exwords[0], out baseForms))
                    {
                        if (!baseForms.Any(e => e.BasicForm == baseForm))
                        {
                            baseForms.Add(except);
                        }
                    }
                    else
                    {
                        excepts.Add(exwords[0], new List <DictException> { except });
                    }
                }
            }
        }
    }
    Console.WriteLine("Save changes");
    var storage = new ExpDictStorage { SynSets = dict, Exceptions = excepts };
    var serializer = new JsonSerializer();
    // Null singular/plural fields are omitted from the output file.
    serializer.NullValueHandling = NullValueHandling.Ignore;
    using (var stream = File.Open(jsonFile, FileMode.Create))
    using (var writer = new BsonWriter(stream))
    {
        serializer.Serialize(writer, storage);
    }
}
/// <summary>
/// Maps a concept that has no map-lex (ENGLISH1) entry by looking the concept
/// name itself up in WordNet under each part of speech, recording one
/// WordOlogy entry per (relatedness option, sense) into wordologyArr.
/// Increments conceptcounter when at least one sense was recorded.
/// </summary>
private void MapConceptsWithOutMapLex(string concept)
{
    int sensesRecorded = 0;
    sensesRecorded += AddSensesForPos(concept, PartsOfSpeech.Noun, "noun");
    sensesRecorded += AddSensesForPos(concept, PartsOfSpeech.Verb, "verb");
    sensesRecorded += AddSensesForPos(concept, PartsOfSpeech.Adj, "adj");
    // BUG FIX: the adverb branch previously requested relatedness for Noun;
    // it now uses its own part of speech like the other branches.
    sensesRecorded += AddSensesForPos(concept, PartsOfSpeech.Adv, "adv");
    if (sensesRecorded != 0)
    {
        conceptcounter++;
    }
}

/// <summary>
/// Looks <paramref name="concept"/> up under one part of speech and appends a
/// WordOlogy entry per (relatedness option, sense). Returns the number of
/// entries added. Lookup/search failures are swallowed (best effort), matching
/// the original behavior.
/// </summary>
private int AddSensesForPos(string concept, PartsOfSpeech partOfSpeech, string posName)
{
    int added = 0;
    try
    {
        Wnlib.Index index = Wnlib.Index.lookup(concept.ToLower(), PartOfSpeech.of(partOfSpeech));
        if (index != null)
        {
            Opt[] relatedness = WordsMatching.Relatedness.GetRelatedness(partOfSpeech);
            foreach (Opt o in relatedness)
            {
                for (int senseNumber = 0; senseNumber < index.sense_cnt; senseNumber++)
                {
                    Search se = new Search(concept, true, PartOfSpeech.of(posName), o.sch, senseNumber);
                    SynSet sense = new SynSet(index, senseNumber, se);
                    // BUG FIX: allocate a fresh WordOlogy per sense. The previous
                    // code mutated one shared instance, so every entry added to
                    // wordologyArr referenced the same object and ended up
                    // holding only the last sense written.
                    WordOlogy WO = new WordOlogy();
                    WO.Pos = posName;
                    WO.Concept = concept;
                    WO.Word = concept;
                    WO.Sense = sense.defn;
                    WO.ID = ID;
                    ID++;
                    added++;
                    wordologyArr.Add(WO);
                }
            }
        }
    }
    catch { } // best effort: skip parts of speech whose lookup/search throws
    return added;
}
/// <summary>
/// Convert method creates db records from wndb files.
/// dictPack - path to wndb data files
/// context - dest db context
/// For each part of speech: inserts SynSets, Lemmas (lowercased), their
/// relations and original Writings, then the inflected-form exception table.
/// Changes are saved once per processing phase.
/// </summary>
public static void Convert(string dictPack, WordNetContext context)
{
    WNDB wndb = new WNDB(dictPack);
    var synWords = new List <string>();
    // int ind;
    // In-memory caches so the same lemma/writing is inserted only once.
    var wordToLemma = new Dictionary <string, Lemma>();
    var words = new Dictionary <string, Writing>();
    var poses = (new [] { "n", "v", "a", "r" }).Select(s => PartOfSpeech.of(s));
    foreach (var pos in poses)
    {
        Console.WriteLine("Process Data of {0}", pos.name);
        // ind = 0;
        foreach (var data in wndb.GetData(pos))
        {
            if (data.pos != pos.symbol && !(data.pos == "s" && pos.symbol == "a")) //data.adj includes 'a' & 's' pos symbols
            {
                throw new Exception("pos!=data.pos");
            }
            var synset = new SynSet { Pos = data.pos };
            context.SynSets.Add(synset);
            // Tracks lemmas already linked to THIS synset (a synset can list
            // the same lowercased word under different casings).
            synWords.Clear();
            foreach (var oword in data.origWords)
            {
                Lemma lemma;
                string lcWord = oword.word.ToLower();
                // add lemma
                if (!wordToLemma.TryGetValue(lcWord, out lemma))
                {
                    lemma = new Lemma { Value = lcWord, Poses = data.pos };
                    wordToLemma.Add(lcWord, lemma);
                    context.Lemmas.Add(lemma);
                }
                else if (!lemma.Poses.Contains(data.pos))
                {
                    // Poses is a string of POS symbols; append new ones.
                    lemma.Poses += data.pos;
                }
                if (synWords.IndexOf(lcWord) < 0)
                {
                    synWords.Add(lcWord);
                    // add SynSet <-> Lemma relation
                    context.SynsetLemmas.Add(new SynsetLemma { SynSet = synset, Lemma = lemma });
                }
                // add original word if it differs from lemma
                Writing word;
                if (lcWord != oword.word)
                {
                    if (!words.TryGetValue(oword.word, out word))
                    {
                        word = new Writing { Value = oword.word, Lemma = lemma };
                        words.Add(oword.word, word);
                        context.Writings.Add(word);
                    }
                    else if (word.Lemma != lemma)
                    {
                        // Same original spelling already bound to another lemma:
                        // report the conflict and keep the first binding.
                        Console.WriteLine("Word mix: {0} {1} {2}", oword.word, lemma.Value, word.Lemma.Value);
                        continue;
                    }
                }
            }
            synset.Definition = string.Join(";", data.definitions);
            synset.Example = string.Join(";", data.examples);
            // ind++;
            // if (ind % 1000 == 0)
            //     ShowProgress(ind.ToString());
        }
        Console.WriteLine("Save changes");
        context.SaveChanges();
        // exceptions
        //TODO: remove morphes, ...
        Console.WriteLine("Process Exceptions of {0}", pos.name);
        // ind = 0;
        foreach (var exwords in GetExceptions(wndb, pos))
        {
            // exwords[0] is the inflected form; exwords[1..] its base forms.
            for (int i = 1; i < exwords.Length; i++)
            {
                if (exwords[i] == exwords[0])
                {
                    continue;
                }
                Lemma lemma;
                // Hyphenated base forms may be stored space-separated.
                if (wordToLemma.TryGetValue(exwords[i], out lemma) || (exwords[i].Contains('-') && wordToLemma.TryGetValue(exwords[i].Replace('-', ' '), out lemma)))
                {
                    context.Excepts.Add(new Except { Value = exwords[0], MainForm = exwords[i], Lemma = lemma });
                }
                // else
                // {
                //     Console.WriteLine("Lemma not found {0}", exwords[i]);
                //     context.Excepts.Add(new Except { Value = exwords[0], MainForm = exwords[i] });
                // }
            }
            // ind++;
            // if (ind % 1000 == 0)
            //     ShowProgress(ind.ToString());
        }
        Console.WriteLine("Save changes");
        context.SaveChanges();
    }
    // NOTE(review): this final SaveChanges is redundant — every phase above
    // already saved — but harmless.
    //Console.WriteLine("Save changes");
    context.SaveChanges();
}
/// <summary>
/// Lazily yields the base (morphed) forms of <paramref name="str"/> for the
/// given part of speech: exception-list forms first, then a verb-phrase
/// preposition rule, then whole-string morphing, and finally word-by-word
/// (and hyphen-segment) morphing validated against the dictionary.
/// </summary>
public IEnumerable <string> GetMorphs(string str, PartOfSpeech pos)
{
    // Adjective satellites are morphed as plain adjectives.
    if (pos.clss == "SATELLITE")
    {
        pos = PartOfSpeech.of("adj");
    }

    var words = str.Split(' ');
    int wordCount = words.Length;
    string wholeMorph = null;

    /* first try exception list */
    var exceptions = _storage.GetBasicForms(str, pos);
    if (exceptions.Any())
    {
        foreach (var baseForm in exceptions)
        {
            yield return baseForm;
        }
    }
    else if (pos.name == "verb" && wordCount > 1 && HasPreposition(words))
    {
        // Multi-word verb phrase containing a preposition is handled as a unit.
        yield return MorphPreposition(str, words, pos);
        yield break;
    }
    else if (pos.name != "verb" && MorphWord(str, pos, morph => wholeMorph = morph))
    {
        /* then try simply morph on original string */
        yield return wholeMorph;
    }
    else
    {
        // Morph the phrase word by word; hyphenated words are handled segment
        // by segment. The rebuilt phrase is yielded only if something changed
        // and the result is actually defined.
        bool anyChange = false;
        for (int w = 0; w < words.Length; w++)
        {
            string current = words[w];
            if (!current.Contains('-'))
            {
                // Callback writes the morphed word back in place.
                anyChange |= MorphWord(current, pos, morph => words[w] = morph);
                continue;
            }
            var segments = current.Split('-');
            bool segmentChanged = false;
            for (int seg = 0; seg < segments.Length; seg++)
            {
                segmentChanged |= MorphWord(segments[seg], pos, morph => segments[seg] = morph);
            }
            if (segmentChanged)
            {
                words[w] = string.Join("-", segments);
                anyChange = true;
            }
        }
        if (anyChange)
        {
            var candidate = string.Join(" ", words);
            if (_storage.IsDefinded(candidate, pos))
            {
                yield return candidate;
            }
        }
    }
}