/* find the example sentence references in the example sentence index file */
// Builds the key "<word>%<pos ident>:<fnum>:<uniq>::" for the currently selected
// word of this synset, looks it up in SENTIDX.VRB and calls getExample() for each
// sentence offset listed on the matching line.
// Returns true when at least one offset was handed to getExample(), false when
// the key was not found or the line listed no offsets.
bool findExample()
{
    bool retval = false;
    int wdnum = whichword - 1;
    Lexeme lx = words[wdnum];
    string tbuf = lx.word + "%" + pos.ident + ":" + fnum + ":" + lx.uniq + "::";

    string str;
    // The reader is scoped with 'using' so the file handle is released even when
    // the search throws; previously it was only closed on the success path.
    using (StreamReader fp = new StreamReader(WNDB.path + "SENTIDX.VRB"))
    {
        // The whole file is re-encoded into an in-memory stream because
        // binSearch is given a stream rather than the reader itself.
        Byte[] b = System.Text.Encoding.Unicode.GetBytes(fp.ReadToEnd());
        MemoryStream ms = new MemoryStream(b);
        str = WNDB.binSearch(tbuf, ms);
    }

    if (str != null)
    {
        // Skip the key and the separator that follows it
        // (the word plus 11 further characters, as in the original code).
        str = str.Substring(lx.word.Length + 11);
        StrTok st = new StrTok(str, ' ', ',', '\n');
        string offset;
        while ((offset = st.next()) != null)
        {
            getExample(offset, lx.word);
            retval = true;
        }
    }
    return retval;
}
} // for serialization

// Creates a synset by reading the record stored at byte offset 'off' of the
// data file for part of speech 'p' and parsing it.
//   off  - byte offset of the record; also stored in hereiam
//   p    - part of speech whose data file is read
//   wd   - word passed through to Parse()
//   sch  - owning search, stored in 'search'
//   sens - sense number, stored in 'sense'
// A record is expected to start with the offset formatted as eight digits. When
// the first read does not (or returns nothing at all), the data file is reopened
// and the read is retried once; whatever the retry yields is handed to Parse().
public SynSet(int off, PartOfSpeech p, string wd, Search sch, int sens)
{
    pos = p;
    hereiam = off;
    search = sch;
    sense = sens;

    string expectedPrefix = off.ToString("D8");
    string rec = readRecordAt(WNDB.data(p), off);

    // ReadLine() returns null at end of stream; treat that like a mismatching
    // record so the recovery path runs instead of a NullReferenceException.
    if (rec == null || !rec.StartsWith(expectedPrefix))
    {
        Console.WriteLine("Error reading " + p.name + " file! " + off + ": " + rec);
        WNDB.reopen(p);
        rec = readRecordAt(WNDB.data(p), off);
    }
    Parse(rec, pos, wd);
}

// Positions the reader's underlying stream at 'off' and returns the line found
// there (null at end of stream). Buffered data is discarded first so the reader
// does not serve stale characters from its previous position.
private static string readRecordAt(StreamReader f, int off)
{
    f.DiscardBufferedData();
    f.BaseStream.Position = off;
    return f.ReadLine();
}
// Tries to find the base form of a single word for the current part of speech.
// The exception list is consulted first; otherwise each suffix rule that applies
// to the part of speech is tried and the first candidate that is defined in the
// database is returned. Returns null when the word is null or no base form is found.
string morphword(string word)
{
    if (word == null)
    {
        return null;
    }

    // An entry in the exception list wins over any suffix rule.
    Exceptions irregular = new Exceptions(word, pos);
    string fromExceptions = irregular.next();
    if (fromExceptions != null)
    {
        return fromExceptions;
    }

    // NOTE(review): other code in this file compares pos.name against "adv";
    // confirm which spelling PartOfSpeech actually uses.
    if (pos.name == "adverb")
    {
        return null;
    }

    string stem = word;    // text the suffix rules are applied to
    string trailer = "";   // text re-attached to a successful result
    if (pos.name == "noun")
    {
        if (word.EndsWith("ful"))
        {
            // Morph the part before "ful" and put "ful" back afterwards. When
            // nothing is left after stripping, the whole word is used instead.
            string stripped = word.Substring(0, word.Length - 3);
            if (stripped.Length != 0)
            {
                stem = stripped;
            }
            trailer = "ful";
        }
        else if (word.EndsWith("ss") || word.Length <= 2)
        {
            // Nouns ending in "ss" and very short words are not morphed.
            return null;
        }
    }

    int firstRule = offsets[pos.ident];
    int ruleCount = cnts[pos.ident];
    for (int rule = firstRule; rule < firstRule + ruleCount; rule++)
    {
        string suffix = sufx[rule];
        if (!stem.EndsWith(suffix))
        {
            continue;
        }
        // TDMS 11 Oct 2005 - bug fix - the stem (not the original word) is used
        // here, as per wordnet code morph.c
        string candidate = stem.Substring(0, stem.Length - suffix.Length) + addr[rule];
        if (WNDB.is_defined(candidate, pos).NonEmpty)
        {
            return candidate + trailer;
        }
    }
    return null;
}
// Type initializer: opens one reader on the exception file of every known part
// of speech and stores it in excfps at the index given by that part's ident.
static Exceptions()
{
    excfps = new StreamReader[PartOfSpeech.parts.Count];
    foreach (PartOfSpeech part in PartOfSpeech.parts.Values)
    {
        excfps[part.ident] = new StreamReader(WNDB.ExcFile(part));
    }
}
// Looks up the sentence template stored under key 'off' in SENTS.VRB, replaces
// its "%s" placeholder with 'wd' and appends the result to search.buf, prefixed
// with " EX: ". Does nothing when no line is found for 'off'.
//   off - key of the example sentence, as listed in the sentence index
//   wd  - word substituted for "%s" in the template
void getExample(string off, string wd)
{
    string line;
    // 'using' releases the file handle even if the search throws; the original
    // only closed the reader after the buffer had been updated successfully.
    using (StreamReader fp = new StreamReader(WNDB.path + "SENTS.VRB"))
    {
        Byte[] b = System.Text.Encoding.Unicode.GetBytes(fp.ReadToEnd());
        MemoryStream ms = new MemoryStream(b);
        line = WNDB.binSearch(off, ms);
    }

    // binSearch reports a missing key with null (findExample checks for it too);
    // skip quietly instead of failing on line.Substring below.
    if (line == null)
    {
        return;
    }

    // Drop the leading key: keep everything after the first space.
    line = line.Substring(line.IndexOf(' ') + 1);
    search.buf += " EX: " + line.Replace("%s", wd);

    isDirty = true; // TDMS 19 July 2006 - attempt to tie the logic which
                    // populates buf to the logic that defines whether the
                    // synset is populated with relevant information
}
// Looks 'word' up in the exception file of 'pos'.
// The file content is cached in 'exc' and keyed by file name in 'laststr'; it is
// reloaded only when the requested part of speech uses a different file than the
// previous lookup. After construction 'line' holds the matching line (or null)
// and 'endlp' the index of its first space.
public Exceptions(string word, PartOfSpeech pos)
{
    StreamReader fs = excfps[pos.ident];
    string fileName = ((System.IO.FileStream)(fs.BaseStream)).Name;

    if (laststr != fileName)
    {
        Byte[] b = System.Text.Encoding.Unicode.GetBytes(fs.ReadToEnd());

        // Rewind the shared reader for the next reload. The buffered data is
        // discarded as well so the reader's internal state matches the new
        // stream position (same pattern as the data-file reads in SynSet).
        fs.BaseStream.Seek(0, SeekOrigin.Begin);
        fs.DiscardBufferedData();

        exc = new MemoryStream(b);
        // Record the cache key only after the content was loaded, so a failed
        // read cannot leave 'laststr' naming a file that 'exc' does not hold.
        laststr = fileName;
    }

    line = WNDB.binSearch(word, exc);
    if (line != null)
    {
        endlp = line.IndexOf(' ');
    }
}
// Opens the index and data files for part of speech 'p' and registers this
// instance in fps under 'p'.
// Failures are deliberately not propagated: a message box is shown instead,
// because this can run while the user is still choosing the dictionary folder.
public WNDBpart(PartOfSpeech p)
{
    try
    {
        Console.WriteLine("WNDBpart");
        index = new StreamReader(WNDB.IndexFile(p));
        data = new StreamReader(WNDB.DataFile(p));
        fps[p] = this;
    }
    catch
    {
        // If one file opened before the failure, close it: the instance was not
        // registered, so nothing else would ever release that handle.
        if (index != null)
        {
            index.Close();
            index = null;
        }
        if (data != null)
        {
            data.Close();
            data = null;
        }

        MessageBox.Show("Bad dict path");
        // TODO: handle bad dict path
        // ignore errors - as the user is locating the dictionary location
        // wordnet classes are trying to instantiate based on an incorrect dict path
    }
}
/* From search.c:
 * Find word in index file and return parsed entry in data structure.
 * Input word must be exact match of string in database. */

// From the WordNet Manual (http://wordnet.princeton.edu/man/wnsearch.3WN.html)
// index_lookup() finds searchstr in the index file for pos and returns a pointer
// to the parsed entry in an Index data structure. searchstr must exactly match the
// form of the word (lower case only, hyphens and underscores in the same places) in
// the index file. NULL is returned if a match is not found.
//
// In this port: returns null when 'word' is null or empty, when its first
// character is neither a letter nor a number, or when the index file has no
// line for it; otherwise returns the Index parsed from that line.
public static Index lookup(string word, PartOfSpeech pos)
{
    // A null word used to slip past the empty-string test and fail on word[0].
    if (word == null || word.Length == 0)
    {
        return null;
    }

    // TDMS 14 Aug 2005 - changed to allow for numbers as well
    // because the database contains searches that can start with
    // numerals
    //if (!char.IsLetter(word[0]))
    if (!char.IsLetter(word[0]) && !char.IsNumber(word[0]))
    {
        return null;
    }

    string line = WNDB.binSearch(word, pos);
    if (line == null)
    {
        return null;
    }

    // Fields are consumed strictly in the order they appear on the index line.
    Index idx = new Index();
    StrTok st = new StrTok(line);
    idx.wd = st.next();                         /* the word */
    idx.pos = PartOfSpeech.of(st.next());       /* the part of speech */
    idx.sense_cnt = int.Parse(st.next());       /* collins count */

    int ptruse_cnt = int.Parse(st.next());      /* number of pointers types */
    idx.ptruse = new PointerType[ptruse_cnt];
    for (int j = 0; j < ptruse_cnt; j++)
    {
        idx.ptruse[j] = PointerType.of(st.next());
    }

    // The offset count precedes the tagged-sense count; the offsets themselves
    // come last.
    int off_cnt = int.Parse(st.next());
    idx.offs = new int[off_cnt];
    idx.tagsense_cnt = int.Parse(st.next());
    for (int j = 0; j < off_cnt; j++)
    {
        idx.offs[j] = int.Parse(st.next());
    }
    return idx;
}
// From the WordNet Manual (http://wordnet.princeton.edu/man/wnsearch.3WN.html)
// findtheinfo() is the primary search algorithm for use with database interface
// applications. Search results are automatically formatted, and a pointer to the
// text buffer is returned. All searches listed in WNHOME/include/wnconsts.h can be
// done by findtheinfo().
//
// In this port the method returns nothing: text output is appended to 'buf' and
// matching synsets are collected in 'senses', which is recreated on every call.
// The kind of search is selected by sch.ptp.mnemonic. OVERVIEW, FREQ, WNGREP and
// VERBGROUP/RELATIVES are handled directly; every other search walks all index
// entries produced by Indexes(word, pos) and, for each selected sense, builds a
// SynSet and runs the matching trace on it. A synset is added to 'senses' only
// when the trace marked it dirty.
void findtheinfo()
{
    SynSet cursyn = null;
    Indexes ixs = new Indexes(word, pos);
    Index idx = null;
    // Depth passed to the trace calls below: 1 when sch.rec is set, otherwise 0.
    int depth = sch.rec ? 1 : 0;
    senses = new SynSetList();
    switch (sch.ptp.mnemonic)
    {
        case "OVERVIEW":
            WNOverview();
            break;

        case "FREQ":
            // Records the number of offsets of each index entry and reports it in buf.
            if (countSenses == null)
            {
                countSenses = new ArrayList();
            }
            while ((idx = ixs.next()) != null)
            {
                countSenses.Add(idx.offs.Length);
                buf += "Sense " + countSenses.Count + ": " + idx.offs.Length;
            }
            break;

        case "WNGREP":
            // One line of output per string returned by WNDB.wngrep.
            strings = WNDB.wngrep(word, pos);
            for (int wi = 0; wi < strings.Count; wi++)
            {
                buf += (string)strings[wi] + "\n";
            }
            break;

        case "VERBGROUP":
            goto case "RELATIVES";

        case "RELATIVES":
            while ((idx = ixs.next()) != null)
            {
                relatives(idx);
            }
            break;

        default:
            /* look at all spellings of word */
            while ((idx = ixs.next()) != null)
            {
                /* Print extra sense msgs if looking at all senses */
                if (whichsense == ALLSENSES)
                {
                    buf += "\n";
                }
                /* Go through all of the searchword's senses in the
                 * database and perform the search requested. */
                for (int sense = 0; sense < idx.offs.Length; sense++)
                {
                    // whichsense is 1-based; ALLSENSES selects every sense.
                    if (whichsense == ALLSENSES || whichsense == sense + 1)
                    {
                        prflag = false;
                        /* Determine if this synset has already been done
                         * with a different spelling. If so, skip it.
                         */
                        for (int j = 0; j < senses.Count; j++)
                        {
                            SynSet ss = (SynSet)senses[j];
                            if (ss.hereiam == idx.offs[sense])
                            {
                                // Jumps to the label at the end of this sense's block.
                                goto skipit;
                            }
                        }
                        cursyn = new SynSet(idx, sense, this);
                        //TODO: moved senses.add(cursyn) from here to each case and handled it differently according to search - this handling needs to be verified to ensure the filter is not to limiting
                        switch (sch.ptp.mnemonic)
                        {
                            case "ANTPTR":
                                if (pos.name == "adj")
                                {
                                    cursyn.traceAdjAnt();
                                }
                                else
                                {
                                    cursyn.tracePtrs(sch.ptp, pos, depth);
                                }
                                if (cursyn.isDirty)
                                {
                                    // TDMS 25 Oct 2005 - restrict to relevant values
                                    cursyn.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty)
                                 * { // TDMS 25 Oct 2005 - restrict to relevant values
                                 * cursyn.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
                                 * senses.Add(cursyn);
                                 * }
                                 */
                                // perform the senses restrictions based upon pos
                                /*
                                 * switch(pos.name) {
                                 * case "verb":
                                 * if (cursyn.senses != null) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 * break;
                                 *
                                 * default:
                                 * if (cursyn.senses != null && cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 * break;
                                 * }
                                 */
                                break;

                            case "COORDS":
                                //eg. search for 'car', select Noun -> 'Coordinate Terms'
                                cursyn.traceCoords(PointerType.of("HYPOPTR"), pos, depth);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "FRAMES":
                                //eg. search for 'right', select Verb -> 'Sample Sentences'
                                cursyn.strFrame(true);
                                // NOTE(review): the next line is read as one comment (an old
                                // check that was replaced by the isDirty test) - confirm
                                // against version control that "if (cursyn.sense != 0)" is
                                // not meant to be live code.
                                // TDMS 03 JUL 2006 fixed relevancy check if (cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                if (cursyn.isDirty)
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.frames.Count != 0) // TDMS 03 Jul 2006 - only add frame if there are any retrieved
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "MERONYM":
                                //eg. search for 'car', select Noun -> 'Meronym'
                                senses.isDirty = false;
                                cursyn.tracePtrs(PointerType.of("HASMEMBERPTR"), pos, depth);
                                cursyn.tracePtrs(PointerType.of("HASSTUFFPTR"), pos, depth);
                                cursyn.tracePtrs(PointerType.of("HASPARTPTR"), pos, depth);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "HOLONYM":
                                //eg. search for 'car', select Noun -> 'Holonyms'
                                cursyn.tracePtrs(PointerType.of("ISMEMBERPTR"), pos, depth);
                                cursyn.tracePtrs(PointerType.of("ISSTUFFPTR"), pos, depth);
                                cursyn.tracePtrs(PointerType.of("ISPARTPTR"), pos, depth);
                                // if (cursyn.senses != null && cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                // senses.Add(cursyn);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "HMERONYM":
                                //eg. search for 'car', select Noun -> 'Meronyms Tree'
                                cursyn.partsAll(sch.ptp);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                // senses.Add(SearchTrack.ssParent);
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                // if (cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                // senses.Add(cursyn);
                                break;

                            case "HHOLONYM":
                                cursyn.partsAll(sch.ptp);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null) // && cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "SEEALSOPTR":
                                cursyn.seealso();
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "SIMPTR":
                                goto case "HYPERPTR";

                            case "SYNS":
                                goto case "HYPERPTR";

                            case "HYPERPTR":
                                //eg. search for 'car', select Noun -> 'Synonyms/Hypernyms, ordered by estimated frequency'
                                wordsFrom(cursyn);
                                cursyn.strsns(sense + 1);
                                prflag = true;
                                cursyn.tracePtrs(sch.ptp, pos, depth);
                                // Adjectives and adverbs additionally follow the pointer
                                // types named below; verbs additionally get their frames.
                                if (pos.name == "adj")
                                {
                                    cursyn.tracePtrs(PointerType.of("PERTPTR"), pos, depth);
                                    cursyn.tracePtrs(PointerType.of("PPLPTR"), pos, depth);
                                }
                                else if (pos.name == "adv")
                                {
                                    cursyn.tracePtrs(PointerType.of("PERTPTR"), pos, depth);
                                }
                                if (pos.name == "verb")
                                {
                                    cursyn.strFrame(false);
                                }
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                // senses.Add(cursyn);
                                break;

                            case "NOMINALIZATIONS":
                                // 26/8/05 - changed "DERIVATION" to "NOMINALIZATIONS" - this needs to be verified
                                // derivation - TDMS
                                cursyn.tracenomins(sch.ptp);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.sense != 0) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "CLASSIFICATION":
                                goto case "CLASS";

                            case "CLASS":
                                //eg. search for 'car', select Noun -> 'Domain Terms'
                                cursyn.traceclassif(sch.ptp, new SearchType(false, sch.ptp));
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;

                            case "HYPOPTR":
                                //eg. search for 'car', select Noun -> 'Hyponyms'
                                cursyn.tracePtrs(sch.ptp, pos, depth);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty)
                                 * { // TDMS 25 Oct 2005 - restrict to relevant values
                                 * cursyn.frames.Clear(); // TDMS 03 Jul 2006 - frames get added in wordnet.cs after filtering
                                 * senses.Add(cursyn);
                                 * }
                                 */
                                break;

                            default:
                                cursyn.tracePtrs(sch.ptp, pos, depth);
                                if (cursyn.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                {
                                    senses.Add(cursyn);
                                }
                                /*
                                 * if (cursyn.senses != null )
                                 * if (cursyn.senses.isDirty) // TDMS 25 Oct 2005 - restrict to relevant values
                                 * senses.Add(cursyn);
                                 */
                                break;
                        }
                        // Target of the duplicate-synset check above; the empty
                        // statement only exists to carry the label.
                        skipit :;
                    }
                }
            }
            break;
    }
}
// Returns the next candidate base form for 'str', or null when there is none.
// The first call tries, in order: the exception list, a plain morph of the whole
// string (non-verbs only), the verb-with-preposition morph, and finally a
// word-by-word morph of the '_' / '-' separated parts. Later calls are driven by
// the state left behind in svprep, svcnt and e by the earlier calls.
public string next()
{
    string candidate;

    if (!firsttime)
    {
        // A preposition morph was returned last time: nothing further to offer.
        if (svprep > 0)
        {
            svprep = 0;
            return null;
        }
        // Keep draining the exception lookup that is already in progress.
        if (svcnt == 1)
        {
            return e.next();
        }
        // Otherwise start a fresh exception lookup on the original string.
        svcnt = 1;
        e = new Exceptions(str, pos);
        candidate = e.next();
        if (candidate != null && candidate != str)
        {
            return candidate;
        }
        return null;
    }

    /* first time through for this string */
    firsttime = false;
    int parts = str.Split('_').Length;
    svprep = 0;

    /* first try exception list */
    e = new Exceptions(str, pos);
    candidate = e.next();
    if (candidate != null && candidate != str)
    {
        svcnt = 1; /* force next time to pass NULL */
        return candidate;
    }

    /* then try simply morph on original string */
    if (pos.name != "verb")
    {
        candidate = morphword(str);
        if (candidate != null && str != candidate)
        {
            return candidate;
        }
    }

    // Verb phrases of more than one word that contain a preposition get the
    // dedicated preposition morph.
    if (pos.name == "verb" && parts > 1)
    {
        int prepIndex = hasprep(str, parts);
        if (prepIndex != 0)
        {
            svprep = prepIndex;
            return morphprep(str);
        }
    }

    // Word-by-word morph: rebuild the phrase in searchstr from the morphed parts,
    // keeping each separator as it was.
    svcnt = parts = str.Split('_').Length;
    int segmentStart = 0;
    string piece;
    string morphed;
    while (--parts > 0)
    {
        string remainder = str.Substring(segmentStart);
        int underscoreAt = remainder.IndexOf('_') + segmentStart;
        int hyphenAt = remainder.IndexOf('-') + segmentStart;
        // A separator that was not found ends up below segmentStart.
        bool hasUnderscore = underscoreAt >= segmentStart;
        bool hasHyphen = hyphenAt >= segmentStart;

        int segmentEnd;
        string separator;
        if (hasUnderscore && (!hasHyphen || underscoreAt < hyphenAt))
        {
            segmentEnd = underscoreAt;
            separator = "_";
        }
        else
        {
            segmentEnd = hyphenAt;
            separator = "-";
        }
        if (segmentEnd < 0)
        {
            return null;
        }

        piece = str.Substring(segmentStart, segmentEnd - segmentStart);
        morphed = morphword(piece);
        searchstr += (morphed != null) ? morphed : piece;
        searchstr += separator;
        segmentStart = segmentEnd + 1;
    }

    // Last part: everything after the final separator that was consumed.
    piece = str.Substring(segmentStart);
    morphed = morphword(piece);
    searchstr += (morphed != null) ? morphed : piece;

    // Only a rebuilt phrase that differs from the input and exists in the
    // database counts as a result.
    if (searchstr != str && WNDB.is_defined(searchstr, pos).NonEmpty)
    {
        return searchstr;
    }
    return null;
}
// Morphs a multi-word verb phrase whose words are joined by '_'.
/* Assume that the verb is the first word in the phrase. Strip it
 * off, check for validity, then try various morphs with the
 * rest of the phrase tacked on, trying to find a match. */
// Returns the first candidate phrase that is defined as a verb, a phrase built
// from the unmorphed verb when that differs from 's', or null.
string morphprep(string s)
{
    int firstSep = s.IndexOf('_');
    int lastSep = s.LastIndexOf('_');

    string morphedLast = null;
    string morphedEnd = "";
    if (firstSep != lastSep)
    {
        // more than 2 words: also prepare an ending whose last word is morphed
        morphedLast = morphword(s.Substring(lastSep + 1));
        if (morphedLast != null)
        {
            morphedEnd = s.Substring(firstSep, lastSep - firstSep + 1) + morphedLast;
        }
    }

    // The verb itself may only contain letters and digits.
    string verbWord = s.Substring(0, firstSep);
    foreach (char ch in verbWord)
    {
        if (!char.IsLetterOrDigit(ch))
        {
            return null;
        }
    }

    PartOfSpeech verb = PartOfSpeech.of("verb");
    int firstRule = offsets[verb.ident];
    int ruleCount = cnts[verb.ident];
    string tail = s.Substring(firstSep);
    // The alternative ending is only tried when the last word could be morphed.
    string altTail = (morphedLast != null) ? morphedEnd : null;
    string found;

    /* First try to find the verb in the exception list */
    Exceptions irregular = new Exceptions(verbWord, verb);
    for (string form = irregular.next(); form != null && form != verbWord; form = irregular.next())
    {
        found = definedVerbPhrase(form, tail, altTail, verb);
        if (found != null)
        {
            return found;
        }
    }

    // Then try every verb suffix rule.
    for (int rule = 0; rule < ruleCount; rule++)
    {
        string form = wordbase(verbWord, rule + firstRule);
        if (form == null || form == verbWord)
        {
            continue; // ending is not different
        }
        found = definedVerbPhrase(form, tail, altTail, verb);
        if (found != null)
        {
            return found;
        }
    }

    // Fall back to the unmorphed verb, provided the result differs from the input.
    string unchanged = verbWord + tail;
    if (s != unchanged)
    {
        return unchanged;
    }
    if (morphedLast != null)
    {
        string withMorphedEnd = verbWord + morphedEnd;
        if (s != withMorphedEnd)
        {
            return withMorphedEnd;
        }
    }
    return null;
}

// Returns stem + tail when that phrase is defined for 'p'; otherwise
// stem + altTail when altTail is given and that phrase is defined; otherwise null.
private static string definedVerbPhrase(string stem, string tail, string altTail, PartOfSpeech p)
{
    string phrase = stem + tail;
    if (WNDB.is_defined(phrase, p).NonEmpty)
    {
        return phrase;
    }
    if (altTail != null)
    {
        phrase = stem + altTail;
        if (WNDB.is_defined(phrase, p).NonEmpty)
        {
            return phrase;
        }
    }
    return null;
}
// Convenience overload: searches for 'word' in the index of part of speech
// 'pos' by forwarding to the overload that takes the index returned by
// WNDB.index(pos), and returns that overload's result unchanged.
public static string binSearch(string word, PartOfSpeech pos)
{
    return WNDB.binSearch(word, WNDB.index(pos));
}