예제 #1
0
파일: SynSet.cs 프로젝트: ywscr/MindMap
        /// <summary>
        /// Finds the example sentence references for the current word in the
        /// example sentence index file (SENTIDX.VRB) and hands each reference
        /// to <c>getExample</c>.
        /// </summary>
        /// <returns>
        /// true when the index contained an entry for the word and at least one
        /// reference was passed to <c>getExample</c>; otherwise false.
        /// </returns>
        bool findExample()
        {
            // Read the whole index up front and release the file handle immediately,
            // so that an exception further down cannot leak the open file.
            byte[] indexBytes;
            using (StreamReader reader = new StreamReader(WNDB.path + "SENTIDX.VRB"))
            {
                indexBytes = System.Text.Encoding.Unicode.GetBytes(reader.ReadToEnd());
            }

            // whichword is 1-based; words is 0-based.
            Lexeme lx  = words[whichword - 1];
            string key = lx.word + "%" + pos.ident + ":" + fnum + ":" + lx.uniq + "::";

            string line;
            using (MemoryStream ms = new MemoryStream(indexBytes))
            {
                line = WNDB.binSearch(key, ms);
            }

            if (line == null)
            {
                return false;
            }

            // Skip the leading key portion of the matched line.
            // NOTE(review): the constant 11 presumably covers the fixed-width
            // "%p:ff:uu:: " part that follows the word - confirm against the file format.
            string     refs   = line.Substring(lx.word.Length + 11);
            StrTok     tokens = new StrTok(refs, ' ', ',', '\n');
            bool       found  = false;
            string     offset;

            while ((offset = tokens.next()) != null)
            {
                getExample(offset, lx.word);
                found = true;
            }
            return found;
        }
예제 #2
0
        /* From search.c:
         * Find word in index file and return parsed entry in data structure.
         * Input word must be exact match of string in database. */

        // From the WordNet Manual (http://wordnet.princeton.edu/man/wnsearch.3WN.html)
        // index_lookup() finds searchstr in the index file for pos and returns a pointer
        // to the parsed entry in an Index data structure. searchstr must exactly match the
        // form of the word (lower case only, hyphens and underscores in the same places) in
        // the index file. NULL is returned if a match is not found.
        /// <summary>
        /// Searches the index for <paramref name="pos"/> for a line matching
        /// <paramref name="word"/> and parses that line into an <see cref="Index"/>.
        /// </summary>
        /// <param name="word">The search string; its first character must be a letter or a number.</param>
        /// <param name="pos">The part of speech passed to <c>WNDB.binSearch</c>.</param>
        /// <returns>
        /// The parsed entry, or null when <paramref name="word"/> is null or empty,
        /// does not start with a letter or number, or no matching line is found.
        /// </returns>
        public static Index lookup(string word, PartOfSpeech pos)
        {
            // A null word previously slipped past the == "" check and crashed on word[0];
            // treat it the same way as an empty search string.
            if (string.IsNullOrEmpty(word))
            {
                return(null);
            }
            // TDMS 14 Aug 2005 - changed to allow for numbers as well
            // because the database contains searches that can start with
            // numerals
            if (!char.IsLetter(word[0]) && !char.IsNumber(word[0]))
            {
                return(null);
            }
            string line = WNDB.binSearch(word, pos);

            if (line == null)
            {
                return(null);
            }
            Index  idx = new Index();
            StrTok st  = new StrTok(line);

            // Tokens are consumed strictly in the order they appear on the line.
            idx.wd        = st.next();                  /* the word */
            idx.pos       = PartOfSpeech.of(st.next()); /* the part of speech */
            idx.sense_cnt = int.Parse(st.next());       /* collins count */
            int ptruse_cnt = int.Parse(st.next());      /* number of pointers types */

            idx.ptruse = new PointerType[ptruse_cnt];
            for (int j = 0; j < ptruse_cnt; j++)
            {
                idx.ptruse[j] = PointerType.of(st.next());
            }

            // The offset count precedes the tagged-sense count, which in turn
            // precedes the offsets themselves.
            int off_cnt = int.Parse(st.next());

            idx.offs         = new int[off_cnt];
            idx.tagsense_cnt = int.Parse(st.next());
            for (int j = 0; j < off_cnt; j++)
            {
                idx.offs[j] = int.Parse(st.next());
            }
            return(idx);
        }
예제 #3
0
파일: SynSet.cs 프로젝트: ywscr/MindMap
        /// <summary>
        /// Fills this synset from a single data line: file number, word list,
        /// pointers, optional frames and the definition text after the '|' marker.
        /// </summary>
        /// <param name="s">The raw line to tokenize.</param>
        /// <param name="fpos">Part of speech of the file being read; only its name is checked (for "adj").</param>
        /// <param name="word">
        /// The word being searched for. It is compared against the lower-cased form of
        /// each word on the line to set <c>whichword</c> (1-based).
        /// </param>
        void Parse(string s, PartOfSpeech fpos, string word)
        {
            int    j;
            StrTok st = new StrTok(s);

            // The first token is parsed (so a malformed line still fails here)
            // but its value is not used by this method.
            int off = int.Parse(st.next(), CultureInfo.InvariantCulture);

            fnum = int.Parse(st.next(), CultureInfo.InvariantCulture);
            string f = st.next();

            // Named synsetPos so it does not hide the class member 'pos'.
            PartOfSpeech synsetPos = PartOfSpeech.of(f);

            if (synsetPos.clss == "SATELLITE")
            {
                sstype = AdjSynSetType.IndirectAnt;
            }

            // Word count is stored in hexadecimal.
            int wcnt = int.Parse(st.next(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

            words = new Lexeme[wcnt];
            for (j = 0; j < wcnt; j++)
            {
                words[j]      = new Lexeme();
                words[j].word = st.next();
                words[j].uniq = int.Parse(st.next(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                // Thanh Dao 7 Nov 2005 - Added missing word sense values
                int ss = getsearchsense(j + 1);
                words[j].wnsns = ss;

                // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a
                // dotless 'ı' under Turkish cultures, which would prevent the match.
                if (words[j].word.ToLowerInvariant() == word)
                {
                    whichword = j + 1;
                }
            }

            int pcnt = int.Parse(st.next(), CultureInfo.InvariantCulture);

            ptrs = new Pointer[pcnt];
            for (j = 0; j < pcnt; j++)
            {
                string p = st.next();
                ptrs[j] = new Pointer(p);

                // For adjective files, the first antonym or pertainym pointer decides
                // the synset type while it is still undetermined.
                if (fpos.name == "adj" && sstype == AdjSynSetType.DontKnow)
                {
                    // TDMS 11 JUL 2006 - comparison changed to int (was mnemonic == "ANTPTR")
                    if (ptrs[j].ptp.ident == ANTPTR)
                    {
                        sstype = AdjSynSetType.DirectAnt;
                    }
                    // TDMS 11 JUL 2006 - comparison changed to int (was mnemonic == "PERTPTR")
                    else if (ptrs[j].ptp.ident == PERTPTR)
                    {
                        sstype = AdjSynSetType.Pertainym;
                    }
                }
                ptrs[j].off = int.Parse(st.next(), CultureInfo.InvariantCulture);
                ptrs[j].pos = PartOfSpeech.of(st.next());

                // One hex field packs two values: high bits -> sce, low byte -> dst.
                int sx = int.Parse(st.next(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                ptrs[j].sce = sx >> 8;
                ptrs[j].dst = sx & 0xff;
            }

            // Next token is either the frame count or the '|' that starts the definition.
            f = st.next();
            if (f != "|")
            {
                int fcnt = int.Parse(f, CultureInfo.InvariantCulture);
                for (j = 0; j < fcnt; j++)
                {
                    f = st.next();                     // +
                    Frame fr = Frame.frame(int.Parse(st.next(), CultureInfo.InvariantCulture));
                    frames.Add(new SynSetFrame(fr, int.Parse(st.next(), NumberStyles.HexNumber, CultureInfo.InvariantCulture)));
                }
                f = st.next();                         // consume the token after the frames
            }

            // Everything after the first '|' in the raw line is the definition.
            defn = s.Substring(s.IndexOf('|') + 1);
        }