Ejemplo n.º 1
0
        /// <summary>
        /// Builds a synset for every offset listed in the index entry of
        /// <paramref name="lemma"/> under <paramref name="partOfSpeech"/>.
        /// </summary>
        /// <param name="lemma">Word to look up via <c>GetIndexWord</c>.</param>
        /// <param name="partOfSpeech">Part of speech passed to <c>GetIndexWord</c> and <c>CreateSynset</c>.</param>
        /// <returns>
        /// One synset per entry of the index word's <c>SynsetOffsets</c>, in the same order;
        /// an empty array when <c>GetIndexWord</c> returns null.
        /// </returns>
        public override Synset[] GetSynsets(string lemma, string partOfSpeech)
        {
            IndexWord entry = GetIndexWord(lemma, partOfSpeech);
            var collected = new List<Synset>();

            // No index entry: hand back an empty array rather than null.
            if (entry == null)
            {
                return collected.ToArray();
            }

            foreach (int offset in entry.SynsetOffsets)
            {
                collected.Add(CreateSynset(partOfSpeech, offset));
            }

            return collected.ToArray();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns the synset for one specific sense of <paramref name="lemma"/>.
        /// </summary>
        /// <param name="lemma">Word to look up via <c>GetIndexWord</c>.</param>
        /// <param name="partOfSpeech">Part of speech passed to <c>GetIndexWord</c> and <c>CreateSynset</c>.</param>
        /// <param name="senseNumber">1-based position within the index word's <c>SynsetOffsets</c>.</param>
        /// <returns>
        /// The synset created from the offset at position <paramref name="senseNumber"/>, or null
        /// when no index word is found or the index word has fewer senses than requested.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="senseNumber"/> is less than 1.</exception>
        public override Synset GetSynset(string lemma, string partOfSpeech, int senseNumber)
        {
            if (senseNumber < 1)
            {
                throw new ArgumentOutOfRangeException("senseNumber", senseNumber, "cannot be less than 1");
            }

            IndexWord indexWord = GetIndexWord(lemma, partOfSpeech);

            // senseNumber is 1-based, so the valid range is 1..Length. Comparing against
            // Length + 1 would let senseNumber == Length + 1 through and index past the array.
            if (indexWord == null || senseNumber > indexWord.SynsetOffsets.Length)
            {
                return null;
            }

            return CreateSynset(partOfSpeech, indexWord.SynsetOffsets[senseNumber - 1]);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the single-line record at <paramref name="synsetOffset"/> in the data file for
        /// <paramref name="partOfSpeech"/> and parses it into a <see cref="Synset"/>.
        /// </summary>
        /// <param name="partOfSpeech">Key into the data-file dictionary selecting which data file to read.</param>
        /// <param name="synsetOffset">Position in the data file's underlying stream where the record starts.</param>
        /// <returns>
        /// A synset carrying <paramref name="synsetOffset"/>, the gloss (everything after the first
        /// '|' in the record), the words with underscores replaced by spaces, the lexicographer
        /// file name and the parsed relations.
        /// </returns>
        /// <exception cref="EndOfStreamException">No line could be read at <paramref name="synsetOffset"/>.</exception>
        protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
        {
            StreamReader dataFile = _dataFileDictionary[partOfSpeech].DataFile;

            // Drop the reader's internal buffer before repositioning the underlying stream so
            // the next ReadLine starts at the requested offset.
            dataFile.DiscardBufferedData();
            dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
            string record = dataFile.ReadLine();
            if (record == null)
            {
                throw new EndOfStreamException("No synset record found at offset " + synsetOffset + " in the '" + partOfSpeech + "' data file.");
            }

            // The record is machine-formatted, so numbers are parsed culture-independently.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var hex       = System.Globalization.NumberStyles.HexNumber;

            var tokenizer = new Tokenizer(record);

            // First token is the record's own offset; the caller already supplied it.
            tokenizer.NextToken();

            int    lexicographerFileIndex = int.Parse(tokenizer.NextToken(), invariant);
            string lexicographerFile      = _lexicographerFiles[lexicographerFileIndex];

            // Synset type token: consumed to stay aligned, not used.
            tokenizer.NextToken();

            int wordCount = int.Parse(tokenizer.NextToken(), hex, invariant);
            var words     = new string[wordCount];

            for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
            {
                words[wordIndex] = tokenizer.NextToken().Replace("_", " ");

                // Each word is followed by a hex id that is not needed here.
                tokenizer.NextToken();
            }

            int relationCount = int.Parse(tokenizer.NextToken(), invariant);
            var relations     = new Relation[relationCount];

            for (int relationIndex = 0; relationIndex < relationCount; relationIndex++)
            {
                string relationTypeKey    = tokenizer.NextToken();
                int    targetSynsetOffset = int.Parse(tokenizer.NextToken(), invariant);
                string targetPartOfSpeech = tokenizer.NextToken();

                // Expand the one-letter code to the full part-of-speech name; any other
                // value is passed through unchanged.
                switch (targetPartOfSpeech)
                {
                case "n":
                    targetPartOfSpeech = "noun";
                    break;

                case "v":
                    targetPartOfSpeech = "verb";
                    break;

                case "a":
                case "s":
                    targetPartOfSpeech = "adjective";
                    break;

                case "r":
                    targetPartOfSpeech = "adverb";
                    break;
                }

                int sourceTarget = int.Parse(tokenizer.NextToken(), hex, invariant);
                var relationType = (RelationType)_relationTypeDictionary[relationTypeKey];

                if (sourceTarget == 0)
                {
                    // Zero: the relation is built without word indices.
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech);
                }
                else
                {
                    // Bits above the low byte hold the source word index, the low byte the target word index.
                    int sourceWord = sourceTarget >> 8;
                    int targetWord = sourceTarget & 0xff;
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
                }
            }

            // The next token is either the "|" that precedes the gloss or a frame count.
            string frameData = tokenizer.NextToken();

            if (frameData != "|")
            {
                int frameCount = int.Parse(frameData, invariant);
                for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
                {
                    // Each frame is three tokens, none of which is used here.
                    tokenizer.NextToken(); // "+"
                    tokenizer.NextToken(); // frame number
                    tokenizer.NextToken(); // word id (hex)
                }

                // Token following the frames (presumably the "|" separator); consumed and ignored.
                tokenizer.NextToken();
            }

            // The gloss is taken from the raw record, not the tokenizer: everything after the first '|'.
            string gloss = record.Substring(record.IndexOf('|') + 1);

            return new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
        }