예제 #1
0
        /// <summary>
        /// Collects the synsets of a lemma within a single part of speech.
        /// </summary>
        /// <param name="lemma">Lemma handed to <c>GetIndexWord</c>.</param>
        /// <param name="partOfSpeech">Part of speech handed to <c>GetIndexWord</c> and <c>CreateSynset</c>.</param>
        /// <returns>
        /// One synset per entry of the index word's <c>SynsetOffsets</c>, in that order;
        /// an empty array when <c>GetIndexWord</c> returns null.
        /// </returns>
        public override Synset[] GetSynsets(string lemma, string partOfSpeech)
        {
            IndexWord entry = GetIndexWord(lemma, partOfSpeech);
            List<Synset> found = new List<Synset>();

            // No index entry: hand back the (empty) result right away.
            if (entry == null)
            {
                return found.ToArray();
            }

            foreach (int offset in entry.SynsetOffsets)
            {
                found.Add(CreateSynset(partOfSpeech, offset));
            }

            return found.ToArray();
        }
예제 #2
0
        /// <summary>
        /// Collects the synsets of a lemma across every part of speech that is a key of
        /// <c>_dataFileDictionary</c>.
        /// </summary>
        /// <param name="lemma">Lemma handed to <c>GetIndexWord</c> for each part of speech.</param>
        /// <returns>
        /// The synsets created for every offset of every index word found, grouped in the
        /// enumeration order of the dictionary keys; an empty array when nothing is found.
        /// </returns>
        public override Synset[] GetSynsets(string lemma)
        {
            var collected = new List<Synset>();

            foreach (string pos in _dataFileDictionary.Keys)
            {
                IndexWord entry = GetIndexWord(lemma, pos);

                // Parts of speech without an index entry contribute nothing.
                if (entry == null)
                {
                    continue;
                }

                foreach (int offset in entry.SynsetOffsets)
                {
                    collected.Add(CreateSynset(pos, offset));
                }
            }

            return collected.ToArray();
        }
예제 #3
0
        /// <summary>
        /// Returns the synset for one specific sense of a lemma.
        /// </summary>
        /// <param name="lemma">Lemma handed to <c>GetIndexWord</c>.</param>
        /// <param name="partOfSpeech">Part of speech handed to <c>GetIndexWord</c> and <c>CreateSynset</c>.</param>
        /// <param name="senseNumber">1-based position of the wanted sense within the index word's <c>SynsetOffsets</c>.</param>
        /// <returns>
        /// The synset created for the requested sense, or null when no index word is found
        /// or when <paramref name="senseNumber"/> exceeds the number of available senses.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="senseNumber"/> is less than 1.
        /// </exception>
        public override Synset GetSynset(string lemma, string partOfSpeech, int senseNumber)
        {
            if (senseNumber < 1)
            {
                throw new ArgumentOutOfRangeException("senseNumber", senseNumber, "cannot be less than 1");
            }

            IndexWord indexWord = GetIndexWord(lemma, partOfSpeech);
            if (indexWord == null)
            {
                return null;
            }

            // senseNumber is 1-based, so the valid range is 1..Length. Anything above Length
            // would index past the end of SynsetOffsets after the "- 1" adjustment below.
            if (senseNumber > indexWord.SynsetOffsets.Length)
            {
                return null;
            }

            return CreateSynset(partOfSpeech, indexWord.SynsetOffsets[senseNumber - 1]);
        }
예제 #4
0
        /// <summary>
        /// Reads the data-file record that starts at <paramref name="synsetOffset"/> and builds
        /// a <see cref="Synset"/> from its fields.
        /// </summary>
        /// <param name="partOfSpeech">Key used to look up the data file in <c>mDataFileDictionary</c>.</param>
        /// <param name="synsetOffset">Position, from the beginning of the data file's stream, at which the record starts.</param>
        /// <returns>
        /// A synset built from <paramref name="synsetOffset"/>, the record's gloss (everything
        /// after the first '|'), its words, its lexicographer file name and its relations.
        /// </returns>
        /// <remarks>
        /// All numeric fields are parsed with the invariant culture because the record is
        /// machine-readable data, not user-facing text. Fields that are not needed for the
        /// resulting synset are still consumed and parsed, so a malformed record fails
        /// in the same place it always did.
        /// </remarks>
        protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
        {
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            System.Globalization.NumberStyles hex = System.Globalization.NumberStyles.HexNumber;

            StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;

            // Drop whatever the reader has buffered before moving the underlying stream,
            // otherwise ReadLine would continue from stale buffered data.
            dataFile.DiscardBufferedData();
            dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
            string record = dataFile.ReadLine();

            Tokenizer tokenizer = new Tokenizer(record);

            // Leading offset field: validated but not used; the synset is built from the
            // synsetOffset argument instead.
            int.Parse(tokenizer.NextToken(), invariant);
            string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken(), invariant)];
            tokenizer.NextToken(); // synset type field, not needed here
            int wordCount = int.Parse(tokenizer.NextToken(), hex, invariant);

            string[] words = new string[wordCount];
            for (int currentWord = 0; currentWord < wordCount; currentWord++)
            {
                // Words are stored with '_' in place of spaces.
                words[currentWord] = tokenizer.NextToken().Replace("_", " ");

                // Each word is followed by a hexadecimal id that is validated and skipped.
                int.Parse(tokenizer.NextToken(), hex, invariant);
            }

            int relationCount = int.Parse(tokenizer.NextToken(), invariant);

            Relation[] relations = new Relation[relationCount];
            for (int currentRelation = 0; currentRelation < relationCount; currentRelation++)
            {
                string relationTypeKey = tokenizer.NextToken();
                int targetSynsetOffset = int.Parse(tokenizer.NextToken(), invariant);
                string targetPartOfSpeech = ExpandPartOfSpeechCode(tokenizer.NextToken());

                // Four hex digits: high byte is the source word, low byte the target word.
                // Zero means the relation is not tied to specific words.
                int sourceTarget = int.Parse(tokenizer.NextToken(), hex, invariant);
                if (sourceTarget == 0)
                {
                    relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech);
                }
                else
                {
                    int sourceWord = sourceTarget >> 8;
                    int targetWord = sourceTarget & 0xff;
                    relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
                }
            }

            // The next token is either the "|" that introduces the gloss or a frame count
            // followed by that many frame entries.
            string frameData = tokenizer.NextToken();
            if (frameData != "|")
            {
                int frameCount = int.Parse(frameData, invariant);
                for (int currentFrame = 0; currentFrame < frameCount; currentFrame++)
                {
                    tokenizer.NextToken();                              // frame marker (presumably "+")
                    int.Parse(tokenizer.NextToken(), invariant);        // frame number, validated and skipped
                    int.Parse(tokenizer.NextToken(), hex, invariant);   // word id, validated and skipped
                }

                tokenizer.NextToken(); // token following the frames (presumably the "|" separator)
            }

            // The gloss is taken straight from the raw record rather than from the tokenizer.
            string gloss = record.Substring(record.IndexOf('|') + 1);

            return new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
        }

        /// <summary>
        /// Maps a one-letter part-of-speech code from a data record to its full name;
        /// unrecognized codes are returned unchanged.
        /// </summary>
        private static string ExpandPartOfSpeechCode(string code)
        {
            switch (code)
            {
                case "n":
                    return "noun";

                case "v":
                    return "verb";

                case "a":
                case "s":
                    return "adjective";

                case "r":
                    return "adverb";

                default:
                    return code;
            }
        }
예제 #5
0
		//private void  getParents(Synset synset, System.Collections.IList parents)
		//{
            //Pointer[] pointers = synset.getPointers();
            //for (int pi = 0, pn = pointers.length; pi < pn; pi++)
            //{
            //    if (pointers[pi].getType() == PointerType.HYPERNYM)
            //    {
            //        Synset parent = pointers[pi].getTargetSynset();
            //        parents.Add(System.Convert.ToString(parent.getOffset()));
            //        getParents(parent, parents);
            //    }
            //}
		//}

        /// <summary>
        /// Walks the "Hypernym" relations of a synset depth-first and appends the offset of
        /// every synset reached to <paramref name="parentOffsets"/>.
        /// </summary>
        /// <param name="currentSynset">Synset whose relations are examined.</param>
        /// <param name="parentOffsets">
        /// List that receives each parent's offset, formatted with the invariant culture.
        /// A parent's offset is added before that parent's own ancestors.
        /// </param>
        private void getParents(Synset currentSynset, List<string> parentOffsets)
        {
            int index = 0;
            while (index < currentSynset.RelationCount)
            {
                Relation link = currentSynset.GetRelation(index++);

                if (link.SynsetRelationType.Name == "Hypernym")
                {
                    Synset parent = link.TargetSynset;
                    string offsetText = parent.Offset.ToString(System.Globalization.CultureInfo.InvariantCulture);

                    parentOffsets.Add(offsetText);

                    // Recurse so the parent's own hypernyms follow it in the list.
                    getParents(parent, parentOffsets);
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Builds a <see cref="Synset"/> from the data-file record that starts at
        /// <paramref name="synsetOffset"/>.
        /// </summary>
        /// <param name="partOfSpeech">Key used to look up the data file in <c>mDataFileDictionary</c>.</param>
        /// <param name="synsetOffset">Position, from the beginning of the data file's stream, at which the record starts.</param>
        /// <returns>
        /// A synset built from <paramref name="synsetOffset"/>, the record's gloss (the text
        /// after the first '|'), its words, its lexicographer file name and its relations.
        /// </returns>
        /// <remarks>
        /// Numeric fields are parsed with the invariant culture since the record is
        /// machine-readable data. Fields the synset does not need are still read and parsed
        /// so that a malformed record is rejected rather than silently accepted.
        /// </remarks>
        protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
        {
            System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
            System.Globalization.NumberStyles hexStyle = System.Globalization.NumberStyles.HexNumber;

            StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;

            // Buffered data must be discarded before repositioning the underlying stream.
            dataFile.DiscardBufferedData();
            dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
            string record = dataFile.ReadLine();

            Tokenizer tokenizer = new Tokenizer(record);

            // The record's own offset field is validated only; synsetOffset is what gets stored.
            int.Parse(tokenizer.NextToken(), culture);
            string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken(), culture)];
            tokenizer.NextToken(); // synset type, not used
            int wordCount = int.Parse(tokenizer.NextToken(), hexStyle, culture);

            string[] words = new string[wordCount];
            for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
            {
                // Underscores stand in for spaces inside a word.
                words[wordIndex] = tokenizer.NextToken().Replace("_", " ");

                // Hexadecimal id that follows every word: validated and skipped.
                int.Parse(tokenizer.NextToken(), hexStyle, culture);
            }

            int relationCount = int.Parse(tokenizer.NextToken(), culture);
            Relation[] relations = new Relation[relationCount];
            for (int currentRelation = 0; currentRelation < relationCount; currentRelation++)
            {
                string relationTypeKey = tokenizer.NextToken();
                int targetSynsetOffset = int.Parse(tokenizer.NextToken(), culture);

                // Expand the one-letter code; any other value is passed through unchanged.
                string targetPartOfSpeech = tokenizer.NextToken();
                switch (targetPartOfSpeech)
                {
                    case "n":
                        targetPartOfSpeech = "noun";
                        break;
                    case "v":
                        targetPartOfSpeech = "verb";
                        break;
                    case "a":
                    case "s":
                        targetPartOfSpeech = "adjective";
                        break;
                    case "r":
                        targetPartOfSpeech = "adverb";
                        break;
                }

                // High byte = source word, low byte = target word; zero means the relation
                // is not tied to specific words.
                int sourceTarget = int.Parse(tokenizer.NextToken(), hexStyle, culture);
                if (sourceTarget == 0)
                {
                    relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech);
                }
                else
                {
                    int sourceWord = sourceTarget >> 8;
                    int targetWord = sourceTarget & 0xff;
                    relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
                }
            }

            // Either the "|" that starts the gloss, or a frame count followed by frame entries.
            string frameData = tokenizer.NextToken();
            if (frameData != "|")
            {
                int frameCount = int.Parse(frameData, culture);
                for (int currentFrame = 0; currentFrame < frameCount; currentFrame++)
                {
                    tokenizer.NextToken();                                // frame marker (presumably "+")
                    int.Parse(tokenizer.NextToken(), culture);            // frame number, validated and skipped
                    int.Parse(tokenizer.NextToken(), hexStyle, culture);  // word id, validated and skipped
                }

                tokenizer.NextToken(); // token after the frames (presumably the "|" separator)
            }

            // The gloss comes from the raw record, not from the tokenizer.
            string gloss = record.Substring(record.IndexOf('|') + 1);

            return new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
        }