コード例 #1
0
ファイル: Synset.cs プロジェクト: ronnyMakhuddin/SharperNLP
 /// <summary>
 /// Creates a synset from values that have already been parsed.
 /// The arrays are stored by reference; no copies are made.
 /// </summary>
 /// <param name="offset">Value stored in <c>mOffset</c>.</param>
 /// <param name="gloss">Value stored in <c>mGloss</c>.</param>
 /// <param name="wordList">Array stored in <c>mWordList</c>.</param>
 /// <param name="lexicographerFile">Value stored in <c>mLexicographerFile</c>.</param>
 /// <param name="relations">Array stored in <c>mRelations</c>.</param>
 internal Synset(int offset, string gloss, string[] wordList, string lexicographerFile, Relation[] relations)
 {
     // Scalar values first, then the two array references.
     this.mOffset = offset;
     this.mLexicographerFile = lexicographerFile;
     this.mGloss = gloss;

     this.mWordList = wordList;
     this.mRelations = relations;
 }
コード例 #2
0
        /// <summary>
        /// Reads the record that starts at <paramref name="synsetOffset"/> in the data file registered
        /// for <paramref name="partOfSpeech"/> and builds a <see cref="Synset"/> from its fields.
        /// </summary>
        /// <param name="partOfSpeech">Key into <c>mDataFileDictionary</c> that selects the data file to read.</param>
        /// <param name="synsetOffset">Position of the record, counted from the beginning of the data file's base stream.</param>
        /// <returns>
        /// A synset carrying <paramref name="synsetOffset"/>, the record's words (underscores replaced
        /// by spaces), its lexicographer file name, its relations and its gloss.
        /// </returns>
        /// <exception cref="EndOfStreamException">No line could be read at <paramref name="synsetOffset"/>.</exception>
        protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
        {
            StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;

            // The base stream is repositioned by hand, so anything the reader buffered earlier must be dropped.
            dataFile.DiscardBufferedData();
            dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
            string record = dataFile.ReadLine();
            if (record == null)
            {
                // ReadLine returns null at end of stream; fail with a clear message instead of deep inside the tokenizer.
                throw new EndOfStreamException("No synset record found at offset " + synsetOffset + " in the '" + partOfSpeech + "' data file.");
            }

            // The data file is machine-readable, so numbers are parsed independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            Tokenizer tokenizer = new Tokenizer(record);

            // First token: a decimal number. Its value is not needed (the caller's offset is used),
            // but it is still parsed so that a malformed record is rejected.
            int.Parse(tokenizer.NextToken(), invariant);
            string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken(), invariant)];
            tokenizer.NextToken(); // synset type token, not used here
            int wordCount = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);

            string[] words = new string[wordCount];
            for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
            {
                words[wordIndex] = tokenizer.NextToken().Replace("_", " ");

                // Each word is followed by a hex number that is validated but otherwise ignored.
                int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);
            }

            int relationCount = int.Parse(tokenizer.NextToken(), invariant);
            Relation[] relations = new Relation[relationCount];
            for (int relationIndex = 0; relationIndex < relationCount; relationIndex++)
            {
                string relationTypeKey = tokenizer.NextToken();
                int targetSynsetOffset = int.Parse(tokenizer.NextToken(), invariant);

                // Expand the one-letter part-of-speech code; any other value is passed through unchanged.
                string targetPartOfSpeech = tokenizer.NextToken();
                switch (targetPartOfSpeech)
                {
                    case "n":
                        targetPartOfSpeech = "noun";
                        break;
                    case "v":
                        targetPartOfSpeech = "verb";
                        break;
                    case "a":
                    case "s":
                        targetPartOfSpeech = "adjective";
                        break;
                    case "r":
                        targetPartOfSpeech = "adverb";
                        break;
                }

                int sourceTarget = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);

                // Looked up only after every token of the relation has been parsed, so the order
                // in which errors surface for a malformed record stays the same as before.
                RelationType relationType = (RelationType)mRelationTypeDictionary[relationTypeKey];

                if (sourceTarget == 0)
                {
                    // Zero: the relation is created without source/target word numbers.
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech);
                }
                else
                {
                    // Bits above the low byte give the source word number, the low byte the target word number.
                    int sourceWord = sourceTarget >> 8;
                    int targetWord = sourceTarget & 0xff;
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
                }
            }

            // Either the "|" separator follows directly, or a count of frame entries comes first.
            string afterRelations = tokenizer.NextToken();
            if (afterRelations != "|")
            {
                int frameCount = int.Parse(afterRelations, invariant);
                for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
                {
                    // Each entry is three tokens: a "+" marker, a decimal number and a hex number.
                    // None of the values are kept; they are parsed only to validate the record.
                    tokenizer.NextToken(); // +
                    int.Parse(tokenizer.NextToken(), invariant);
                    int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);
                }
                tokenizer.NextToken(); // token following the frame entries
            }

            // The gloss is everything after the first '|'. When the record has no '|',
            // IndexOf returns -1 and the whole record becomes the gloss.
            string gloss = record.Substring(record.IndexOf('|') + 1);

            return new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
        }
コード例 #3
0
        /// <summary>
        /// Builds a <see cref="Synset"/> from the single data-file line found at
        /// <paramref name="synsetOffset"/> in the file registered for <paramref name="partOfSpeech"/>.
        /// </summary>
        /// <param name="partOfSpeech">Key into <c>_dataFileDictionary</c> that selects the data file to read.</param>
        /// <param name="synsetOffset">Position of the record, counted from the beginning of the data file's base stream.</param>
        /// <returns>
        /// A synset carrying <paramref name="synsetOffset"/>, the record's words (underscores replaced
        /// by spaces), its lexicographer file name, its relations and its gloss.
        /// </returns>
        /// <exception cref="EndOfStreamException">No line could be read at <paramref name="synsetOffset"/>.</exception>
        protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
        {
            StreamReader dataFile = _dataFileDictionary[partOfSpeech].DataFile;

            // Drop stale buffered data before moving the underlying stream to the record.
            dataFile.DiscardBufferedData();
            dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);

            string record = dataFile.ReadLine();
            if (record == null)
            {
                // ReadLine yields null at end of stream; report that explicitly rather than failing later.
                throw new EndOfStreamException("No synset record found at offset " + synsetOffset + " in the '" + partOfSpeech + "' data file.");
            }

            // All numeric fields are machine-written, so parse them with the invariant culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var tokenizer = new Tokenizer(record);

            // The first token is a decimal number whose value is not needed (the caller's offset
            // is what gets stored); parsing it still rejects a malformed record.
            int.Parse(tokenizer.NextToken(), invariant);

            int lexicographerFileIndex = int.Parse(tokenizer.NextToken(), invariant);
            string lexicographerFile = _lexicographerFiles[lexicographerFileIndex];

            tokenizer.NextToken(); // synset type token, not used here

            int wordCount = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);
            var words = new string[wordCount];

            for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
            {
                words[wordIndex] = tokenizer.NextToken().Replace("_", " ");

                // The hex number after each word is validated but its value is discarded.
                int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);
            }

            int relationCount = int.Parse(tokenizer.NextToken(), invariant);
            var relations = new Relation[relationCount];

            for (int relationIndex = 0; relationIndex < relationCount; relationIndex++)
            {
                string relationTypeKey = tokenizer.NextToken();
                int targetSynsetOffset = int.Parse(tokenizer.NextToken(), invariant);

                // One-letter codes are expanded to full names; unrecognised values pass through as-is.
                string targetPartOfSpeech = tokenizer.NextToken();
                switch (targetPartOfSpeech)
                {
                    case "n":
                        targetPartOfSpeech = "noun";
                        break;

                    case "v":
                        targetPartOfSpeech = "verb";
                        break;

                    case "a":
                    case "s":
                        targetPartOfSpeech = "adjective";
                        break;

                    case "r":
                        targetPartOfSpeech = "adverb";
                        break;
                }

                int sourceTarget = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);

                // Single lookup, performed after all tokens of this relation were parsed so that
                // errors for malformed records surface in the same order as before.
                var relationType = (RelationType)_relationTypeDictionary[relationTypeKey];

                if (sourceTarget == 0)
                {
                    // Zero: build the relation without source/target word numbers.
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech);
                }
                else
                {
                    // Low byte is the target word number; the bits above it are the source word number.
                    int sourceWord = sourceTarget >> 8;
                    int targetWord = sourceTarget & 0xff;
                    relations[relationIndex] = new Relation(this, relationType, targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
                }
            }

            // What follows is either the "|" separator or a count of frame entries.
            string afterRelations = tokenizer.NextToken();
            if (afterRelations != "|")
            {
                int frameCount = int.Parse(afterRelations, invariant);
                for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
                {
                    // Three tokens per entry: "+" marker, decimal number, hex number.
                    // The values are not stored; parsing only validates the record.
                    tokenizer.NextToken(); // +
                    int.Parse(tokenizer.NextToken(), invariant);
                    int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber, invariant);
                }
                tokenizer.NextToken(); // token following the frame entries
            }

            // Gloss = text after the first '|'. Without a '|', IndexOf returns -1,
            // so the substring starts at 0 and the whole record is used.
            string gloss = record.Substring(record.IndexOf('|') + 1);

            return new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
        }