/// <summary>
/// Initializes a synset with the values parsed from a WordNet data-file record.
/// </summary>
/// <param name="offset">Byte offset of the record in its data file.</param>
/// <param name="gloss">Gloss (definition/example) text for the synset.</param>
/// <param name="wordList">Words (lemmas) belonging to the synset.</param>
/// <param name="lexicographerFile">Name of the lexicographer file the synset came from.</param>
/// <param name="relations">Relations (pointers) from this synset to others.</param>
internal Synset(int offset, string gloss, string[] wordList, string lexicographerFile, Relation[] relations)
{
    this.mOffset = offset;
    this.mGloss = gloss;
    this.mWordList = wordList;
    this.mLexicographerFile = lexicographerFile;
    this.mRelations = relations;
}
/// <summary>
/// Reads the raw record at <paramref name="synsetOffset"/> in the data file for
/// <paramref name="partOfSpeech"/> and parses it into a <see cref="Synset"/>.
/// The record layout is token-based: header fields, a word block, a relation
/// (pointer) block, an optional frame block, then a '|'-delimited gloss.
/// </summary>
/// <param name="partOfSpeech">Key into the data-file dictionary ("noun", "verb", ...).</param>
/// <param name="synsetOffset">Byte offset of the record within that data file.</param>
/// <returns>The parsed synset.</returns>
protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
{
    // Seek directly to the record; buffered reader state from any previous read
    // must be discarded or ReadLine would return stale data.
    StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;
    dataFile.DiscardBufferedData();
    dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
    string record = dataFile.ReadLine();
    Tokenizer tokenizer = new Tokenizer(record);
    // Header: offset field (parsed to consume/validate the token, then unused),
    // lexicographer-file number (index into mLexicographerFiles), type marker (unused).
    int offset = int.Parse(tokenizer.NextToken());
    string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken())];
    string synsetType = tokenizer.NextToken();
    // Word block: the count is HEXADECIMAL; each word is followed by a hex
    // lexical id (consumed and discarded). Underscores encode spaces in the file.
    int wordCount = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    string[] words = new string[wordCount];
    for (int iCurrentWord = 0; iCurrentWord < wordCount; iCurrentWord++)
    {
        words[iCurrentWord] = tokenizer.NextToken().Replace("_", " ");
        int uniqueID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    }
    // Relation (pointer) block: decimal count, then per relation a type key,
    // target synset offset, target part-of-speech letter, and a hex source/target field.
    int relationCount = int.Parse(tokenizer.NextToken());
    Relation[] relations = new Relation[relationCount];
    for (int currentRelation = 0; currentRelation < relationCount; currentRelation++)
    {
        string relationTypeKey = tokenizer.NextToken();
        // Legacy adjective-satellite handling kept for reference:
        // if (fpos.name=="adj" && sstype==AdjSynSetType.DontKnow)
        // {
        //     if (ptrs[j].ptp.mnemonic=="ANTPTR")
        //         sstype = AdjSynSetType.DirectAnt;
        //     else if (ptrs[j].ptp.mnemonic=="PERTPTR")
        //         sstype = AdjSynSetType.Pertainym;
        // }
        int targetSynsetOffset = int.Parse(tokenizer.NextToken());
        string targetPartOfSpeech = tokenizer.NextToken();
        // Expand the single-letter tag; "s" (adjective satellite) folds into "adjective".
        // Unrecognized letters pass through unchanged.
        switch (targetPartOfSpeech)
        {
            case "n":
                targetPartOfSpeech = "noun";
                break;
            case "v":
                targetPartOfSpeech = "verb";
                break;
            case "a":
            case "s":
                targetPartOfSpeech = "adjective";
                break;
            case "r":
                targetPartOfSpeech = "adverb";
                break;
        }
        // Hex source/target field: zero selects the synset-level Relation overload;
        // otherwise high byte = source word number, low byte = target word number.
        int sourceTarget = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        if (sourceTarget == 0)
        {
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech);
        }
        else
        {
            int sourceWord = sourceTarget >> 8;
            int targetWord = sourceTarget & 0xff;
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
        }
    }
    // Optional frame block (present before the "|" gloss separator in some files —
    // presumably verb data; TODO confirm). Per frame: a "+" marker token, a decimal
    // frame number, and a hex word index. All values are consumed but discarded.
    string frameData = tokenizer.NextToken();
    if (frameData != "|")
    {
        int frameCount = int.Parse(frameData);
        for (int currentFrame = 0; currentFrame < frameCount; currentFrame++)
        {
            frameData = tokenizer.NextToken(); // +
            int frameNumber = int.Parse(tokenizer.NextToken());
            int wordID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        }
        frameData = tokenizer.NextToken();
    }
    // Everything after the first '|' in the raw record is the gloss text.
    string gloss = record.Substring(record.IndexOf('|') + 1);
    Synset synset = new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
    return synset;
}
/// <summary>
/// Parses the data-file record located at <paramref name="synsetOffset"/> into a
/// <see cref="Synset"/>. A record is a whitespace-tokenized line: header fields,
/// a word block, a relation (pointer) block, an optional frame block, and a
/// gloss after the '|' separator.
/// </summary>
/// <param name="partOfSpeech">Key into the data-file dictionary ("noun", "verb", ...).</param>
/// <param name="synsetOffset">Byte offset of the record within that data file.</param>
/// <returns>The parsed synset.</returns>
protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
{
    // Position the reader on the requested record; stale buffered data must be dropped.
    StreamReader reader = _dataFileDictionary[partOfSpeech].DataFile;
    reader.DiscardBufferedData();
    reader.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
    string line = reader.ReadLine();

    var parser = new Tokenizer(line);

    // Header: record offset (consumed for validation, otherwise unused),
    // lexicographer-file index, and the synset type marker (unused).
    int parsedOffset = int.Parse(parser.NextToken());
    string lexFile = _lexicographerFiles[int.Parse(parser.NextToken())];
    string typeMarker = parser.NextToken();

    // Word block: hex count, then (lemma, hex lexical id) pairs.
    // Underscores in the file encode spaces; the lexical id is discarded.
    int lemmaCount = int.Parse(parser.NextToken(), System.Globalization.NumberStyles.HexNumber);
    var lemmas = new string[lemmaCount];
    for (int i = 0; i < lemmaCount; i++)
    {
        lemmas[i] = parser.NextToken().Replace("_", " ");
        int lexicalId = int.Parse(parser.NextToken(), System.Globalization.NumberStyles.HexNumber);
    }

    // Relation (pointer) block: decimal count, then per relation a type key,
    // target offset, target part-of-speech letter, and a hex source/target field.
    int pointerCount = int.Parse(parser.NextToken());
    var pointers = new Relation[pointerCount];
    for (int i = 0; i < pointerCount; i++)
    {
        string symbol = parser.NextToken();
        int targetOffset = int.Parse(parser.NextToken());

        // Expand the single-letter tag; "s" (adjective satellite) folds into
        // "adjective", and unrecognized letters pass through unchanged.
        string targetPartOfSpeech = parser.NextToken();
        if (targetPartOfSpeech == "n")
            targetPartOfSpeech = "noun";
        else if (targetPartOfSpeech == "v")
            targetPartOfSpeech = "verb";
        else if (targetPartOfSpeech == "a" || targetPartOfSpeech == "s")
            targetPartOfSpeech = "adjective";
        else if (targetPartOfSpeech == "r")
            targetPartOfSpeech = "adverb";

        // Zero selects the synset-level Relation overload; otherwise the high
        // byte is the source word number and the low byte the target word number.
        int sourceTargetField = int.Parse(parser.NextToken(), System.Globalization.NumberStyles.HexNumber);
        if (sourceTargetField == 0)
        {
            pointers[i] = new Relation(this, (RelationType)_relationTypeDictionary[symbol], targetOffset, targetPartOfSpeech);
        }
        else
        {
            int sourceWordIndex = sourceTargetField >> 8;
            int targetWordIndex = sourceTargetField & 0xff;
            pointers[i] = new Relation(this, (RelationType)_relationTypeDictionary[symbol], targetOffset, targetPartOfSpeech, sourceWordIndex, targetWordIndex);
        }
    }

    // Optional frame block preceding the "|" gloss separator (presumably verb
    // data files only — TODO confirm). Each frame is a "+" marker, a decimal
    // frame number, and a hex word index; all are consumed but discarded.
    string token = parser.NextToken();
    if (token != "|")
    {
        int frameTotal = int.Parse(token);
        for (int i = 0; i < frameTotal; i++)
        {
            token = parser.NextToken(); // "+" marker
            int frameNumber = int.Parse(parser.NextToken());
            int frameWordIndex = int.Parse(parser.NextToken(), System.Globalization.NumberStyles.HexNumber);
        }
        token = parser.NextToken(); // consume the "|" separator
    }

    // Everything after the first '|' in the raw record is the gloss text.
    string gloss = line.Substring(line.IndexOf('|') + 1);
    return new Synset(synsetOffset, gloss, lemmas, lexFile, pointers);
}