/// <summary>
/// Retrieves every synset in which the given lemma appears for one part of speech.
/// </summary>
/// <param name="lemma">The lemma to look up.</param>
/// <param name="partOfSpeech">The part of speech to search.</param>
/// <returns>
/// The synsets for the lemma, in index-file order; an empty array when the
/// lemma is unknown for this part of speech.
/// </returns>
public override Synset[] GetSynsets(string lemma, string partOfSpeech)
{
    var results = new List<Synset>();
    IndexWord entry = GetIndexWord(lemma, partOfSpeech);
    if (entry == null)
    {
        // Unknown lemma for this part of speech: empty result, never null.
        return results.ToArray();
    }
    foreach (int offset in entry.SynsetOffsets)
    {
        results.Add(CreateSynset(partOfSpeech, offset));
    }
    return results.ToArray();
}
/// <summary>
/// Retrieves every synset in which the given lemma appears, across all parts
/// of speech known to this dictionary.
/// </summary>
/// <param name="lemma">The lemma to look up.</param>
/// <returns>
/// All matching synsets, grouped by part of speech in dictionary-key order;
/// an empty array when the lemma is not found at all.
/// </returns>
public override Synset[] GetSynsets(string lemma)
{
    var results = new List<Synset>();
    foreach (string pos in _dataFileDictionary.Keys)
    {
        IndexWord entry = GetIndexWord(lemma, pos);
        if (entry == null)
        {
            // The lemma does not occur with this part of speech; try the next.
            continue;
        }
        foreach (int offset in entry.SynsetOffsets)
        {
            results.Add(CreateSynset(pos, offset));
        }
    }
    return results.ToArray();
}
/// <summary>
/// Retrieves a single synset for the specified sense of a lemma.
/// </summary>
/// <param name="lemma">The lemma to look up.</param>
/// <param name="partOfSpeech">The part of speech to search.</param>
/// <param name="senseNumber">The 1-based sense number.</param>
/// <returns>
/// The synset for that sense, or null when the lemma is unknown for this part
/// of speech or the sense number exceeds the number of senses.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="senseNumber"/> is less than 1.
/// </exception>
public override Synset GetSynset(string lemma, string partOfSpeech, int senseNumber)
{
    if (senseNumber < 1)
    {
        throw new ArgumentOutOfRangeException("senseNumber", senseNumber, "cannot be less than 1");
    }
    IndexWord indexWord = GetIndexWord(lemma, partOfSpeech);
    if (indexWord == null)
    {
        return null;
    }
    // Bug fix: the original compared against (Length + 1), which let
    // senseNumber == Length + 1 slip through and made the indexer below throw
    // IndexOutOfRangeException instead of returning null as documented.
    if (senseNumber > indexWord.SynsetOffsets.Length)
    {
        return null;
    }
    return CreateSynset(partOfSpeech, indexWord.SynsetOffsets[senseNumber - 1]);
}
/// <summary>
/// Reads the WordNet data-file record at the given byte offset and parses it
/// into a Synset. The record is a single whitespace-separated line; tokens
/// are consumed strictly in file order, so every NextToken() call below is
/// positional and must not be reordered.
/// </summary>
/// <param name="partOfSpeech">Key selecting which data file to read from.</param>
/// <param name="synsetOffset">Byte offset of the record within that data file.</param>
/// <returns>The parsed synset.</returns>
protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
{
    StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;
    // DiscardBufferedData is required before Seek: StreamReader reads ahead,
    // and stale buffered bytes would otherwise be returned after repositioning.
    dataFile.DiscardBufferedData();
    dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
    string record = dataFile.ReadLine();
    Tokenizer tokenizer = new Tokenizer(record);
    // Record header: synset offset, lexicographer-file number (index into
    // mLexicographerFiles), and the synset-type marker (currently unused here).
    int offset = int.Parse(tokenizer.NextToken());
    string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken())];
    string synsetType = tokenizer.NextToken();
    // Word section: the count is written in hexadecimal. Each word token is
    // followed by a hex lexical id, which is parsed only to advance the
    // tokenizer (uniqueID is intentionally unused).
    int wordCount = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    string[] words = new string[wordCount];
    for (int iCurrentWord = 0; iCurrentWord < wordCount; iCurrentWord++)
    {
        // Underscores in the data file stand for spaces in the lemma.
        words[iCurrentWord] = tokenizer.NextToken().Replace("_", " ");
        int uniqueID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    }
    // Relation (pointer) section: count, then one 4-token group per relation.
    int relationCount = int.Parse(tokenizer.NextToken());
    Relation[] relations = new Relation[relationCount];
    for (int currentRelation = 0; currentRelation < relationCount; currentRelation++)
    {
        string relationTypeKey = tokenizer.NextToken();
        // if (fpos.name=="adj" && sstype==AdjSynSetType.DontKnow)
        // {
        // if (ptrs[j].ptp.mnemonic=="ANTPTR")
        // sstype = AdjSynSetType.DirectAnt;
        // else if (ptrs[j].ptp.mnemonic=="PERTPTR")
        // sstype = AdjSynSetType.Pertainym;
        // }
        int targetSynsetOffset = int.Parse(tokenizer.NextToken());
        // The target part of speech is a one-letter code; expand it to the
        // full name used as a dictionary key ("s" = adjective satellite,
        // folded into "adjective").
        string targetPartOfSpeech = tokenizer.NextToken();
        switch (targetPartOfSpeech)
        {
            case "n":
                targetPartOfSpeech = "noun";
                break;
            case "v":
                targetPartOfSpeech = "verb";
                break;
            case "a":
            case "s":
                targetPartOfSpeech = "adjective";
                break;
            case "r":
                targetPartOfSpeech = "adverb";
                break;
        }
        // Source/target field: a hex value where 0 means the relation applies
        // to the whole synset; otherwise the high byte is the source word
        // number and the low byte is the target word number.
        int sourceTarget = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        if (sourceTarget == 0)
        {
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech);
        }
        else
        {
            int sourceWord = sourceTarget >> 8;
            int targetWord = sourceTarget & 0xff;
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
        }
    }
    // Optional verb-frame section: present only when the next token is a
    // numeric count rather than the "|" gloss separator. The frame tokens are
    // parsed solely to advance the tokenizer (frameNumber/wordID unused).
    string frameData = tokenizer.NextToken();
    if (frameData != "|")
    {
        int frameCount = int.Parse(frameData);
        for (int currentFrame = 0; currentFrame < frameCount; currentFrame++)
        {
            frameData = tokenizer.NextToken(); // +
            int frameNumber = int.Parse(tokenizer.NextToken());
            int wordID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        }
        frameData = tokenizer.NextToken();
    }
    // Everything after the "|" separator in the raw record is the gloss
    // (definition and example sentences).
    string gloss = record.Substring(record.IndexOf('|') + 1);
    Synset synset = new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
    return(synset);
}
/// <summary>
/// Recursively collects the offsets of every hypernym ancestor of a synset,
/// appending them to <paramref name="parentOffsets"/> as invariant-culture
/// strings in depth-first order.
/// </summary>
/// <param name="currentSynset">The synset whose hypernym chain is walked.</param>
/// <param name="parentOffsets">Receives the ancestor offsets; not cleared first.</param>
private void getParents(Synset currentSynset, List<string> parentOffsets)
{
    for (int index = 0; index < currentSynset.RelationCount; index++)
    {
        Relation relation = currentSynset.GetRelation(index);
        if (relation.SynsetRelationType.Name != "Hypernym")
        {
            continue;
        }
        Synset parent = relation.TargetSynset;
        parentOffsets.Add(parent.Offset.ToString(System.Globalization.CultureInfo.InvariantCulture));
        // Walk upward so the full ancestor chain is recorded, not just the
        // immediate hypernym.
        getParents(parent, parentOffsets);
    }
}
/// <summary>
/// Reads the WordNet data-file record at the given byte offset and parses it
/// into a Synset. The record is a single whitespace-separated line; tokens
/// are consumed strictly in file order, so every NextToken() call below is
/// positional and must not be reordered.
/// </summary>
/// <param name="partOfSpeech">Key selecting which data file to read from.</param>
/// <param name="synsetOffset">Byte offset of the record within that data file.</param>
/// <returns>The parsed synset.</returns>
protected internal override Synset CreateSynset(string partOfSpeech, int synsetOffset)
{
    StreamReader dataFile = mDataFileDictionary[partOfSpeech].DataFile;
    // DiscardBufferedData is required before Seek: StreamReader reads ahead,
    // and stale buffered bytes would otherwise be returned after repositioning.
    dataFile.DiscardBufferedData();
    dataFile.BaseStream.Seek(synsetOffset, SeekOrigin.Begin);
    string record = dataFile.ReadLine();
    Tokenizer tokenizer = new Tokenizer(record);
    // Record header: synset offset, lexicographer-file number (index into
    // mLexicographerFiles), and the synset-type marker (currently unused here).
    int offset = int.Parse(tokenizer.NextToken());
    string lexicographerFile = mLexicographerFiles[int.Parse(tokenizer.NextToken())];
    string synsetType = tokenizer.NextToken();
    // Word section: the count is written in hexadecimal. Each word token is
    // followed by a hex lexical id, which is parsed only to advance the
    // tokenizer (uniqueID is intentionally unused).
    int wordCount = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    string[] words = new string[wordCount];
    for (int iCurrentWord = 0; iCurrentWord < wordCount; iCurrentWord++)
    {
        // Underscores in the data file stand for spaces in the lemma.
        words[iCurrentWord] = tokenizer.NextToken().Replace("_", " ");
        int uniqueID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
    }
    // Relation (pointer) section: count, then one 4-token group per relation.
    int relationCount = int.Parse(tokenizer.NextToken());
    Relation[] relations = new Relation[relationCount];
    for (int currentRelation = 0; currentRelation < relationCount; currentRelation++)
    {
        string relationTypeKey = tokenizer.NextToken();
        // if (fpos.name=="adj" && sstype==AdjSynSetType.DontKnow)
        // {
        // if (ptrs[j].ptp.mnemonic=="ANTPTR")
        // sstype = AdjSynSetType.DirectAnt;
        // else if (ptrs[j].ptp.mnemonic=="PERTPTR")
        // sstype = AdjSynSetType.Pertainym;
        // }
        int targetSynsetOffset = int.Parse(tokenizer.NextToken());
        // The target part of speech is a one-letter code; expand it to the
        // full name used as a dictionary key ("s" = adjective satellite,
        // folded into "adjective").
        string targetPartOfSpeech = tokenizer.NextToken();
        switch (targetPartOfSpeech)
        {
            case "n":
                targetPartOfSpeech = "noun";
                break;
            case "v":
                targetPartOfSpeech = "verb";
                break;
            case "a":
            case "s":
                targetPartOfSpeech = "adjective";
                break;
            case "r":
                targetPartOfSpeech = "adverb";
                break;
        }
        // Source/target field: a hex value where 0 means the relation applies
        // to the whole synset; otherwise the high byte is the source word
        // number and the low byte is the target word number.
        int sourceTarget = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        if (sourceTarget == 0)
        {
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech);
        }
        else
        {
            int sourceWord = sourceTarget >> 8;
            int targetWord = sourceTarget & 0xff;
            relations[currentRelation] = new Relation(this, (RelationType)mRelationTypeDictionary[relationTypeKey], targetSynsetOffset, targetPartOfSpeech, sourceWord, targetWord);
        }
    }
    // Optional verb-frame section: present only when the next token is a
    // numeric count rather than the "|" gloss separator. The frame tokens are
    // parsed solely to advance the tokenizer (frameNumber/wordID unused).
    string frameData = tokenizer.NextToken();
    if (frameData != "|")
    {
        int frameCount = int.Parse(frameData);
        for (int currentFrame = 0; currentFrame < frameCount; currentFrame++)
        {
            frameData = tokenizer.NextToken(); // +
            int frameNumber = int.Parse(tokenizer.NextToken());
            int wordID = int.Parse(tokenizer.NextToken(), System.Globalization.NumberStyles.HexNumber);
        }
        frameData = tokenizer.NextToken();
    }
    // Everything after the "|" separator in the raw record is the gloss
    // (definition and example sentences).
    string gloss = record.Substring(record.IndexOf('|') + 1);
    Synset synset = new Synset(synsetOffset, gloss, words, lexicographerFile, relations);
    return synset;
}