/// <summary>
/// Recursively walks a binarized tree, recording every binary rule,
/// unary rule, and vocabulary word it contains into the given collections.
/// Throws AssertionError if a node has more than two children.
/// </summary>
private void SearchRulesForBatch(TwoDimensionalSet<string, string> binaryRules, ICollection<string> unaryRules, ICollection<string> words, Tree tree)
{
    // Leaves contribute nothing; preterminals contribute only their word.
    if (tree.IsLeaf())
    {
        return;
    }
    if (tree.IsPreTerminal())
    {
        words.Add(GetVocabWord(tree.Children()[0].Value()));
        return;
    }
    Tree[] kids = tree.Children();
    switch (kids.Length)
    {
        case 1:
            unaryRules.Add(BasicCategory(kids[0].Value()));
            SearchRulesForBatch(binaryRules, unaryRules, words, kids[0]);
            break;

        case 2:
            binaryRules.Add(BasicCategory(kids[0].Value()), BasicCategory(kids[1].Value()));
            SearchRulesForBatch(binaryRules, unaryRules, words, kids[0]);
            SearchRulesForBatch(binaryRules, unaryRules, words, kids[1]);
            break;

        default:
            throw new AssertionError("Expected a binarized tree");
    }
}
/// <summary>
/// Restricts the model's binary/unary transforms and scores, and its word
/// vectors, to the rules and words given. Rules present in the arguments but
/// absent from the model are simply skipped; a rule that has a transform
/// without a score (or vice versa) triggers an AssertionError.
/// </summary>
public virtual void FilterRulesForBatch(TwoDimensionalSet<string, string> binaryRules, ICollection<string> unaryRules, ICollection<string> words)
{
    // Rebuild the binary maps keeping only the requested rules.
    TwoDimensionalMap<string, string, SimpleMatrix> newBinaryTransforms = TwoDimensionalMap.TreeMap();
    TwoDimensionalMap<string, string, SimpleMatrix> newBinaryScores = TwoDimensionalMap.TreeMap();
    foreach (Pair<string, string> binaryRule in binaryRules)
    {
        SimpleMatrix transform = binaryTransform.Get(binaryRule.First(), binaryRule.Second());
        if (transform != null)
        {
            newBinaryTransforms.Put(binaryRule.First(), binaryRule.Second(), transform);
        }
        SimpleMatrix score = binaryScore.Get(binaryRule.First(), binaryRule.Second());
        if (score != null)
        {
            newBinaryScores.Put(binaryRule.First(), binaryRule.Second(), score);
        }
        // A rule must have both a transform and a score, or neither.
        if ((transform == null && score != null) || (transform != null && score == null))
        {
            throw new AssertionError();
        }
    }
    binaryTransform = newBinaryTransforms;
    binaryScore = newBinaryScores;
    numBinaryMatrices = binaryTransform.Size();

    // Rebuild the unary maps. Use TryGetValue rather than the indexer: the
    // IDictionary indexer throws KeyNotFoundException for a missing key, but a
    // rule absent from the model is an expected condition when filtering.
    IDictionary<string, SimpleMatrix> newUnaryTransforms = Generics.NewTreeMap();
    IDictionary<string, SimpleMatrix> newUnaryScores = Generics.NewTreeMap();
    foreach (string unaryRule in unaryRules)
    {
        SimpleMatrix transform;
        unaryTransform.TryGetValue(unaryRule, out transform);
        if (transform != null)
        {
            newUnaryTransforms[unaryRule] = transform;
        }
        SimpleMatrix score;
        unaryScore.TryGetValue(unaryRule, out score);
        if (score != null)
        {
            newUnaryScores[unaryRule] = score;
        }
        if ((transform == null && score != null) || (transform != null && score == null))
        {
            throw new AssertionError();
        }
    }
    unaryTransform = newUnaryTransforms;
    unaryScore = newUnaryScores;
    numUnaryMatrices = unaryTransform.Count;

    // Keep only the word vectors for words that actually occur.
    IDictionary<string, SimpleMatrix> newWordVectors = Generics.NewTreeMap();
    foreach (string word in words)
    {
        SimpleMatrix wordVector;
        if (wordVectors.TryGetValue(word, out wordVector) && wordVector != null)
        {
            newWordVectors[word] = wordVector;
        }
    }
    wordVectors = newWordVectors;
}
/// <summary>
/// Filters the transform and score rules so that we only keep the
/// ones which appear somewhere in the given trees.
/// </summary>
public virtual void FilterRulesForBatch(ICollection<Tree> trees)
{
    TwoDimensionalSet<string, string> binaryRules = TwoDimensionalSet.TreeSet();
    ICollection<string> unaryRules = new HashSet<string>();
    ICollection<string> words = new HashSet<string>();
    // Accumulate every rule and word used anywhere in the batch, then filter once.
    foreach (Tree t in trees)
    {
        SearchRulesForBatch(binaryRules, unaryRules, words, t);
    }
    FilterRulesForBatch(binaryRules, unaryRules, words);
}
/// <summary>
/// Collects the set of (first, second) key pairs that occur in any of the
/// given two-dimensional matrix maps.
/// </summary>
public static TwoDimensionalSet<string, string> GetBinaryMatrixNames(IList<TwoDimensionalMap<string, string, SimpleMatrix>> maps)
{
    TwoDimensionalSet<string, string> names = new TwoDimensionalSet<string, string>();
    foreach (TwoDimensionalMap<string, string, SimpleMatrix> map in maps)
    {
        foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in map)
        {
            names.Add(entry.GetFirstKey(), entry.GetSecondKey());
        }
    }
    return names;
}
/// <summary>
/// Filters the model's rules down to those appearing in the given trees or
/// in any of their cached (compressed) parse hypotheses.
/// </summary>
public virtual void FilterRulesForBatch(IDictionary<Tree, byte[]> compressedTrees)
{
    TwoDimensionalSet<string, string> binaryRules = TwoDimensionalSet.TreeSet();
    ICollection<string> unaryRules = new HashSet<string>();
    ICollection<string> words = new HashSet<string>();
    foreach (KeyValuePair<Tree, byte[]> entry in compressedTrees)
    {
        // Scan the gold tree itself...
        SearchRulesForBatch(binaryRules, unaryRules, words, entry.Key);
        // ...and every hypothesis tree decompressed from the cached bytes.
        foreach (Tree hypothesis in CacheParseHypotheses.ConvertToTrees(entry.Value))
        {
            SearchRulesForBatch(binaryRules, unaryRules, words, hypothesis);
        }
    }
    FilterRulesForBatch(binaryRules, unaryRules, words);
}
public Document()
{
    // Mention bookkeeping. NOTE(review): per the original comments, predicted
    // mentions may be removed by post processing, while gold mentions are
    // never removed.
    allPredictedMentions = Generics.NewHashMap();
    allGoldMentions = Generics.NewHashMap();
    positions = Generics.NewHashMap();
    mentionheadPositions = Generics.NewHashMap();
    roleSet = Generics.NewHashSet();

    // Coreference clusters; gold clusters start out absent.
    corefClusters = Generics.NewHashMap();
    goldCorefClusters = null;

    // Speaker information.
    speakers = Generics.NewHashMap();
    speakerPairs = Generics.NewHashSet();

    // Pairwise caches.
    incompatibles = TwoDimensionalSet.HashSet();
    incompatibleClusters = TwoDimensionalSet.HashSet();
    acronymCache = TwoDimensionalMap.HashMap();
}
/// <summary>
/// After cluster <paramref name="from"/> is merged into <paramref name="to"/>,
/// re-records every cached acronym relation that involved <c>from</c> so that
/// it now points at <c>to</c> instead.
/// </summary>
public virtual void MergeAcronymCache(CorefCluster to, CorefCluster from)
{
    // Collect the new entries first so we never mutate acronymCache while
    // iterating over it.
    TwoDimensionalSet<int, int> replacements = TwoDimensionalSet.HashSet();
    foreach (int first in acronymCache.FirstKeySet())
    {
        foreach (int second in acronymCache.Get(first).Keys)
        {
            if (!acronymCache.Get(first, second))
            {
                continue;
            }
            // Bug fix: 'other' must be nullable. The original 'int other = null;'
            // does not compile in C# (mistranslation of Java's 'Integer'), and
            // "no match" must be distinguishable from a real cluster id.
            int? other = null;
            if (first == from.clusterID)
            {
                other = second;
            }
            else if (second == from.clusterID)
            {
                other = first;
            }
            if (other.HasValue && other.Value != to.clusterID)
            {
                // Entries are keyed with the smaller cluster id first.
                int cid1 = System.Math.Min(other.Value, to.clusterID);
                int cid2 = System.Math.Max(other.Value, to.clusterID);
                replacements.Add(cid1, cid2);
            }
        }
    }
    foreach (int first in replacements.FirstKeySet())
    {
        foreach (int second in replacements.SecondKeySet(first))
        {
            acronymCache.Put(first, second, true);
        }
    }
}
/// <summary>
/// Ensures a matrix exists for every rule observed in the training sentences
/// and their cached hypotheses, then filters out all unused rules and words.
/// </summary>
public virtual void SetRulesForTrainingSet(IList<Tree> sentences, IDictionary<Tree, byte[]> compressedTrees)
{
    TwoDimensionalSet<string, string> binaryRules = TwoDimensionalSet.TreeSet();
    ICollection<string> unaryRules = new HashSet<string>();
    ICollection<string> words = new HashSet<string>();
    // Gather every rule and word from the gold sentences and their hypotheses.
    foreach (Tree sentence in sentences)
    {
        SearchRulesForBatch(binaryRules, unaryRules, words, sentence);
        foreach (Tree hypothesis in CacheParseHypotheses.ConvertToTrees(compressedTrees[sentence]))
        {
            SearchRulesForBatch(binaryRules, unaryRules, words, hypothesis);
        }
    }
    // Create matrices for any observed rule that lacks one, then prune the rest.
    foreach (Pair<string, string> rule in binaryRules)
    {
        AddRandomBinaryMatrix(rule.first, rule.second);
    }
    foreach (string rule in unaryRules)
    {
        AddRandomUnaryMatrix(rule);
    }
    FilterRulesForBatch(binaryRules, unaryRules, words);
}
/// <summary>The traditional way of initializing an empty model suitable for training.</summary>
/// <param name="op">Options controlling sizes, random seed, and model shape.</param>
/// <param name="trainingTrees">Trees used to initialize random word vectors when requested.</param>
public SentimentModel(RNNOptions op, IList<Tree> trainingTrees)
{
    this.op = op;
    rand = new Random(op.randomSeed);
    // Word vectors: either initialized randomly from the training trees,
    // or read in via ReadWordVectors().
    if (op.randomWordVectors)
    {
        InitRandomWordVectors(trainingTrees);
    }
    else
    {
        ReadWordVectors();
    }
    // Hidden size: taken from the options when positive; otherwise inferred
    // from the element count of the first word vector (0 if there are none).
    if (op.numHid > 0)
    {
        this.numHid = op.numHid;
    }
    else
    {
        int size = 0;
        foreach (SimpleMatrix vector in wordVectors.Values)
        {
            size = vector.GetNumElements();
            break;
        }
        this.numHid = size;
    }
    // In the simplified model there is exactly one (empty,empty) binary
    // production and one empty unary production, so all nodes share matrices.
    TwoDimensionalSet<string, string> binaryProductions = TwoDimensionalSet.HashSet();
    if (op.simplifiedModel)
    {
        binaryProductions.Add(string.Empty, string.Empty);
    }
    else
    {
        // TODO
        // figure out what binary productions we have in these trees
        // Note: the current sentiment training data does not actually
        // have any constituent labels
        throw new NotSupportedException("Not yet implemented");
    }
    ICollection<string> unaryProductions = Generics.NewHashSet();
    if (op.simplifiedModel)
    {
        unaryProductions.Add(string.Empty);
    }
    else
    {
        // TODO
        // figure out what unary productions we have in these trees (preterminals only, after the collapsing)
        throw new NotSupportedException("Not yet implemented");
    }
    this.numClasses = op.numClasses;
    identity = SimpleMatrix.Identity(numHid);
    binaryTransform = TwoDimensionalMap.TreeMap();
    binaryTensors = TwoDimensionalMap.TreeMap();
    binaryClassification = TwoDimensionalMap.TreeMap();
    // When making a flat model (no symantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (Pair<string, string> binary in binaryProductions)
    {
        string left = BasicCategory(binary.first);
        string right = BasicCategory(binary.second);
        // Several raw productions may collapse to the same basic-category
        // pair; only the first one creates the matrices.
        if (binaryTransform.Contains(left, right))
        {
            continue;
        }
        binaryTransform.Put(left, right, RandomTransformMatrix());
        if (op.useTensors)
        {
            binaryTensors.Put(left, right, RandomBinaryTensor());
        }
        if (!op.combineClassification)
        {
            binaryClassification.Put(left, right, RandomClassificationMatrix());
        }
    }
    numBinaryMatrices = binaryTransform.Size();
    // Parameter counts per matrix/tensor, used for (de)serializing parameters.
    binaryTransformSize = numHid * (2 * numHid + 1);
    if (op.useTensors)
    {
        binaryTensorSize = numHid * numHid * numHid * 4;
    }
    else
    {
        binaryTensorSize = 0;
    }
    binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
    unaryClassification = Generics.NewTreeMap();
    // When making a flat model (no symantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (string unary in unaryProductions)
    {
        unary = BasicCategory(unary);
        if (unaryClassification.Contains(unary))
        {
            continue;
        }
        unaryClassification[unary] = RandomClassificationMatrix();
    }
    numUnaryMatrices = unaryClassification.Count;
    unaryClassificationSize = numClasses * (numHid + 1);
}