/// <summary>
/// Walks a binarized tree and records what it finds: the vocabulary word under each
/// preterminal, the basic category of each only-child, and the pair of basic categories
/// of each two-child node.
/// </summary>
/// <param name="binaryRules">Receives (left category, right category) pairs of binary nodes.</param>
/// <param name="unaryRules">Receives the child category of unary nodes.</param>
/// <param name="words">Receives the vocabulary words found under preterminals.</param>
/// <param name="tree">The subtree to scan.</param>
private void SearchRulesForBatch(TwoDimensionalSet <string, string> binaryRules, ICollection <string> unaryRules, ICollection <string> words, Tree tree)
 {
     // Leaves carry nothing to record.
     if (tree.IsLeaf())
     {
         return;
     }

     // A preterminal contributes only the word it dominates.
     if (tree.IsPreTerminal())
     {
         string leafValue = tree.Children()[0].Value();
         words.Add(GetVocabWord(leafValue));
         return;
     }

     Tree[] kids = tree.Children();
     switch (kids.Length)
     {
         case 1:
         {
             Tree only = kids[0];
             unaryRules.Add(BasicCategory(only.Value()));
             SearchRulesForBatch(binaryRules, unaryRules, words, only);
             break;
         }

         case 2:
         {
             Tree left  = kids[0];
             Tree right = kids[1];
             binaryRules.Add(BasicCategory(left.Value()), BasicCategory(right.Value()));
             SearchRulesForBatch(binaryRules, unaryRules, words, left);
             SearchRulesForBatch(binaryRules, unaryRules, words, right);
             break;
         }

         default:
         {
             // Any other arity means the tree was not binarized.
             throw new AssertionError("Expected a binarized tree");
         }
     }
 }
        /// <summary>
        /// Shrinks the model's transform matrices, score matrices and word vectors down to the
        /// entries named by the given rule and word collections. Rules or words for which the
        /// model currently holds no entry are skipped.
        /// </summary>
        /// <remarks>
        /// Replaces the fields <c>binaryTransform</c>, <c>binaryScore</c>, <c>unaryTransform</c>,
        /// <c>unaryScore</c> and <c>wordVectors</c>, and updates <c>numBinaryMatrices</c> and
        /// <c>numUnaryMatrices</c> to the sizes of the new transform maps.
        /// </remarks>
        /// <param name="binaryRules">(left, right) category pairs to keep.</param>
        /// <param name="unaryRules">Unary categories to keep.</param>
        /// <param name="words">Vocabulary words to keep.</param>
        /// <exception cref="AssertionError">
        /// Thrown when a rule has a transform but no score, or a score but no transform.
        /// </exception>
        public virtual void FilterRulesForBatch(TwoDimensionalSet <string, string> binaryRules, ICollection <string> unaryRules, ICollection <string> words)
        {
            TwoDimensionalMap <string, string, SimpleMatrix> newBinaryTransforms = TwoDimensionalMap.TreeMap();
            TwoDimensionalMap <string, string, SimpleMatrix> newBinaryScores     = TwoDimensionalMap.TreeMap();

            foreach (Pair <string, string> binaryRule in binaryRules)
            {
                SimpleMatrix transform = binaryTransform.Get(binaryRule.First(), binaryRule.Second());
                SimpleMatrix score     = binaryScore.Get(binaryRule.First(), binaryRule.Second());
                // A transform and its score must be present or absent together.
                if ((transform == null) != (score == null))
                {
                    throw new AssertionError("Binary transform and score maps disagree for a rule");
                }
                if (transform != null)
                {
                    newBinaryTransforms.Put(binaryRule.First(), binaryRule.Second(), transform);
                    newBinaryScores.Put(binaryRule.First(), binaryRule.Second(), score);
                }
            }
            binaryTransform   = newBinaryTransforms;
            binaryScore       = newBinaryScores;
            numBinaryMatrices = binaryTransform.Size();
            IDictionary <string, SimpleMatrix> newUnaryTransforms = Generics.NewTreeMap();
            IDictionary <string, SimpleMatrix> newUnaryScores     = Generics.NewTreeMap();

            foreach (string unaryRule in unaryRules)
            {
                // TryGetValue leaves the out value null for a missing key; the dictionary
                // indexer would throw KeyNotFoundException instead of letting us skip it.
                SimpleMatrix transform;
                unaryTransform.TryGetValue(unaryRule, out transform);
                SimpleMatrix score;
                unaryScore.TryGetValue(unaryRule, out score);
                if ((transform == null) != (score == null))
                {
                    throw new AssertionError("Unary transform and score maps disagree for a rule");
                }
                if (transform != null)
                {
                    newUnaryTransforms[unaryRule] = transform;
                    newUnaryScores[unaryRule]     = score;
                }
            }
            unaryTransform   = newUnaryTransforms;
            unaryScore       = newUnaryScores;
            numUnaryMatrices = unaryTransform.Count;
            IDictionary <string, SimpleMatrix> newWordVectors = Generics.NewTreeMap();

            foreach (string word in words)
            {
                // Words without a stored vector are simply left out of the filtered map.
                SimpleMatrix wordVector;
                if (wordVectors.TryGetValue(word, out wordVector) && wordVector != null)
                {
                    newWordVectors[word] = wordVector;
                }
            }
            wordVectors = newWordVectors;
        }
        /// <summary>
        /// Restricts the transform and score rules to those that actually occur in
        /// <paramref name="trees"/>: the trees are scanned for binary rules, unary rules and
        /// words, and the collected sets are handed to the rule-based overload.
        /// </summary>
        /// <param name="trees">The trees whose rules and words should be kept.</param>
        public virtual void FilterRulesForBatch(ICollection <Tree> trees)
        {
            ICollection <string> seenWords = new HashSet <string>();
            ICollection <string> seenUnary = new HashSet <string>();
            TwoDimensionalSet <string, string> seenBinary = TwoDimensionalSet.TreeSet();

            // Accumulate everything the batch uses, tree by tree.
            foreach (Tree current in trees)
            {
                SearchRulesForBatch(seenBinary, seenUnary, seenWords, current);
            }

            FilterRulesForBatch(seenBinary, seenUnary, seenWords);
        }
예제 #4
0
        /// <summary>
        /// Collects the (first key, second key) pair of every entry found in any of the given
        /// two-dimensional maps.
        /// </summary>
        /// <param name="maps">The maps whose key pairs are gathered.</param>
        /// <returns>A set containing each key pair that appears in at least one map.</returns>
        public static TwoDimensionalSet <string, string> GetBinaryMatrixNames(IList <TwoDimensionalMap <string, string, SimpleMatrix> > maps)
        {
            TwoDimensionalSet <string, string> names = new TwoDimensionalSet <string, string>();

            foreach (TwoDimensionalMap <string, string, SimpleMatrix> current in maps)
            {
                foreach (TwoDimensionalMap.Entry <string, string, SimpleMatrix> cell in current)
                {
                    // Only the keys matter here; the matrix itself is ignored.
                    names.Add(cell.GetFirstKey(), cell.GetSecondKey());
                }
            }

            return names;
        }
        /// <summary>
        /// Restricts the model's rules to those occurring in the given trees or in the parse
        /// hypotheses decoded from the byte array stored with each tree.
        /// </summary>
        /// <param name="compressedTrees">
        /// Maps each tree to the bytes that <c>CacheParseHypotheses.ConvertToTrees</c> turns
        /// back into hypothesis trees.
        /// </param>
        public virtual void FilterRulesForBatch(IDictionary <Tree, byte[]> compressedTrees)
        {
            ICollection <string> seenWords = new HashSet <string>();
            ICollection <string> seenUnary = new HashSet <string>();
            TwoDimensionalSet <string, string> seenBinary = TwoDimensionalSet.TreeSet();

            foreach (KeyValuePair <Tree, byte[]> pair in compressedTrees)
            {
                // The key tree first, then every hypothesis decoded from its bytes.
                Tree   keyTree = pair.Key;
                byte[] packed  = pair.Value;
                SearchRulesForBatch(seenBinary, seenUnary, seenWords, keyTree);
                foreach (Tree candidate in CacheParseHypotheses.ConvertToTrees(packed))
                {
                    SearchRulesForBatch(seenBinary, seenUnary, seenWords, candidate);
                }
            }

            FilterRulesForBatch(seenBinary, seenUnary, seenWords);
        }
예제 #6
0
 /// <summary>
 /// Creates a document whose collection fields are initialised from the Generics and
 /// TwoDimensional* factory methods; goldCorefClusters is left null.
 /// </summary>
 public Document()
 {
     // NOTE(review): the original carried two stray comments here ("mentions may be removed
     // from this due to post processing" / "all mentions (mentions will not be removed from
     // this)"); they presumably describe mention-list fields declared elsewhere - confirm.

     // No gold clusters until they are supplied.
     goldCorefClusters    = null;

     // Plain hash maps.
     positions            = Generics.NewHashMap();
     mentionheadPositions = Generics.NewHashMap();
     corefClusters        = Generics.NewHashMap();
     allPredictedMentions = Generics.NewHashMap();
     allGoldMentions      = Generics.NewHashMap();
     speakers             = Generics.NewHashMap();

     // Plain hash sets.
     roleSet              = Generics.NewHashSet();
     speakerPairs         = Generics.NewHashSet();

     // Two-dimensional containers.
     incompatibles        = TwoDimensionalSet.HashSet();
     incompatibleClusters = TwoDimensionalSet.HashSet();
     acronymCache         = TwoDimensionalMap.HashMap();
 }
예제 #7
0
        /// <summary>
        /// Carries acronym-cache entries over when cluster <paramref name="from"/> is merged into
        /// <paramref name="to"/>: for every cached pair flagged true that involves
        /// <c>from.clusterID</c>, the pair made of the other cluster id and <c>to.clusterID</c>
        /// is flagged true as well. Existing entries are not removed.
        /// </summary>
        /// <param name="to">The cluster that receives the entries.</param>
        /// <param name="from">The cluster whose entries are copied.</param>
        public virtual void MergeAcronymCache(CorefCluster to, CorefCluster from)
        {
            // Gather the new pairs first and write them afterwards, so that acronymCache is
            // never modified while it is being enumerated.
            TwoDimensionalSet <int, int> replacements = TwoDimensionalSet.HashSet();

            foreach (int first in acronymCache.FirstKeySet())
            {
                foreach (int second in acronymCache.Get(first).Keys)
                {
                    if (!acronymCache.Get(first, second))
                    {
                        continue;
                    }

                    // The id paired with 'from', if this entry involves 'from' at all.
                    // A nullable int is needed here: a plain int cannot hold or be compared to null.
                    int? other = null;
                    if (first == from.clusterID)
                    {
                        other = second;
                    }
                    else if (second == from.clusterID)
                    {
                        other = first;
                    }

                    if (other.HasValue && other.Value != to.clusterID)
                    {
                        // Pairs are stored with the smaller cluster id first.
                        int cid1 = System.Math.Min(other.Value, to.clusterID);
                        int cid2 = System.Math.Max(other.Value, to.clusterID);
                        replacements.Add(cid1, cid2);
                    }
                }
            }
            foreach (int newFirst in replacements.FirstKeySet())
            {
                foreach (int newSecond in replacements.SecondKeySet(newFirst))
                {
                    acronymCache.Put(newFirst, newSecond, true);
                }
            }
        }
        /// <summary>
        /// Prepares the model's rules for a training set: scans every sentence and the parse
        /// hypotheses decoded from its compressed entry, adds a random matrix for each binary
        /// and unary rule found, and then filters the model down to the collected rules and words.
        /// </summary>
        /// <param name="sentences">The training trees.</param>
        /// <param name="compressedTrees">
        /// Looked up by sentence; each value is decoded with
        /// <c>CacheParseHypotheses.ConvertToTrees</c>.
        /// </param>
        public virtual void SetRulesForTrainingSet(IList <Tree> sentences, IDictionary <Tree, byte[]> compressedTrees)
        {
            ICollection <string> seenWords = new HashSet <string>();
            ICollection <string> seenUnary = new HashSet <string>();
            TwoDimensionalSet <string, string> seenBinary = TwoDimensionalSet.TreeSet();

            foreach (Tree gold in sentences)
            {
                SearchRulesForBatch(seenBinary, seenUnary, seenWords, gold);

                byte[] packed = compressedTrees[gold];
                foreach (Tree candidate in CacheParseHypotheses.ConvertToTrees(packed))
                {
                    SearchRulesForBatch(seenBinary, seenUnary, seenWords, candidate);
                }
            }

            // Add a random matrix for every rule that was seen.
            foreach (Pair <string, string> rule in seenBinary)
            {
                AddRandomBinaryMatrix(rule.first, rule.second);
            }
            foreach (string category in seenUnary)
            {
                AddRandomUnaryMatrix(category);
            }

            FilterRulesForBatch(seenBinary, seenUnary, seenWords);
        }
        /// <summary>The traditional way of initializing an empty model suitable for training.</summary>
        /// <param name="op">
        /// Options read here: randomSeed, randomWordVectors, numHid, simplifiedModel, numClasses,
        /// useTensors and combineClassification.
        /// </param>
        /// <param name="trainingTrees">
        /// Passed to <c>InitRandomWordVectors</c> when <c>op.randomWordVectors</c> is set;
        /// otherwise not used by this constructor.
        /// </param>
        /// <exception cref="NotSupportedException">
        /// Thrown when <c>op.simplifiedModel</c> is false; only the simplified model is implemented.
        /// </exception>
        public SentimentModel(RNNOptions op, IList <Tree> trainingTrees)
        {
            this.op = op;
            rand    = new Random(op.randomSeed);
            if (op.randomWordVectors)
            {
                InitRandomWordVectors(trainingTrees);
            }
            else
            {
                ReadWordVectors();
            }
            if (op.numHid > 0)
            {
                this.numHid = op.numHid;
            }
            else
            {
                // No explicit size: take the element count of the first word vector
                // (0 when there are no word vectors at all).
                int size = 0;
                foreach (SimpleMatrix vector in wordVectors.Values)
                {
                    size = vector.GetNumElements();
                    break;
                }
                this.numHid = size;
            }
            if (!op.simplifiedModel)
            {
                // TODO
                // figure out what binary productions we have in these trees
                // Note: the current sentiment training data does not actually
                // have any constituent labels
                // TODO
                // figure out what unary productions we have in these trees (preterminals only, after the collapsing)
                throw new NotSupportedException("Not yet implemented");
            }
            // Simplified model: a single binary production and a single unary production,
            // both keyed by the empty string.
            TwoDimensionalSet <string, string> binaryProductions = TwoDimensionalSet.HashSet();
            binaryProductions.Add(string.Empty, string.Empty);
            ICollection <string> unaryProductions = Generics.NewHashSet();
            unaryProductions.Add(string.Empty);

            this.numClasses      = op.numClasses;
            identity             = SimpleMatrix.Identity(numHid);
            binaryTransform      = TwoDimensionalMap.TreeMap();
            binaryTensors        = TwoDimensionalMap.TreeMap();
            binaryClassification = TwoDimensionalMap.TreeMap();
            // When making a flat model (no semantic untying) the
            // basicCategory function will return the same basic category for
            // all labels, so all entries will map to the same matrix
            foreach (Pair <string, string> binary in binaryProductions)
            {
                string left  = BasicCategory(binary.first);
                string right = BasicCategory(binary.second);
                if (binaryTransform.Contains(left, right))
                {
                    continue;
                }
                binaryTransform.Put(left, right, RandomTransformMatrix());
                if (op.useTensors)
                {
                    binaryTensors.Put(left, right, RandomBinaryTensor());
                }
                if (!op.combineClassification)
                {
                    binaryClassification.Put(left, right, RandomClassificationMatrix());
                }
            }
            numBinaryMatrices   = binaryTransform.Size();
            binaryTransformSize = numHid * (2 * numHid + 1);
            if (op.useTensors)
            {
                binaryTensorSize = numHid * numHid * numHid * 4;
            }
            else
            {
                binaryTensorSize = 0;
            }
            binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
            unaryClassification      = Generics.NewTreeMap();
            // When making a flat model (no semantic untying) the
            // basicCategory function will return the same basic category for
            // all labels, so all entries will map to the same matrix
            foreach (string unaryProduction in unaryProductions)
            {
                // A foreach iteration variable is read-only, so the basic category
                // goes into its own local.
                string unary = BasicCategory(unaryProduction);
                if (unaryClassification.ContainsKey(unary))
                {
                    continue;
                }
                unaryClassification[unary] = RandomClassificationMatrix();
            }
            numUnaryMatrices        = unaryClassification.Count;
            unaryClassificationSize = numClasses * (numHid + 1);
        }