Пример #1
0
 /// <summary>
 /// Creates a parser around a newly constructed pipe and then loads a model into it.
 /// </summary>
 /// <param name="order">
 /// When equal to 2 a <c>DependencyPipe2O</c> is created; any other value yields a plain
 /// <c>DependencyPipe</c>.
 /// </param>
 /// <param name="modelFileName">Passed unchanged to <c>LoadModel</c>.</param>
 /// <param name="createForest">Forwarded to the pipe's <c>setLabel</c>.</param>
 public DependencyParser(int order, string modelFileName, bool createForest)
 {
     DependencyPipe featurePipe;
     if (order == 2)
     {
         featurePipe = new DependencyPipe2O(true);
     }
     else
     {
         featurePipe = new DependencyPipe(true);
     }
     featurePipe.setLabel(createForest);

     m_pipe = featurePipe;
     m_params = new Parameters(featurePipe.DataAlphabet.Count);

     // NOTE(review): the decoder is selected from SecondOrder, not from `order` directly;
     // confirm the two always agree at this point.
     if (SecondOrder)
     {
         m_decoder = new DependencyDecoder2O(featurePipe);
     }
     else
     {
         m_decoder = new DependencyDecoder(featurePipe);
     }

     // The alphabets are closed only after the model has been loaded.
     LoadModel(modelFileName);
     featurePipe.CloseAlphabets();
 }
Пример #2
0
 /// <summary>
 /// Creates a parser that uses an already constructed pipe.
 /// </summary>
 /// <param name="pipe">
 /// Pipe stored in <c>m_pipe</c>; its <c>DataAlphabet.Count</c> sizes the parameters and it is
 /// handed to the decoder.
 /// </param>
 public DependencyParser(DependencyPipe pipe)
 {
     m_pipe = pipe;
     m_params = new Parameters(pipe.DataAlphabet.Count);

     // SecondOrder is evaluated after m_pipe has been assigned, as in the original statement order.
     if (SecondOrder)
     {
         m_decoder = new DependencyDecoder2O(pipe);
     }
     else
     {
         m_decoder = new DependencyDecoder(pipe);
     }
 }
Пример #3
0
        // TODO: sina: rename it to ReadFeatureVector
        /// <summary>
        /// Reads one instance's serialized feature indices from <paramref name="reader"/>, rebuilds the
        /// feature vectors, scores each one with <paramref name="params"/> and stores vectors and scores
        /// in the supplied tables.
        /// </summary>
        /// <remarks>
        /// Stream layout consumed, in order: pair features, label features (only when <c>Labeled</c>),
        /// triple features, sibling features, the instance-level feature list, the token / POS / label
        /// string arrays, the parse-tree string and a closing marker. Every individual feature list ends
        /// with <c>-2</c>, every section with <c>-3</c>, the instance-level list with <c>-4</c> and the
        /// whole record with <c>-1</c>.
        /// </remarks>
        /// <param name="reader">Binary reader positioned at the start of the record.</param>
        /// <param name="inst">Only its <c>Length</c> is read; it bounds every word loop.</param>
        /// <param name="fvs">Receives pair feature vectors at <c>[w1, w2, ph]</c> for <c>w1 &lt; w2</c>.</param>
        /// <param name="probs">Receives the scores matching <paramref name="fvs"/>.</param>
        /// <param name="fvsTrips">Receives triple feature vectors at <c>[w1, w2, w3]</c>.</param>
        /// <param name="probsTrips">Receives the scores matching <paramref name="fvsTrips"/>.</param>
        /// <param name="fvsSibs">Receives sibling feature vectors at <c>[w1, w2, wh]</c> for <c>w1 != w2</c>.</param>
        /// <param name="probsSibs">Receives the scores matching <paramref name="fvsSibs"/>.</param>
        /// <param name="ntFvs">Receives label feature vectors at <c>[w1, t, ph, ch]</c>; untouched unless <c>Labeled</c>.</param>
        /// <param name="ntProbs">Receives the scores matching <paramref name="ntFvs"/>.</param>
        /// <param name="params">Scores each feature vector through <c>GetScore</c>.</param>
        /// <returns>
        /// A new <see cref="DependencyInstance"/> built from the strings and the instance-level feature
        /// vector read from the stream, with <c>ActParseTree</c> set.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown with the message "Bad File Format" when an expected marker is missing or the trailing
        /// string data cannot be read; in the latter case the original failure is the inner exception.
        /// </exception>
        public DependencyInstance GetFeatureVector(BinaryReader reader,
                                                   DependencyInstance inst,
                                                   FeatureVector[,,] fvs,
                                                   double[,,] probs,
                                                   FeatureVector[,,] fvsTrips,
                                                   double[,,] probsTrips,
                                                   FeatureVector[,,] fvsSibs,
                                                   double[,,] probsSibs,
                                                   FeatureVector[,,,] ntFvs,
                                                   double[,,,] ntProbs,
                                                   Parameters @params)
        {
            const int featureListEnd = -2;
            const int sectionEnd = -3;
            const int instanceFeaturesEnd = -4;
            const int recordEnd = -1;

            int length = inst.Length;

            // Pair section: one feature list per (w1 < w2) and per slot ph.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1 + 1; w2 < length; w2++)
                {
                    for (int ph = 0; ph < 2; ph++)
                    {
                        FeatureVector pairFV = ReadFeaturesUntilMarker(reader, featureListEnd);
                        double pairScore = @params.GetScore(pairFV);
                        fvs[w1, w2, ph] = pairFV;
                        probs[w1, w2, ph] = pairScore;
                    }
                }
            }
            ExpectMarker(reader, sectionEnd);

            // Label section: present in the stream only when Labeled is set.
            if (Labeled)
            {
                for (int w1 = 0; w1 < length; w1++)
                {
                    for (int t = 0; t < Types.Length; t++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            for (int ch = 0; ch < 2; ch++)
                            {
                                FeatureVector labelFV = ReadFeaturesUntilMarker(reader, featureListEnd);
                                double labelScore = @params.GetScore(labelFV);
                                ntFvs[w1, t, ph, ch] = labelFV;
                                ntProbs[w1, t, ph, ch] = labelScore;
                            }
                        }
                    }
                }
                ExpectMarker(reader, sectionEnd);
            }

            // Triple section: for each w1, first the triples with w1 <= w2 < w3, then the
            // mirrored ones with w1 >= w2 > w3. The read order must match the stream exactly.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1; w2 < length; w2++)
                {
                    for (int w3 = w2 + 1; w3 < length; w3++)
                    {
                        FeatureVector tripFV = ReadFeaturesUntilMarker(reader, featureListEnd);
                        double tripScore = @params.GetScore(tripFV);
                        fvsTrips[w1, w2, w3] = tripFV;
                        probsTrips[w1, w2, w3] = tripScore;
                    }
                }
                for (int w2 = w1; w2 >= 0; w2--)
                {
                    for (int w3 = w2 - 1; w3 >= 0; w3--)
                    {
                        FeatureVector tripFV = ReadFeaturesUntilMarker(reader, featureListEnd);
                        double tripScore = @params.GetScore(tripFV);
                        fvsTrips[w1, w2, w3] = tripFV;
                        probsTrips[w1, w2, w3] = tripScore;
                    }
                }
            }
            ExpectMarker(reader, sectionEnd);

            // Sibling section: every ordered pair of distinct positions, two slots each.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = 0; w2 < length; w2++)
                {
                    if (w1 == w2)
                    {
                        continue;
                    }

                    for (int wh = 0; wh < 2; wh++)
                    {
                        FeatureVector sibFV = ReadFeaturesUntilMarker(reader, featureListEnd);
                        double sibScore = @params.GetScore(sibFV);
                        fvsSibs[w1, w2, wh] = sibFV;
                        probsSibs[w1, w2, wh] = sibScore;
                    }
                }
            }
            ExpectMarker(reader, sectionEnd);

            // Instance-level features, closed by their own marker.
            FeatureVector nfv = ReadFeaturesUntilMarker(reader, instanceFeaturesEnd);

            string[] toks = null;
            string[] pos = null;
            string[] labs = null;
            string actParseTree = null;
            int closingMarker = instanceFeaturesEnd;
            try
            {
                // In this layout no separator follows the token array.
                toks = ReadLengthPrefixedStrings(reader);
                pos = ReadLengthPrefixedStrings(reader);
                reader.ReadInt32(); // separator after the POS array; its value is not validated
                labs = ReadLengthPrefixedStrings(reader);
                reader.ReadInt32(); // separator after the label array; its value is not validated
                actParseTree = reader.ReadString();
                closingMarker = reader.ReadInt32();
            }
            catch (Exception e)
            {
                Console.WriteLine("Error reading file.");
                // Keep the underlying failure as the inner exception so it can be diagnosed.
                throw new Exception("Bad File Format", e);
            }

            if (closingMarker != recordEnd)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            var pti = new DependencyInstance(toks, pos, labs, nfv);
            pti.ActParseTree = actParseTree;
            return pti;
        }

        /// <summary>
        /// Reads feature indices until <paramref name="marker"/> is read and adds each one,
        /// with value 1.0, to a new feature vector.
        /// </summary>
        private FeatureVector ReadFeaturesUntilMarker(BinaryReader reader, int marker)
        {
            var features = new FeatureVector();
            for (int index = reader.ReadInt32(); index != marker; index = reader.ReadInt32())
            {
                AddNewFeature(index, 1.0, features);
            }
            return features;
        }

        /// <summary>
        /// Reads one integer and throws "Bad File Format" unless it equals <paramref name="marker"/>.
        /// </summary>
        private static void ExpectMarker(BinaryReader reader, int marker)
        {
            if (reader.ReadInt32() != marker)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }
        }

        /// <summary>
        /// Reads an integer count followed by that many strings.
        /// </summary>
        private static string[] ReadLengthPrefixedStrings(BinaryReader reader)
        {
            int count = reader.ReadInt32();
            var values = new string[count];
            for (int i = 0; i < count; i++)
            {
                values[i] = reader.ReadString();
            }
            return values;
        }
Пример #4
0
        /// <summary>
        /// Builds the feature vectors of <paramref name="inst"/>, scores each one with
        /// <paramref name="params"/> and stores vectors and scores in the supplied tables.
        /// </summary>
        /// <param name="inst">Instance whose <c>Sentence</c> and <c>POS</c> arrays drive feature creation.</param>
        /// <param name="fvs">Receives pair feature vectors at <c>[w1, w2, ph]</c> for <c>w1 &lt; w2</c>.</param>
        /// <param name="probs">Receives the scores matching <paramref name="fvs"/>.</param>
        /// <param name="fvsTrips">Receives triple feature vectors at <c>[w1, w2, w3]</c>.</param>
        /// <param name="probsTrips">Receives the scores matching <paramref name="fvsTrips"/>.</param>
        /// <param name="fvsSibs">Receives sibling feature vectors at <c>[w1, w2, wh]</c> for <c>w1 != w2</c>.</param>
        /// <param name="probsSibs">Receives the scores matching <paramref name="fvsSibs"/>.</param>
        /// <param name="ntFvs">Receives label feature vectors at <c>[w1, t, ph, ch]</c>; untouched unless <c>Labeled</c>.</param>
        /// <param name="ntProbs">Receives the scores matching <paramref name="ntFvs"/>.</param>
        /// <param name="params">Scores each feature vector through <c>GetScore</c>.</param>
        public void GetFeatureVector(DependencyInstance inst,
                                     FeatureVector[,,] fvs,
                                     double[,,] probs,
                                     FeatureVector[,,] fvsTrips,
                                     double[,,] probsTrips,
                                     FeatureVector[,,] fvsSibs,
                                     double[,,] probsSibs,
                                     FeatureVector[,,,] ntFvs,
                                     double[,,,] ntProbs, Parameters @params)
        {
            string[] toks = inst.Sentence;
            string[] pos = inst.POS;
            int length = toks.Length;

            // Shortened POS tags: SubstringWithIndex(0, 1) of each tag
            // (presumably its first character; confirm against the extension method).
            var posA = new string[pos.Length];
            for (int i = 0; i < pos.Length; i++)
            {
                posA[i] = pos[i].SubstringWithIndex(0, 1);
            }

            // Pair features: one vector per (w1 < w2) and per attachment direction.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1 + 1; w2 < length; w2++)
                {
                    for (int ph = 0; ph < 2; ph++)
                    {
                        bool attR = ph == 0;

                        FeatureVector pairFV = CreateFeatureVector(toks, pos, posA, w1, w2, attR,
                                                                   new FeatureVector());
                        double pairScore = @params.GetScore(pairFV);
                        fvs[w1, w2, ph] = pairFV;
                        probs[w1, w2, ph] = pairScore;
                    }
                }
            }

            // Label features: per word, per entry of Types, per direction and per child flag.
            if (Labeled)
            {
                for (int w1 = 0; w1 < length; w1++)
                {
                    for (int t = 0; t < Types.Length; t++)
                    {
                        string type = Types[t];

                        for (int ph = 0; ph < 2; ph++)
                        {
                            bool attR = ph == 0;

                            for (int ch = 0; ch < 2; ch++)
                            {
                                bool child = ch == 0;

                                FeatureVector labelFV = CreateFeatureVector(toks, pos, posA, w1,
                                                                            type, attR, child,
                                                                            new FeatureVector());
                                double labelScore = @params.GetScore(labelFV);
                                ntFvs[w1, t, ph, ch] = labelFV;
                                ntProbs[w1, t, ph, ch] = labelScore;
                            }
                        }
                    }
                }
            }

            // Triple features: for each w1, the triples with w1 <= w2 < w3 and then the
            // mirrored ones with w1 >= w2 > w3.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1; w2 < length; w2++)
                {
                    for (int w3 = w2 + 1; w3 < length; w3++)
                    {
                        FeatureVector tripFV = CreateFeatureVector(toks, pos, posA, w1, w2, w3,
                                                                   new FeatureVector());
                        double tripScore = @params.GetScore(tripFV);
                        fvsTrips[w1, w2, w3] = tripFV;
                        probsTrips[w1, w2, w3] = tripScore;
                    }
                }
                for (int w2 = w1; w2 >= 0; w2--)
                {
                    for (int w3 = w2 - 1; w3 >= 0; w3--)
                    {
                        FeatureVector tripFV = CreateFeatureVector(toks, pos, posA, w1, w2, w3,
                                                                   new FeatureVector());
                        double tripScore = @params.GetScore(tripFV);
                        fvsTrips[w1, w2, w3] = tripFV;
                        probsTrips[w1, w2, w3] = tripScore;
                    }
                }
            }

            // Sibling features: every ordered pair of distinct positions, two slots each.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = 0; w2 < length; w2++)
                {
                    if (w1 == w2)
                    {
                        continue;
                    }

                    for (int wh = 0; wh < 2; wh++)
                    {
                        FeatureVector sibFV = CreateFeatureVectorSib(toks, pos, w1, w2, wh == 0,
                                                                     new FeatureVector());
                        double sibScore = @params.GetScore(sibFV);
                        fvsSibs[w1, w2, wh] = sibFV;
                        probsSibs[w1, w2, wh] = sibScore;
                    }
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Reads one instance's serialized feature indices from <paramref name="reader"/>, rebuilds the
        /// feature vectors, scores each one with <paramref name="parameters"/> and stores vectors and
        /// scores in the supplied tables.
        /// </summary>
        /// <remarks>
        /// Stream layout consumed, in order: pair features, label features (only when <c>Labeled</c>),
        /// the instance-level feature list, then the token / POS / label string arrays (each followed by
        /// one separator integer), the parse-tree string and a closing marker. Every individual feature
        /// list ends with <c>-2</c>, every section with <c>-3</c>, the instance-level list with
        /// <c>-4</c> and the whole record with <c>-1</c>.
        /// </remarks>
        /// <param name="reader">Binary reader positioned at the start of the record.</param>
        /// <param name="inst">Only its <c>Length</c> is read; it bounds every word loop.</param>
        /// <param name="fvs">Receives pair feature vectors at <c>[w1, w2, ph]</c> for <c>w1 &lt; w2</c>.</param>
        /// <param name="probs">Receives the scores matching <paramref name="fvs"/>.</param>
        /// <param name="ntFvs">Receives label feature vectors at <c>[w1, t, ph, ch]</c>; untouched unless <c>Labeled</c>.</param>
        /// <param name="ntProbs">Receives the scores matching <paramref name="ntFvs"/>.</param>
        /// <param name="parameters">Scores each feature vector through <c>GetScore</c>.</param>
        /// <returns>
        /// A new <see cref="DependencyInstance"/> built from the strings and the instance-level feature
        /// vector read from the stream, with <c>ActParseTree</c> set.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown with the message "Bad File Format" when an expected marker is missing or the trailing
        /// string data cannot be read; in the latter case the original failure is the inner exception.
        /// </exception>
        public DependencyInstance ReadFeatureVector(BinaryReader reader,
                                                   DependencyInstance inst,
                                                   FeatureVector[,,] fvs,
                                                   double[,,] probs,
                                                   FeatureVector[,,,] ntFvs,
                                                   double[,,,] ntProbs,
                                                   Parameters parameters)
        {
            const int featureListEnd = -2;
            const int sectionEnd = -3;
            const int instanceFeaturesEnd = -4;
            const int recordEnd = -1;

            int length = inst.Length;

            // Pair section: one feature list per (w1 < w2) and per slot ph.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1 + 1; w2 < length; w2++)
                {
                    for (int ph = 0; ph < 2; ph++)
                    {
                        FeatureVector pairFV = ReadFeatureList(reader, featureListEnd);
                        double pairScore = parameters.GetScore(pairFV);
                        fvs[w1, w2, ph] = pairFV;
                        probs[w1, w2, ph] = pairScore;
                    }
                }
            }
            RequireSentinel(reader, sectionEnd);

            // Label section: present in the stream only when Labeled is set.
            if (Labeled)
            {
                for (int w1 = 0; w1 < length; w1++)
                {
                    for (int t = 0; t < Types.Length; t++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            for (int ch = 0; ch < 2; ch++)
                            {
                                FeatureVector labelFV = ReadFeatureList(reader, featureListEnd);
                                double labelScore = parameters.GetScore(labelFV);
                                ntFvs[w1, t, ph, ch] = labelFV;
                                ntProbs[w1, t, ph, ch] = labelScore;
                            }
                        }
                    }
                }
                RequireSentinel(reader, sectionEnd);
            }

            // Instance-level features, closed by their own marker.
            FeatureVector nfv = ReadFeatureList(reader, instanceFeaturesEnd);

            string[] toks = null;
            string[] pos = null;
            string[] labs = null;
            string actParseTree = null;
            int closingMarker = instanceFeaturesEnd;
            try
            {
                // Each array is followed by one separator integer whose value is not validated.
                toks = ReadStringArray(reader);
                reader.ReadInt32();
                pos = ReadStringArray(reader);
                reader.ReadInt32();
                labs = ReadStringArray(reader);
                reader.ReadInt32();
                actParseTree = reader.ReadString();
                closingMarker = reader.ReadInt32();
            }
            catch (Exception e)
            {
                Console.WriteLine("Error reading file.");
                // Keep the underlying failure as the inner exception so it can be diagnosed.
                throw new Exception("Bad File Format", e);
            }

            if (closingMarker != recordEnd)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            var pti = new DependencyInstance(toks, pos, labs, nfv);
            pti.ActParseTree = actParseTree;
            return pti;
        }

        /// <summary>
        /// Reads feature indices until <paramref name="sentinel"/> is read and adds each one,
        /// with value 1.0, to a new feature vector.
        /// </summary>
        private FeatureVector ReadFeatureList(BinaryReader reader, int sentinel)
        {
            var features = new FeatureVector();
            for (int index = reader.ReadInt32(); index != sentinel; index = reader.ReadInt32())
            {
                AddNewFeature(index, 1.0, features);
            }
            return features;
        }

        /// <summary>
        /// Reads one integer and throws "Bad File Format" unless it equals <paramref name="sentinel"/>.
        /// </summary>
        private static void RequireSentinel(BinaryReader reader, int sentinel)
        {
            if (reader.ReadInt32() != sentinel)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }
        }

        /// <summary>
        /// Reads an integer count followed by that many strings.
        /// </summary>
        private static string[] ReadStringArray(BinaryReader reader)
        {
            int count = reader.ReadInt32();
            var values = new string[count];
            for (int i = 0; i < count; i++)
            {
                values[i] = reader.ReadString();
            }
            return values;
        }