Ejemplo n.º 1
0
        /// <summary>
        /// Counts the positions at which a predicted parse disagrees with the reference
        /// parse, leaving out positions whose POS tag matches the punctuation pattern.
        /// </summary>
        /// <param name="inst">Instance supplying the POS tags; the tag at index <c>i + 1</c> is checked for span <c>i</c>.</param>
        /// <param name="pred">Predicted parse: space-separated spans, of which only the text before the first ':' is compared.</param>
        /// <param name="act">Reference parse in the same format; must have at least as many spans as <paramref name="pred"/>.</param>
        /// <returns>Number of reference spans, minus the skipped punctuation positions, minus the agreeing positions.</returns>
        public double NumErrorsDepNoPunc(DependencyInstance inst, string pred, string act)
        {
            string[] goldSpans = act.Split(' ');
            string[] guessSpans = pred.Split(' ');

            string[] tags = inst.POS;

            int matched = 0;
            int skipped = 0;

            for (int idx = 0; idx < guessSpans.Length; idx++)
            {
                string guessArc = guessSpans[idx].Split(':')[0];
                string goldArc = goldSpans[idx].Split(':')[0];

                // The tag array is read one position ahead of the span list.
                bool isPunctuation = tags[idx + 1].Matches(@"[,:\.'`]+");
                if (isPunctuation)
                {
                    skipped++;
                }
                else if (guessArc.Equals(goldArc))
                {
                    matched++;
                }
            }

            double total = goldSpans.Length;
            return total - skipped - matched;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a K-best parse forest for the given instance.
 /// </summary>
 /// <param name="start">Stored as the forest's start index.</param>
 /// <param name="end">Stored as the forest's end index; also determines the chart size.</param>
 /// <param name="inst">Instance whose sentence and POS arrays are kept by reference.</param>
 /// <param name="K">Number of slots allocated in each chart cell.</param>
 public KBestParseForest(int start, int end, DependencyInstance inst, int K)
 {
     m_start = start;
     m_end = end;
     this.K = K;

     // The first two chart axes cover indices 0..end inclusive; the next two have
     // size 2 each, and the last holds K entries per cell.
     int side = end + 1;
     Chart = new ParseForestItem[side, side, 2, 2, K];

     m_sent = inst.Sentence;
     m_pos = inst.POS;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Counts the positions at which a predicted parse disagrees with the reference parse.
        /// </summary>
        /// <param name="inst">Not read by this method.</param>
        /// <param name="pred">Predicted parse: space-separated spans, of which only the text before the first ':' is compared.</param>
        /// <param name="act">Reference parse in the same format; must have at least as many spans as <paramref name="pred"/>.</param>
        /// <returns>Number of reference spans minus the number of agreeing positions.</returns>
        public double NumErrorsDep(DependencyInstance inst, string pred, string act)
        {
            string[] goldSpans = act.Split(' ');
            string[] guessSpans = pred.Split(' ');

            // Start from "everything is wrong" and subtract each agreeing position.
            int errors = goldSpans.Length;

            for (int idx = 0; idx < guessSpans.Length; idx++)
            {
                string guessArc = guessSpans[idx].Split(':')[0];
                string goldArc = goldSpans[idx].Split(':')[0];
                if (guessArc.Equals(goldArc))
                {
                    errors--;
                }
            }

            return errors;
        }
        /// <summary>
        /// Non-projective decoding for the second-order model: obtains the single best
        /// projective parse, hands its head and label arrays to <c>Rearrange</c>, and
        /// rewrites row 0 of the result from the arrays as they stand afterwards.
        /// </summary>
        /// <remarks>
        /// <paramref name="K"/> is accepted for signature parity with
        /// <c>DecodeProjective</c> but is not used here: exactly one projective parse is
        /// requested and only row 0 of the returned table is updated.
        /// </remarks>
        /// <param name="inst">Instance being decoded (sentence tokens and POS tags are read).</param>
        /// <param name="fvs">Edge feature vectors, forwarded to <c>DecodeProjective</c>.</param>
        /// <param name="probs">Edge scores, forwarded to <c>DecodeProjective</c> and <c>Rearrange</c>.</param>
        /// <param name="fvsTrips">Triple feature vectors, forwarded to <c>DecodeProjective</c>.</param>
        /// <param name="probsTrips">Triple scores, forwarded to <c>DecodeProjective</c> and <c>Rearrange</c>.</param>
        /// <param name="fvsSibs">Sibling feature vectors, forwarded to <c>DecodeProjective</c>.</param>
        /// <param name="probsSibs">Sibling scores, forwarded to <c>DecodeProjective</c> and <c>Rearrange</c>.</param>
        /// <param name="ntFvs">Label feature vectors, forwarded to <c>DecodeProjective</c>.</param>
        /// <param name="ntProbs">Label scores, forwarded to <c>DecodeProjective</c> and <c>Rearrange</c>.</param>
        /// <param name="K">Unused (see remarks).</param>
        /// <returns>
        /// The table returned by <c>DecodeProjective</c>, with <c>[0, 0]</c> replaced by a
        /// freshly created feature vector and <c>[0, 1]</c> by the parse string
        /// ("head|child:label" spans separated by single spaces).
        /// </returns>
        public object[,] DecodeNonProjective(DependencyInstance inst,
                                             FeatureVector[,,] fvs,
                                             double[,,] probs,
                                             FeatureVector[,,] fvsTrips,
                                             double[,,] probsTrips,
                                             FeatureVector[,,] fvsSibs,
                                             double[,,] probsSibs,
                                             FeatureVector[,,,] ntFvs,
                                             double[,,,] ntProbs, int K)
        {
            string[] toks = inst.Sentence;
            string[] pos = inst.POS;

            object[,] orig = DecodeProjective(inst, fvs, probs, fvsTrips, probsTrips, fvsSibs, probsSibs, ntFvs,
                                              ntProbs, 1);
            string[] o = ((string) orig[0, 1]).Split(' ');

            // Index 0 is a placeholder entry (head -1, label 0); span i-1 describes index i.
            var par = new int[o.Length + 1];
            var labs = new int[o.Length + 1];
            labs[0] = 0;
            par[0] = -1;
            for (int i = 1; i < par.Length; i++)
            {
                string span = o[i - 1];

                // Split on the literal '|' separator. The earlier "\\|" char array was a
                // regex escape carried over from another language and also split on '\'.
                par[i] = int.Parse(span.Split('|')[0]);
                labs[i] = m_pipe.Labeled ? int.Parse(span.Split(':')[1]) : 0;
            }

            Rearrange(probs, probsTrips, probsSibs, ntProbs, par, labs);

            // Rebuild the parse string from par/labs after Rearrange has seen them.
            // Joining once avoids the quadratic cost of repeated string concatenation.
            var spans = new string[par.Length - 1];
            for (int i = 1; i < par.Length; i++)
            {
                spans[i - 1] = par[i] + "|" + i + ":" + labs[i];
            }

            orig[0, 0] = ((DependencyPipe2O) m_pipe).CreateFeatureVector(toks, pos, labs, par);
            orig[0, 1] = string.Join(" ", spans);

            return orig;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Total loss of a predicted parse against the reference parse: dependency errors
 /// plus label errors, using the punctuation-skipping variants when the loss type
 /// is <c>LossTypes.NoPunc</c>.
 /// </summary>
 /// <param name="inst">Instance the two parses belong to.</param>
 /// <param name="pred">Predicted parse string.</param>
 /// <param name="act">Reference parse string.</param>
 /// <returns>Sum of the dependency error count and the label error count.</returns>
 public double NumErrors(DependencyInstance inst, string pred, string act)
 {
     bool skipPunctuation = LossType == LossTypes.NoPunc;

     return skipPunctuation
                ? NumErrorsDepNoPunc(inst, pred, act) + NumErrorsLabelNoPunc(inst, pred, act)
                : NumErrorsDep(inst, pred, act) + NumErrorsLabel(inst, pred, act);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Applies one MIRA update: builds one constraint per candidate parse in
        /// <paramref name="d"/>, solves for the step sizes with <c>hildreth</c>, and adds
        /// the weighted difference vectors to <c>parameters</c> and <c>Total</c>.
        /// </summary>
        /// <param name="inst">Instance providing the reference parse string and its feature vector.</param>
        /// <param name="d">
        /// Candidate table: column 0 holds a <c>FeatureVector</c>, column 1 the parse string.
        /// Rows are consumed up to the first one whose column 0 is null.
        /// </param>
        /// <param name="upd">Multiplier applied to every increment added to <c>Total</c>.</param>
        public void UpdateParamsMIRA(DependencyInstance inst, object[,] d, double upd)
        {
            string goldTree = inst.ActParseTree;
            FeatureVector goldFv = inst.Fv;

            // Number of leading rows of d that actually hold a candidate.
            int candidates = 0;
            while (candidates < d.GetLength(0) && d[candidates, 0] != null)
            {
                candidates++;
            }

            var margins = new double[candidates];
            var differences = new FeatureVector[candidates];

            for (int k = 0; k < candidates; k++)
            {
                var candidateFv = (FeatureVector) d[k, 0];

                double scoreGap = GetScore(goldFv) - GetScore(candidateFv);

                // Constraint value: error count of the candidate minus the current score gap.
                margins[k] = NumErrors(inst, (string) d[k, 1], goldTree);
                margins[k] -= scoreGap;

                differences[k] = FeatureVector.GetDistVector(goldFv, candidateFv);
            }

            double[] alpha = hildreth(differences, margins);

            for (int k = 0; k < candidates; k++)
            {
                foreach (Feature feature in differences[k].FVector)
                {
                    // Negative indices are skipped.
                    if (feature.Index >= 0)
                    {
                        parameters[feature.Index] += alpha[k]*feature.Value;
                        Total[feature.Index] += upd*alpha[k]*feature.Value;
                    }
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs one training pass over all instances: reads each instance's feature
        /// data from the forest file, decodes it, and applies a MIRA update.
        /// </summary>
        /// <param name="il">Training instances, processed in array order.</param>
        /// <param name="trainfile">Not read by this method.</param>
        /// <param name="train_forest">Path of the binary forest file; opened read-only for the duration of the pass.</param>
        /// <param name="iter">Iteration number; <c>Train</c> passes 1 for the first pass.</param>
        private void TrainingIter(DependencyInstance[] il, string trainfile, string train_forest, int iter)
        {
            // The using block guarantees the reader (and its underlying stream) is closed
            // even when reading, decoding or updating throws. The file is only read, so
            // it is opened with read access rather than the default read/write.
            using (var in_ = new BinaryReader(new FileStream(train_forest, FileMode.Open, FileAccess.Read)))
            {
                for (int i = 0; i < il.Length; i++)
                {
                    if ((i + 1)%100 == 0)
                        Console.WriteLine("  " + (i + 1) + " instances");

                    DependencyInstance inst = il[i];

                    int length = inst.Length;

                    // Per-instance score and feature tables handed to the pipe and the decoder.
                    var fvs = new FeatureVector[length,length,2];
                    var probs = new double[length,length,2];
                    var ntFvs = new FeatureVector[length,m_pipe.Types.Length,2,2];
                    var ntProbs = new double[length,m_pipe.Types.Length,2,2];
                    var fvsTrips = new FeatureVector[length,length,length];
                    var probsTrips = new double[length,length,length];
                    var fvsSibs = new FeatureVector[length,length,2];
                    var probsSibs = new double[length,length,2];

                    if (SecondOrder)
                        inst = ((DependencyPipe2O) m_pipe).GetFeatureVector(in_, inst, fvs, probs,
                                                                          fvsTrips, probsTrips,
                                                                          fvsSibs, probsSibs,
                                                                          ntFvs, ntProbs, m_params);
                    else
                        inst = m_pipe.ReadFeatureVector(in_, inst, fvs, probs, ntFvs, ntProbs, m_params);

                    // Number of updates still to come in the whole run, counting this one:
                    // total updates minus the updates already performed.
                    var upd = (double) (NumIters*il.Length - (il.Length*(iter - 1) + (i + 1)) + 1);
                    int K = TrainK;
                    object[,] d = null;
                    if (DecodeType==ProjectiveTypes.Projective)
                    {
                        if (SecondOrder)
                            d = ((DependencyDecoder2O) m_decoder).DecodeProjective(inst, fvs, probs,
                                                                                 fvsTrips, probsTrips,
                                                                                 fvsSibs, probsSibs,
                                                                                 ntFvs, ntProbs, K);
                        else
                            d = m_decoder.DecodeProjective(inst, fvs, probs, ntFvs, ntProbs, K);
                    }
                    if (DecodeType==ProjectiveTypes.NonProjective)
                    {
                        if (SecondOrder)
                            d = ((DependencyDecoder2O) m_decoder).DecodeNonProjective(inst, fvs, probs,
                                                                                    fvsTrips, probsTrips,
                                                                                    fvsSibs, probsSibs,
                                                                                    ntFvs, ntProbs, K);
                        else
                            d = m_decoder.decodeNonProjective(inst, fvs, probs, ntFvs, ntProbs, K);
                    }
                    m_params.UpdateParamsMIRA(inst, d, upd);
                }
                Console.WriteLine("");

                Console.WriteLine("  " + il.Length + " instances");
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Trains the model for <c>NumIters</c> passes over the instances and then
        /// averages the parameters.
        /// </summary>
        /// <param name="il">Training instances.</param>
        /// <param name="trainfile">Forwarded unchanged to <c>TrainingIter</c>.</param>
        /// <param name="trainForest">Path of the binary forest file read on every pass.</param>
        public void Train(DependencyInstance[] il, string trainfile, string trainForest)
        {
            Console.WriteLine("About to Train");
            Console.WriteLine("Num Feats: " + m_pipe.DataAlphabet.Count);

            int i = 0;
            for (i = 0; i < NumIters; i++)
            {
                Console.WriteLine("========================");
                Console.WriteLine("Iteration: " + i);
                Console.WriteLine("========================");
                Console.Write("Processed: ");

                // Measure elapsed time with a monotonic timer. The previous
                // DateTime.Now.Ticks*10000 overflowed long and did not yield milliseconds
                // (a tick is 100 ns, so the factor pointed the wrong way).
                var timer = System.Diagnostics.Stopwatch.StartNew();

                // TrainingIter expects a 1-based iteration number.
                TrainingIter(il, trainfile, trainForest, i + 1);

                timer.Stop();
                Console.WriteLine("Training iter took: " + timer.ElapsedMilliseconds);
            }

            // i equals the number of completed passes here, so this is the total update count.
            m_params.AverageParams(i*il.Length);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Writes every candidate feature vector of an instance, followed by the
        /// instance itself, to <paramref name="out_"/>.
        /// </summary>
        /// <remarks>
        /// Layout, in order:
        /// 1. edge vectors for every pair w1 &lt; w2 in both directions, then -3;
        /// 2. if <c>Labeled</c>: label vectors for every word, type, direction and
        ///    child flag, then -3;
        /// 3. triple vectors (w1, w2, w3), first to the right then to the left of w1, then -3;
        /// 4. sibling vectors for every ordered pair w1 != w2 with both flag values, then -3;
        /// 5. all feature indices of <c>inst.Fv</c> (unfiltered), then -4;
        /// 6. sentence length and tokens, POS length and tags, -6, label count and
        ///    labels, -7, the reference parse string, and finally -1.
        /// Each candidate vector is written as its non-negative feature indices followed by -2.
        /// An <see cref="IOException"/> is reported on standard error and not rethrown;
        /// the record written so far is then incomplete.
        /// </remarks>
        /// <param name="inst">Instance to serialise.</param>
        /// <param name="out_">Destination writer; it is neither flushed nor closed here.</param>
        public override void WritePossibleFeatures(DependencyInstance inst, BinaryWriter out_)
        {
            string[] toks = inst.Sentence;
            string[] pos = inst.POS;

            // First character of each POS tag, passed alongside the full tags.
            var posA = new string[pos.Length];
            for (int i = 0; i < pos.Length; i++)
            {
                posA[i] = pos[i].SubstringWithIndex(0, 1);
            }

            try
            {
                // Section 1: edge features.
                for (int w1 = 0; w1 < toks.Length; w1++)
                {
                    for (int w2 = w1 + 1; w2 < toks.Length; w2++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            bool attR = ph == 0;

                            WriteFeatureRecord(out_, CreateFeatureVector(toks, pos, posA, w1, w2, attR,
                                                                         new FeatureVector()));
                        }
                    }
                }

                out_.Write(-3);

                // Section 2: label features (only for labeled models).
                if (Labeled)
                {
                    for (int w1 = 0; w1 < toks.Length; w1++)
                    {
                        for (int t = 0; t < Types.Length; t++)
                        {
                            string type = Types[t];

                            for (int ph = 0; ph < 2; ph++)
                            {
                                bool attR = ph == 0;

                                for (int ch = 0; ch < 2; ch++)
                                {
                                    bool child = ch == 0;

                                    WriteFeatureRecord(out_, CreateFeatureVector(toks, pos, posA, w1,
                                                                                 type,
                                                                                 attR, child,
                                                                                 new FeatureVector()));
                                }
                            }
                        }
                    }

                    out_.Write(-3);
                }

                // Section 3: triple features, rightwards then leftwards from w1.
                for (int w1 = 0; w1 < toks.Length; w1++)
                {
                    for (int w2 = w1; w2 < toks.Length; w2++)
                    {
                        for (int w3 = w2 + 1; w3 < toks.Length; w3++)
                        {
                            WriteFeatureRecord(out_, CreateFeatureVector(toks, pos, posA, w1, w2, w3,
                                                                         new FeatureVector()));
                        }
                    }
                    for (int w2 = w1; w2 >= 0; w2--)
                    {
                        for (int w3 = w2 - 1; w3 >= 0; w3--)
                        {
                            WriteFeatureRecord(out_, CreateFeatureVector(toks, pos, posA, w1, w2, w3,
                                                                         new FeatureVector()));
                        }
                    }
                }

                out_.Write(-3);

                // Section 4: sibling features for every ordered pair of distinct words.
                for (int w1 = 0; w1 < toks.Length; w1++)
                {
                    for (int w2 = 0; w2 < toks.Length; w2++)
                    {
                        for (int wh = 0; wh < 2; wh++)
                        {
                            if (w1 != w2)
                            {
                                WriteFeatureRecord(out_, CreateFeatureVectorSib(toks, pos, w1, w2, wh == 0,
                                                                                new FeatureVector()));
                            }
                        }
                    }
                }

                out_.Write(-3);

                // Section 5: the instance's own feature vector, written without filtering.
                foreach (Feature feature in inst.Fv.FVector)
                {
                    out_.Write(feature.Index);
                }

                out_.Write(-4);

                // Section 6: the instance data itself.
                out_.Write(inst.Sentence.Length);
                foreach (string s in inst.Sentence)
                {
                    out_.Write(s);
                }
                out_.Write(inst.POS.Length);
                foreach (string s in inst.POS)
                {
                    out_.Write(s);
                }
                out_.Write(-6);
                out_.Write(inst.Labs.Length);
                foreach (string s in inst.Labs)
                {
                    out_.Write(s);
                }
                out_.Write(-7);
                out_.Write(inst.ActParseTree);

                out_.Write(-1);
            }
            catch (IOException e)
            {
                // Still not rethrown, so callers see the same contract as before, but the
                // failure is no longer invisible: a truncated record makes the forest
                // file unreadable later on.
                System.Console.Error.WriteLine(
                    "WritePossibleFeatures: I/O error, instance record is incomplete: " + e.Message);
            }
        }

        /// <summary>
        /// Writes the non-negative feature indices of <paramref name="fv"/> followed by the
        /// record terminator -2.
        /// </summary>
        private static void WriteFeatureRecord(BinaryWriter out_, FeatureVector fv)
        {
            foreach (Feature feature in fv.FVector)
            {
                if (feature.Index >= 0)
                    out_.Write(feature.Index);
            }
            out_.Write(-2);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Fills the supplied tables with the feature vector and score of every
        /// candidate edge, label assignment, triple and sibling pair of an instance.
        /// </summary>
        /// <param name="inst">Instance whose sentence tokens and POS tags are read.</param>
        /// <param name="fvs">Receives edge feature vectors at <c>[w1, w2, direction]</c> for w1 &lt; w2.</param>
        /// <param name="probs">Receives the corresponding edge scores.</param>
        /// <param name="fvsTrips">Receives triple feature vectors at <c>[w1, w2, w3]</c>.</param>
        /// <param name="probsTrips">Receives the corresponding triple scores.</param>
        /// <param name="fvsSibs">Receives sibling feature vectors at <c>[w1, w2, flag]</c> for w1 != w2.</param>
        /// <param name="probsSibs">Receives the corresponding sibling scores.</param>
        /// <param name="ntFvs">Receives label feature vectors at <c>[word, type, direction, childFlag]</c>; only written when <c>Labeled</c>.</param>
        /// <param name="ntProbs">Receives the corresponding label scores.</param>
        /// <param name="params">Parameters used to score every feature vector.</param>
        public void GetFeatureVector(DependencyInstance inst,
                                     FeatureVector[,,] fvs,
                                     double[,,] probs,
                                     FeatureVector[,,] fvsTrips,
                                     double[,,] probsTrips,
                                     FeatureVector[,,] fvsSibs,
                                     double[,,] probsSibs,
                                     FeatureVector[,,,] ntFvs,
                                     double[,,,] ntProbs, Parameters @params)
        {
            string[] words = inst.Sentence;
            string[] tags = inst.POS;
            int wordCount = words.Length;

            // First character of each POS tag.
            var coarseTags = new string[tags.Length];
            for (int idx = 0; idx < tags.Length; idx++)
            {
                coarseTags[idx] = tags[idx].SubstringWithIndex(0, 1);
            }

            // Edges: every pair left < right, index 0 of the last axis first.
            for (int left = 0; left < wordCount; left++)
            {
                for (int right = left + 1; right < wordCount; right++)
                {
                    for (int dir = 0; dir < 2; dir++)
                    {
                        FeatureVector edgeFv = CreateFeatureVector(words, tags, coarseTags, left, right,
                                                                   dir == 0, new FeatureVector());
                        double edgeScore = @params.GetScore(edgeFv);

                        fvs[left, right, dir] = edgeFv;
                        probs[left, right, dir] = edgeScore;
                    }
                }
            }

            // Label assignments: every word, type, direction and child flag.
            if (Labeled)
            {
                for (int word = 0; word < wordCount; word++)
                {
                    for (int typeIdx = 0; typeIdx < Types.Length; typeIdx++)
                    {
                        string typeName = Types[typeIdx];

                        for (int dir = 0; dir < 2; dir++)
                        {
                            for (int role = 0; role < 2; role++)
                            {
                                FeatureVector labelFv = CreateFeatureVector(words, tags, coarseTags, word,
                                                                            typeName, dir == 0, role == 0,
                                                                            new FeatureVector());
                                double labelScore = @params.GetScore(labelFv);

                                ntFvs[word, typeIdx, dir, role] = labelFv;
                                ntProbs[word, typeIdx, dir, role] = labelScore;
                            }
                        }
                    }
                }
            }

            // Triples: for each first index, walk rightwards and then leftwards.
            for (int first = 0; first < wordCount; first++)
            {
                for (int second = first; second < wordCount; second++)
                {
                    for (int third = second + 1; third < wordCount; third++)
                    {
                        FeatureVector tripFv = CreateFeatureVector(words, tags, coarseTags, first, second, third,
                                                                   new FeatureVector());
                        double tripScore = @params.GetScore(tripFv);

                        fvsTrips[first, second, third] = tripFv;
                        probsTrips[first, second, third] = tripScore;
                    }
                }

                for (int second = first; second >= 0; second--)
                {
                    for (int third = second - 1; third >= 0; third--)
                    {
                        FeatureVector tripFv = CreateFeatureVector(words, tags, coarseTags, first, second, third,
                                                                   new FeatureVector());
                        double tripScore = @params.GetScore(tripFv);

                        fvsTrips[first, second, third] = tripFv;
                        probsTrips[first, second, third] = tripScore;
                    }
                }
            }

            // Siblings: every ordered pair of distinct words, both flag values.
            for (int first = 0; first < wordCount; first++)
            {
                for (int second = 0; second < wordCount; second++)
                {
                    if (first == second)
                    {
                        continue;
                    }

                    for (int flag = 0; flag < 2; flag++)
                    {
                        FeatureVector sibFv = CreateFeatureVectorSib(words, tags, first, second, flag == 0,
                                                                     new FeatureVector());
                        double sibScore = @params.GetScore(sibFv);

                        fvsSibs[first, second, flag] = sibFv;
                        probsSibs[first, second, flag] = sibScore;
                    }
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Reads the next sentence via <c>ReadLines</c> and wraps it in a
        /// <c>DependencyInstance</c> together with its feature vector and its
        /// reference parse string.
        /// </summary>
        /// <param name="toks">Sentence tokens, passed by reference to <c>ReadLines</c>.</param>
        /// <param name="pos">POS tags, passed by reference to <c>ReadLines</c>.</param>
        /// <param name="labs">Receives the labels produced by <c>ReadLines</c>.</param>
        /// <param name="deps">Receives the head indices produced by <c>ReadLines</c>.</param>
        /// <returns>
        /// The new instance; its <c>ActParseTree</c> holds one "head|child:labelIndex"
        /// span per index from 1 upwards, separated by single spaces.
        /// </returns>
        public DependencyInstance CreateInstance(ref string[] toks,ref string[] pos,out string []labs, out int[] deps)
        {
            ReadLines(ref toks, ref pos, out labs, out deps);

            var instance = new DependencyInstance(toks, pos, labs,
                                                  CreateFeatureVector(toks, pos, labs, deps));

            // Index 0 carries no span, so there is one span fewer than there are entries.
            int spanCount = deps.Length > 1 ? deps.Length - 1 : 0;
            var parts = new string[spanCount];
            for (int child = 1; child < deps.Length; child++)
            {
                parts[child - 1] = deps[child] + "|" + child + ":" + TypeAlphabet.LookupIndex(labs[child]);
            }

            instance.ActParseTree = string.Join(" ", parts).Trim();
            return instance;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// First-order non-projective decoding: builds a dense edge-score matrix, finds
        /// the best tree with <c>chuLiuEdmonds</c>, derives further candidates through
        /// <c>getKChanges</c> (each differing from the best tree in one parent), and
        /// returns every candidate with its feature vector and parse string.
        /// </summary>
        /// <param name="inst">Instance being decoded (sentence length and POS tags are read).</param>
        /// <param name="fvs">Edge feature vectors indexed <c>[smaller, larger, direction]</c>.</param>
        /// <param name="probs">Edge scores with the same indexing.</param>
        /// <param name="nt_fvs">Label feature vectors indexed <c>[word, type, direction, childFlag]</c>.</param>
        /// <param name="nt_probs">Label scores with the same indexing.</param>
        /// <param name="K">Requested number of candidates; <c>Math.Min(K, sentence length)</c> is passed to <c>getKChanges</c>.</param>
        /// <returns>
        /// A table with one row per candidate: column 0 is the candidate's
        /// <c>FeatureVector</c>, column 1 its parse string ("parent|child:type" spans
        /// separated by single spaces). Row 0 is the tree found by <c>chuLiuEdmonds</c>.
        /// </returns>
        public object[,] decodeNonProjective(DependencyInstance inst,
                                             FeatureVector[,,] fvs,
                                             double[,,] probs,
                                             FeatureVector[,,,] nt_fvs,
                                             double[,,,] nt_probs, int K)
        {
            string[] pos = inst.POS;

            int numWords = inst.Sentence.Length;
            var oldI = new int[numWords,numWords];
            var oldO = new int[numWords,numWords];
            var scoreMatrix = new double[numWords,numWords];
            var orig_scoreMatrix = new double[numWords,numWords];
            var curr_nodes = new bool[numWords];
            var reps = new Dictionary<int, int>[numWords];

            bool labeled = m_pipe.Labeled;

            int[,] static_types = null;
            if (labeled)
            {
                static_types = GetTypes(nt_probs, pos.Length);
            }

            for (int i = 0; i < numWords; i++)
            {
                curr_nodes[i] = true;
                reps[i] = new Dictionary<int, int>();
                reps[i].Add(i, 0);
                for (int j = 0; j < numWords; j++)
                {
                    // Score of edge i --> j. The edge tables are indexed by
                    // (smaller word, larger word, direction), direction 0 when i < j.
                    bool forward = i < j;
                    int lo = forward ? i : j;
                    int hi = forward ? j : i;
                    int dir = forward ? 0 : 1;

                    double score = probs[lo, hi, dir];
                    if (labeled)
                    {
                        int type = static_types[i, j];
                        score += nt_probs[i, type, dir, 1] + nt_probs[j, type, dir, 0];
                    }

                    // scoreMatrix is handed to chuLiuEdmonds; orig_scoreMatrix is a
                    // separate copy with the same values, used by getKChanges below.
                    scoreMatrix[i, j] = score;
                    orig_scoreMatrix[i, j] = score;
                    oldI[i, j] = i;
                    oldO[i, j] = j;
                }
            }

            Dictionary<int, int> final_edges = chuLiuEdmonds(scoreMatrix, curr_nodes, oldI, oldO, false,
                                                             new Dictionary<int, int>(), reps);

            // final_edges maps child --> parent.
            var par = new int[numWords];
            foreach (KeyValuePair<int, int> edge in final_edges)
            {
                par[edge.Key] = edge.Value;
            }

            // n_par[i] > -1 marks an alternative parent for word i; each one yields an
            // additional candidate tree on top of the best tree.
            int[] n_par = getKChanges(par, orig_scoreMatrix, Math.Min(K, par.Length));
            int new_k = 1;
            for (int i = 0; i < n_par.Length; i++)
                if (n_par[i] > -1) new_k++;

            // Parent arrays of all candidates: row 0 is the best tree, every further
            // row is the best tree with exactly one parent replaced.
            var fin_par = new int[new_k,numWords];
            var fin_fv = new FeatureVector[new_k,numWords];
            for (int i = 0; i < numWords; i++)
            {
                fin_par[0, i] = par[i];
            }
            int c = 1;
            for (int i = 0; i < n_par.Length; i++)
            {
                if (n_par[i] > -1)
                {
                    for (int j = 0; j < numWords; j++)
                        fin_par[c, j] = par[j];
                    fin_par[c, i] = n_par[i];
                    c++;
                }
            }

            // Per-word feature vectors of every candidate.
            for (int k = 0; k < new_k; k++)
            {
                for (int i = 0; i < numWords; i++)
                {
                    int ch = i;
                    int pr = fin_par[k, i];
                    if (pr != -1)
                    {
                        fin_fv[k, ch] = fvs[ch < pr ? ch : pr, ch < pr ? pr : ch, ch < pr ? 1 : 0];
                        if (labeled)
                        {
                            fin_fv[k, ch] = FeatureVector.Cat(fin_fv[k, ch],
                                      nt_fvs[ch, static_types[pr, ch], ch < pr ? 1 : 0, 0]);
                            fin_fv[k, ch] = FeatureVector.Cat(fin_fv[k, ch],
                                      nt_fvs[pr, static_types[pr, ch], ch < pr ? 1 : 0, 1]);
                        }
                    }
                    else
                    {
                        fin_fv[k, ch] = new FeatureVector();
                    }
                }
            }

            var fin = new FeatureVector[new_k];
            var result = new string[new_k];
            for (int k = 0; k < new_k; k++)
            {
                fin[k] = new FeatureVector();

                // Concatenate the vectors of words 1..numWords-1 of candidate k. The
                // bound is the length of the second dimension; the earlier
                // GetLength(k) treated the candidate index as a dimension number, which
                // used the wrong bound for k = 0 and threw for k >= 2.
                for (int i = 1; i < fin_fv.GetLength(1); i++)
                    fin[k] = FeatureVector.Cat(fin_fv[k, i], fin[k]);

                result[k] = "";
                for (int i = 1; i < par.Length; i++)
                    result[k] += fin_par[k, i] + "|" + i + (labeled ? ":" + static_types[fin_par[k, i], i] : ":0") + " ";
            }

            // create d.
            var d = new object[new_k,2];

            for (int k = 0; k < new_k; k++)
            {
                d[k, 0] = fin[k];
                d[k, 1] = result[k].Trim();
            }

            return d;
        }
        // same as decode, except return K best
        public object[,] DecodeProjective(DependencyInstance inst,
                                          FeatureVector[,,] fvs,
                                          double[,,] probs,
                                          FeatureVector[,,] fvsTrips,
                                          double[,,] probsTrips,
                                          FeatureVector[,,] fvsSibs,
                                          double[,,] probsSibs,
                                          FeatureVector[,,,] ntFvs,
                                          double[,,,] ntProbs, int K)
        {
            string[] toks = inst.Sentence;
            string[] pos = inst.POS;

            int[,] staticTypes = null;
            if (m_pipe.Labeled)
            {
                staticTypes = GetTypes(ntProbs, toks.Length);
            }

            var pf = new KBestParseForest2O(0, toks.Length - 1, inst, K);

            for (int s = 0; s < toks.Length; s++)
            {
                pf.Add(s, -1, 0, 0.0, new FeatureVector());
                pf.Add(s, -1, 1, 0.0, new FeatureVector());
            }

            for (int j = 1; j < toks.Length; j++)
            {
                for (int s = 0; s < toks.Length && s + j < toks.Length; s++)
                {
                    int t = s + j;

                    FeatureVector prodFvSt = fvs[s, t, 0];
                    FeatureVector prodFvTs = fvs[s, t, 1];
                    double prodProbSt = probs[s, t, 0];
                    double prodProbTs = probs[s, t, 1];

                    int type1 = m_pipe.Labeled ? staticTypes[s, t] : 0;
                    int type2 = m_pipe.Labeled ? staticTypes[t, s] : 0;

                    FeatureVector ntFvS01 = ntFvs[s, type1, 0, 1];
                    FeatureVector ntFvS10 = ntFvs[s, type2, 1, 0];
                    FeatureVector ntFvT00 = ntFvs[t, type1, 0, 0];
                    FeatureVector ntFvT11 = ntFvs[t, type2, 1, 1];
                    double ntProbS01 = ntProbs[s, type1, 0, 1];
                    double ntProbS10 = ntProbs[s, type2, 1, 0];
                    double ntProbT00 = ntProbs[t, type1, 0, 0];
                    double ntProbT11 = ntProbs[t, type2, 1, 1];
                    double prodProb = 0.0;

                    if (true)
                    {
                        // case when R == S
                        ParseForestItem[] b1 = pf.GetItems(s, s, 0, 0);
                        ParseForestItem[] c1 = pf.GetItems(s + 1, t, 1, 0);
                        if (!(b1 == null || c1 == null))
                        {
                            FeatureVector prodFvSst = pf.Cat(fvsTrips[s, s, t], fvsSibs[s, t, 0]);
                            double prodProbSst = probsTrips[s, s, t] + probsSibs[s, t, 0];

                            int[,] pairs = pf.GetKBestPairs(b1, c1);

                            for (int k = 0; k < K; k++)
                            {
                                if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    break;

                                int comp1 = pairs[k, 0];
                                int comp2 = pairs[k, 1];

                                double bc = b1[comp1].Prob + c1[comp2].Prob;

                                // create sibling pair
                                // create parent pair: S->T and S->(start,T)
                                bc += prodProbSt + prodProbSst;

                                FeatureVector fvFin = pf.Cat(prodFvSt, prodFvSst);
                                if (m_pipe.Labeled)
                                {
                                    bc += ntProbS01 + ntProbT00;
                                    fvFin = FeatureVector.Cat(ntFvS01, FeatureVector.Cat(ntFvT00, fvFin));
                                }

                                pf.Add(s, s, t, type1, 0, 1, bc, fvFin, b1[comp1], c1[comp2]);
                            }
                        }

                        // case when R == T
                        b1 = pf.GetItems(s, t - 1, 0, 0);
                        c1 = pf.GetItems(t, t, 1, 0);
                        if (!(b1 == null || c1 == null))
                        {
                            FeatureVector prodFvStt = pf.Cat(fvsTrips[t, t, s], fvsSibs[t, s, 0]);
                            double prodProbStt = probsTrips[t, t, s] + probsSibs[t, s, 0];

                            int[,] pairs = pf.GetKBestPairs(b1, c1);

                            for (int k = 0; k < K; k++)
                            {
                                if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    break;

                                int comp1 = pairs[k, 0];
                                int comp2 = pairs[k, 1];

                                double bc = b1[comp1].Prob + c1[comp2].Prob;

                                // create sibling pair
                                // create parent pair: S->T and S->(start,T)
                                bc += prodProbTs + prodProbStt;

                                FeatureVector fvFin = pf.Cat(prodFvTs, prodFvStt);
                                if (m_pipe.Labeled)
                                {
                                    bc += ntProbT11 + ntProbS10;
                                    fvFin = FeatureVector.Cat(ntFvT11, FeatureVector.Cat(ntFvS10, fvFin));
                                }

                                pf.Add(s, t, t, type2, 1, 1, bc, fvFin, b1[comp1], c1[comp2]);
                            }
                        }
                    }

                    for (int r = s; r < t; r++)
                    {
                        // First case - create sibling
                        ParseForestItem[] b1 = pf.GetItems(s, r, 0, 0);
                        ParseForestItem[] c1 = pf.GetItems(r + 1, t, 1, 0);

                        if (!(b1 == null || c1 == null))
                        {
                            int[,] pairs = pf.GetKBestPairs(b1, c1);

                            for (int k = 0; k < K; k++)
                            {
                                if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    break;

                                int comp1 = pairs[k, 0];
                                int comp2 = pairs[k, 1];

                                double bc = b1[comp1].Prob + c1[comp2].Prob;

                                pf.Add(s, r, t, -1, 0, 2, bc, new FeatureVector(), b1[comp1], c1[comp2]);
                                pf.Add(s, r, t, -1, 1, 2, bc, new FeatureVector(), b1[comp1], c1[comp2]);
                            }
                        }
                    }

                    for (int r = s + 1; r < t; r++)
                    {
                        // S -> (R,T)
                        ParseForestItem[] b1 = pf.GetItems(s, r, 0, 1);
                        ParseForestItem[] c1 = pf.GetItems(r, t, 0, 2);

                        if (!(b1 == null || c1 == null))
                        {
                            int[,] pairs = pf.GetKBestPairs(b1, c1);

                            for (int k = 0; k < K; k++)
                            {
                                if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    break;

                                int comp1 = pairs[k, 0];
                                int comp2 = pairs[k, 1];

                                double bc = b1[comp1].Prob + c1[comp2].Prob;

                                bc += prodProbSt + probsTrips[s, r, t] + probsSibs[r, t, 1];
                                FeatureVector fv_fin = pf.Cat(prodFvSt, pf.Cat(fvsTrips[s, r, t], fvsSibs[r, t, 1]));

                                if (m_pipe.Labeled)
                                {
                                    bc += ntProbS01 + ntProbT00;
                                    fv_fin = FeatureVector.Cat(ntFvS01, FeatureVector.Cat(ntFvT00, fv_fin));
                                }

                                pf.Add(s, r, t, type1, 0, 1, bc, fv_fin, b1[comp1], c1[comp2]);
                            }
                        }

                        // T -> (R,S)
                        b1 = pf.GetItems(s, r, 1, 2);
                        c1 = pf.GetItems(r, t, 1, 1);

                        if (!(b1 == null || c1 == null))
                        {
                            int[,] pairs = pf.GetKBestPairs(b1, c1);

                            for (int k = 0; k < K; k++)
                            {
                                if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    break;

                                int comp1 = pairs[k, 0];
                                int comp2 = pairs[k, 1];

                                double bc = b1[comp1].Prob + c1[comp2].Prob;

                                bc += prodProbTs + probsTrips[t, r, s] + probsSibs[r, s, 1];

                                FeatureVector fvFin = pf.Cat(prodFvTs, pf.Cat(fvsTrips[t, r, s], fvsSibs[r, s, 1]));
                                if (m_pipe.Labeled)
                                {
                                    bc += ntProbT11 + ntProbS10;
                                    fvFin = FeatureVector.Cat(ntFvT11, FeatureVector.Cat(ntFvS10, fvFin));
                                }

                                pf.Add(s, r, t, type2, 1, 1, bc, fvFin, b1[comp1], c1[comp2]);
                            }
                        }
                    }

                    // Finish off pieces incom + Comp -> Comp
                    for (int r = s; r <= t; r++)
                    {
                        if (r != s)
                        {
                            ParseForestItem[] b1 = pf.GetItems(s, r, 0, 1);
                            ParseForestItem[] c1 = pf.GetItems(r, t, 0, 0);

                            if (!(b1 == null || c1 == null))
                            {
                                //continue;

                                int[,] pairs = pf.GetKBestPairs(b1, c1);
                                for (int k = 0; k < K; k++)
                                {
                                    if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                        break;

                                    int comp1 = pairs[k, 0];
                                    int comp2 = pairs[k, 1];

                                    double bc = b1[comp1].Prob + c1[comp2].Prob;

                                    if (
                                        !pf.Add(s, r, t, -1, 0, 0, bc, new FeatureVector(), b1[comp1],
                                                c1[comp2]))
                                        break;
                                }
                            }
                        }

                        if (r != t)
                        {
                            ParseForestItem[] b1 = pf.GetItems(s, r, 1, 0);
                            ParseForestItem[] c1 = pf.GetItems(r, t, 1, 1);

                            if (!(b1 == null || c1 == null))
                            {
                                //continue;

                                int[,] pairs = pf.GetKBestPairs(b1, c1);
                                for (int k = 0; k < K; k++)
                                {
                                    if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                        break;

                                    int comp1 = pairs[k, 0];
                                    int comp2 = pairs[k, 1];

                                    double bc = b1[comp1].Prob + c1[comp2].Prob;

                                    if (
                                        !pf.Add(s, r, t, -1, 1, 0, bc, new FeatureVector(), b1[comp1],
                                                c1[comp2]))
                                        break;
                                }
                            }
                        }
                    }
                }
            }

            return pf.GetBestParses();
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Writes the candidate feature indices for every possible edge of
        /// <paramref name="inst"/>, followed by the instance's own data, to
        /// <paramref name="writer"/>.
        /// </summary>
        /// <remarks>
        /// Layout, in write order (all markers are 32-bit integers):
        ///   1. For every word pair (w1, w2) with w2 after w1, and for both attachment
        ///      directions: the non-negative feature indices, then -2. Section ends with -3.
        ///   2. Only when Labeled: for every word, type, direction and child flag:
        ///      the non-negative feature indices, then -2. Section ends with -3.
        ///   3. Every feature index of inst.Fv (not filtered by sign), then -4.
        ///   4. Sentence length and tokens, -5, POS length and tags, -6,
        ///      label length and labels, -7, ActParseTree, -1.
        /// An <see cref="IOException"/> does not propagate: it is reported on the console and
        /// the method returns, leaving whatever was written so far in the stream.
        /// </remarks>
        /// <param name="inst">Instance whose sentence, POS tags, labels, feature vector and
        /// parse tree are serialized.</param>
        /// <param name="writer">Destination for the binary record.</param>
        public virtual void WritePossibleFeatures(DependencyInstance inst, BinaryWriter writer)
        {
            var toks = inst.Sentence;
            var pos = inst.POS;

            // Shortened POS tags, one per word (presumably the first character of each
            // tag; SubstringWithIndex is defined elsewhere -- confirm).
            var posA = new string[pos.Length];
            for (int i = 0; i < pos.Length; i++)
            {
                posA[i] = pos[i].SubstringWithIndex(0, 1);
            }

            try
            {
                // Section 1: unlabeled edge features for each word pair and direction.
                for (int w1 = 0; w1 < toks.Length; w1++)
                {
                    for (int w2 = w1 + 1; w2 < toks.Length; w2++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            bool attR = ph == 0;

                            var prodFV = CreateFeatureVector(toks, pos, posA, w1, w2, attR,
                                                             new FeatureVector());

                            foreach (Feature feature in prodFV.FVector)
                            {
                                if (feature.Index >= 0)
                                {
                                    writer.Write(feature.Index);
                                }
                            }
                            writer.Write(-2);
                        }
                    }
                }

                writer.Write(-3);

                // Section 2: per-word label features, only for labeled models.
                if (Labeled)
                {
                    for (int w1 = 0; w1 < toks.Length; w1++)
                    {
                        for (int t = 0; t < Types.Length; t++)
                        {
                            string type = Types[t];

                            for (int ph = 0; ph < 2; ph++)
                            {
                                bool attR = ph == 0;

                                for (int ch = 0; ch < 2; ch++)
                                {
                                    bool child = ch == 0;

                                    var prodFV = CreateFeatureVector(toks, pos, posA, w1,
                                                                     type,
                                                                     attR, child,
                                                                     new FeatureVector());
                                    foreach (Feature feature in prodFV.FVector)
                                    {
                                        if (feature.Index >= 0)
                                        {
                                            writer.Write(feature.Index);
                                        }
                                    }
                                    writer.Write(-2);
                                }
                            }
                        }
                    }

                    writer.Write(-3);
                }

                // Section 3: the instance's own feature vector, written unfiltered.
                foreach (Feature feature in inst.Fv.FVector)
                {
                    writer.Write(feature.Index);
                }

                // Section 4: the instance data, each part introduced by its own marker.
                writer.Write(-4);
                writer.Write(inst.Sentence.Length);
                foreach (string s in inst.Sentence)
                {
                    writer.Write(s);
                }
                writer.Write(-5);
                writer.Write(inst.POS.Length);
                foreach (string s in inst.POS)
                {
                    writer.Write(s);
                }
                writer.Write(-6);
                writer.Write(inst.Labs.Length);
                foreach (string s in inst.Labs)
                {
                    writer.Write(s);
                }
                writer.Write(-7);
                writer.Write(inst.ActParseTree);
                writer.Write(-1);
            }
            catch (IOException e)
            {
                // Still best-effort (no rethrow), but no longer silent: a truncated record
                // would otherwise only surface later as a read failure.
                System.Console.WriteLine("Error writing feature file: " + e.Message);
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Reads one serialized instance record from <paramref name="reader"/>, filling the
        /// supplied feature and score tables and returning the instance stored in the record.
        /// </summary>
        /// <remarks>
        /// Read order: edge features for every word pair and direction (each list ends with
        /// -2, section ends with -3); when Labeled, label features for every word, type,
        /// direction and child flag (same terminators); the instance feature vector up to -4;
        /// then the tokens, POS tags, labels and parse tree, separated by one marker integer
        /// each and closed by -1. Only the -3 and final -1 markers are validated; the markers
        /// in between are consumed without being checked.
        /// </remarks>
        /// <param name="reader">Source of the binary record.</param>
        /// <param name="inst">Only its Length is read; it sizes the loops.</param>
        /// <param name="fvs">Receives the edge feature vector at [w1, w2, ph].</param>
        /// <param name="probs">Receives the score of each edge vector at [w1, w2, ph].</param>
        /// <param name="ntFvs">Receives the label feature vector at [w1, t, ph, ch] when Labeled.</param>
        /// <param name="ntProbs">Receives the score of each label vector at [w1, t, ph, ch] when Labeled.</param>
        /// <param name="parameters">Scores every feature vector via GetScore.</param>
        /// <returns>A new instance built from the tokens, POS tags, labels and feature vector
        /// stored in the record, with ActParseTree set from the record.</returns>
        /// <exception cref="Exception">"Bad File Format" when a validated marker is wrong or
        /// the trailing instance data cannot be read; in the latter case the original error is
        /// attached as the inner exception. Reader failures in the earlier sections propagate
        /// unchanged.</exception>
        public DependencyInstance ReadFeatureVector(BinaryReader reader,
                                                   DependencyInstance inst,
                                                   FeatureVector[,,] fvs,
                                                   double[,,] probs,
                                                   FeatureVector[,,,] ntFvs,
                                                   double[,,,] ntProbs,
                                                   Parameters parameters)
        {
            int length = inst.Length;

            // Edge features: one -2 terminated index list per (w1, w2, direction).
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1 + 1; w2 < length; w2++)
                {
                    for (int ph = 0; ph < 2; ph++)
                    {
                        var prodFV = new FeatureVector();
                        for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                        {
                            AddNewFeature(indx, 1.0, prodFV);
                        }

                        fvs[w1, w2, ph] = prodFV;
                        probs[w1, w2, ph] = parameters.GetScore(prodFV);
                    }
                }
            }

            if (reader.ReadInt32() != -3)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            if (Labeled)
            {
                // Label features: one -2 terminated list per (word, type, direction, child flag).
                for (int w1 = 0; w1 < length; w1++)
                {
                    for (int t = 0; t < Types.Length; t++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            for (int ch = 0; ch < 2; ch++)
                            {
                                var prodFV = new FeatureVector();
                                for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                                {
                                    AddNewFeature(indx, 1.0, prodFV);
                                }

                                ntFvs[w1, t, ph, ch] = prodFV;
                                ntProbs[w1, t, ph, ch] = parameters.GetScore(prodFV);
                            }
                        }
                    }
                }

                if (reader.ReadInt32() != -3)
                {
                    Console.WriteLine("Error reading file.");
                    throw new Exception("Bad File Format");
                }
            }

            // The instance's own feature vector runs up to the -4 marker.
            var nfv = new FeatureVector();
            for (int indx = reader.ReadInt32(); indx != -4; indx = reader.ReadInt32())
            {
                AddNewFeature(indx, 1.0, nfv);
            }

            string[] toks;
            string[] pos;
            string[] labs;
            string actParseTree;
            int next;
            try
            {
                int len = reader.ReadInt32();
                toks = new string[len];
                for (int i = 0; i < len; i++)
                {
                    toks[i] = reader.ReadString();
                }

                reader.ReadInt32(); // marker between tokens and POS tags (not validated)
                len = reader.ReadInt32();
                pos = new string[len];
                for (int i = 0; i < len; i++)
                {
                    pos[i] = reader.ReadString();
                }

                reader.ReadInt32(); // marker between POS tags and labels (not validated)
                len = reader.ReadInt32();
                labs = new string[len];
                for (int i = 0; i < len; i++)
                {
                    labs[i] = reader.ReadString();
                }

                reader.ReadInt32(); // marker before the parse tree (not validated)
                actParseTree = reader.ReadString();
                next = reader.ReadInt32();
            }
            catch (Exception e)
            {
                Console.WriteLine("Error reading file.");
                // Keep the root cause (end of stream, negative length, ...) for diagnosis.
                throw new Exception("Bad File Format", e);
            }

            // The record must be closed by -1.
            if (next != -1)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            var pti = new DependencyInstance(toks, pos, labs, nfv);
            pti.ActParseTree = actParseTree;
            return pti;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Builds the alphabets from <paramref name="fileName"/>, then reads the file
        /// instance by instance and, when CreateForest is set, writes each instance's
        /// candidate features to <paramref name="featFileName"/>.
        /// </summary>
        /// <param name="fileName">Input file; opened for reading as UTF-8.</param>
        /// <param name="featFileName">Feature file to create; only opened when CreateForest
        /// is set.</param>
        /// <returns>One placeholder instance per input sentence. Each is constructed from the
        /// token count only; the fully populated instance is used solely for writing the
        /// feature file.</returns>
        public DependencyInstance[] CreateInstances(string fileName,
                                                    string featFileName)
        {
            CreateAlphabet(fileName);

            Console.WriteLine("Num Features: " + DataAlphabet.Count);

            var instances = new List<DependencyInstance>();

            // Both streams are released even when parsing or feature extraction throws.
            using (var reader = new StreamReader(new FileStream(fileName, FileMode.Open), Encoding.UTF8))
            {
                string[][] lines = ReadLines(reader);

                // A null writer is allowed here; `using` skips disposal in that case.
                using (BinaryWriter bWriter = CreateForest
                                                  ? new BinaryWriter(new FileStream(featFileName, FileMode.Create))
                                                  : null)
                {
                    while (lines != null)
                    {
                        string[] toks = lines[0];
                        string[] pos = lines[1];
                        string[] labs = lines[2];
                        string[] deps = lines[3];

                        var deps1 = new int[deps.Length];
                        for (int i = 0; i < deps.Length; i++)
                        {
                            deps1[i] = int.Parse(deps[i]);
                        }

                        FeatureVector fv = CreateFeatureVector(toks, pos, labs, deps1);

                        var full = new DependencyInstance(toks, pos, labs, fv);

                        // One "deps[i]|i:typeIndex" entry per position, starting at 1
                        // (position 0 is skipped; presumably the artificial root).
                        var spans = new string[deps.Length > 0 ? deps.Length - 1 : 0];
                        for (int i = 1; i < deps.Length; i++)
                        {
                            spans[i - 1] = deps[i] + "|" + i + ":" + TypeAlphabet.LookupIndex(labs[i]);
                        }
                        full.ActParseTree = string.Join(" ", spans).Trim();

                        if (bWriter != null)
                        {
                            WritePossibleFeatures(full, bWriter);
                        }

                        // Only a length-only placeholder is kept in the result.
                        instances.Add(new DependencyInstance(toks.Length));

                        lines = ReadLines(reader);
                    }

                    CloseAlphabets();
                }
            }

            return instances.ToArray();
        }
Ejemplo n.º 17
0
        // TODO: sina: rename it to ReadFeatureVector
        /// <summary>
        /// Reads one serialized instance record that also carries triple and sibling
        /// features, filling the supplied tables and returning the instance stored in
        /// the record.
        /// </summary>
        /// <remarks>
        /// Read order: edge features for every word pair and direction; when Labeled, label
        /// features for every word, type, direction and child flag; triple features; sibling
        /// features; the instance feature vector up to -4; then tokens, POS tags, labels and
        /// parse tree, closed by -1. Every feature list ends with -2 and each of the first
        /// four sections ends with -3.
        /// </remarks>
        /// <param name="reader">Source of the binary record.</param>
        /// <param name="inst">Only its Length is read; it sizes the loops.</param>
        /// <param name="fvs">Receives the edge feature vector at [w1, w2, ph].</param>
        /// <param name="probs">Receives the score of each edge vector.</param>
        /// <param name="fvsTrips">Receives the triple feature vector at [w1, w2, w3].</param>
        /// <param name="probsTrips">Receives the score of each triple vector.</param>
        /// <param name="fvsSibs">Receives the sibling feature vector at [w1, w2, wh] for w1 != w2.</param>
        /// <param name="probsSibs">Receives the score of each sibling vector.</param>
        /// <param name="ntFvs">Receives the label feature vector at [w1, t, ph, ch] when Labeled.</param>
        /// <param name="ntProbs">Receives the score of each label vector when Labeled.</param>
        /// <param name="params">Scores every feature vector via GetScore.</param>
        /// <returns>A new instance built from the tokens, POS tags, labels and feature vector
        /// stored in the record, with ActParseTree set from the record.</returns>
        /// <exception cref="Exception">"Bad File Format" when a validated marker is wrong or
        /// the trailing instance data cannot be read; in the latter case the original error is
        /// attached as the inner exception. Reader failures in the earlier sections propagate
        /// unchanged.</exception>
        public DependencyInstance GetFeatureVector(BinaryReader reader,
                                                   DependencyInstance inst,
                                                   FeatureVector[,,] fvs,
                                                   double[,,] probs,
                                                   FeatureVector[,,] fvsTrips,
                                                   double[,,] probsTrips,
                                                   FeatureVector[,,] fvsSibs,
                                                   double[,,] probsSibs,
                                                   FeatureVector[,,,] ntFvs,
                                                   double[,,,] ntProbs,
                                                   Parameters @params)
        {
            int length = inst.Length;

            // Edge features: one -2 terminated index list per (w1, w2, direction).
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1 + 1; w2 < length; w2++)
                {
                    for (int ph = 0; ph < 2; ph++)
                    {
                        var prodFV = new FeatureVector();
                        for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                        {
                            AddNewFeature(indx, 1.0, prodFV);
                        }

                        fvs[w1, w2, ph] = prodFV;
                        probs[w1, w2, ph] = @params.GetScore(prodFV);
                    }
                }
            }

            if (reader.ReadInt32() != -3)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            if (Labeled)
            {
                // Label features: one list per (word, type, direction, child flag).
                for (int w1 = 0; w1 < length; w1++)
                {
                    for (int t = 0; t < Types.Length; t++)
                    {
                        for (int ph = 0; ph < 2; ph++)
                        {
                            for (int ch = 0; ch < 2; ch++)
                            {
                                var prodFV = new FeatureVector();
                                for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                                {
                                    AddNewFeature(indx, 1.0, prodFV);
                                }

                                ntFvs[w1, t, ph, ch] = prodFV;
                                ntProbs[w1, t, ph, ch] = @params.GetScore(prodFV);
                            }
                        }
                    }
                }

                if (reader.ReadInt32() != -3)
                {
                    Console.WriteLine("Error reading file.");
                    throw new Exception("Bad File Format");
                }
            }

            // Triple features. For each w1 the record holds first the triples with
            // w1 <= w2 < w3 (ascending), then those with w1 >= w2 > w3 (descending).
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = w1; w2 < length; w2++)
                {
                    for (int w3 = w2 + 1; w3 < length; w3++)
                    {
                        var prodFV = new FeatureVector();
                        for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                        {
                            AddNewFeature(indx, 1.0, prodFV);
                        }

                        fvsTrips[w1, w2, w3] = prodFV;
                        probsTrips[w1, w2, w3] = @params.GetScore(prodFV);
                    }
                }

                for (int w2 = w1; w2 >= 0; w2--)
                {
                    for (int w3 = w2 - 1; w3 >= 0; w3--)
                    {
                        var prodFV = new FeatureVector();
                        for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                        {
                            AddNewFeature(indx, 1.0, prodFV);
                        }

                        fvsTrips[w1, w2, w3] = prodFV;
                        probsTrips[w1, w2, w3] = @params.GetScore(prodFV);
                    }
                }
            }

            if (reader.ReadInt32() != -3)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            // Sibling features: one list per ordered pair of distinct words and flag wh.
            for (int w1 = 0; w1 < length; w1++)
            {
                for (int w2 = 0; w2 < length; w2++)
                {
                    if (w1 == w2)
                    {
                        continue;
                    }

                    for (int wh = 0; wh < 2; wh++)
                    {
                        var prodFV = new FeatureVector();
                        for (int indx = reader.ReadInt32(); indx != -2; indx = reader.ReadInt32())
                        {
                            AddNewFeature(indx, 1.0, prodFV);
                        }

                        fvsSibs[w1, w2, wh] = prodFV;
                        probsSibs[w1, w2, wh] = @params.GetScore(prodFV);
                    }
                }
            }

            if (reader.ReadInt32() != -3)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            // The instance's own feature vector runs up to the -4 marker.
            var nfv = new FeatureVector();
            for (int indx = reader.ReadInt32(); indx != -4; indx = reader.ReadInt32())
            {
                AddNewFeature(indx, 1.0, nfv);
            }

            string[] toks;
            string[] pos;
            string[] labs;
            string actParseTree;
            int next;
            try
            {
                int len = reader.ReadInt32();
                toks = new string[len];
                for (int i = 0; i < len; i++)
                {
                    toks[i] = reader.ReadString();
                }

                // NOTE(review): no marker is consumed between the tokens and the POS tag
                // count (the read was disabled in the original), unlike the two later
                // boundaries. Confirm this matches the writer that produces these records.
                len = reader.ReadInt32();
                pos = new string[len];
                for (int i = 0; i < len; i++)
                {
                    pos[i] = reader.ReadString();
                }

                reader.ReadInt32(); // marker between POS tags and labels (not validated)
                len = reader.ReadInt32();
                labs = new string[len];
                for (int i = 0; i < len; i++)
                {
                    labs[i] = reader.ReadString();
                }

                reader.ReadInt32(); // marker before the parse tree (not validated)
                actParseTree = reader.ReadString();
                next = reader.ReadInt32();
            }
            catch (Exception e)
            {
                Console.WriteLine("Error reading file.");
                // Keep the root cause (end of stream, negative length, ...) for diagnosis.
                throw new Exception("Bad File Format", e);
            }

            // The record must be closed by -1.
            if (next != -1)
            {
                Console.WriteLine("Error reading file.");
                throw new Exception("Bad File Format");
            }

            var pti = new DependencyInstance(toks, pos, labs, nfv);
            pti.ActParseTree = actParseTree;
            return pti;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// K-best projective decoding over a chart of spans, using one statically chosen
        /// type per edge. Run time O(n^3 + Tn^2), where T is the number of Types.
        /// </summary>
        /// <param name="inst">Instance to decode; its Sentence gives the number of words.</param>
        /// <param name="fvs">Edge feature vectors, read at [s, t, 0] and [s, t, 1].</param>
        /// <param name="probs">Edge scores, indexed like <paramref name="fvs"/>.</param>
        /// <param name="ntFvs">Label feature vectors, read at [word, type, 0 or 1, 0 or 1].
        /// Indexed even when the model is unlabeled (with type 0), so it must be populated.</param>
        /// <param name="ntProbs">Label scores, indexed like <paramref name="ntFvs"/>.</param>
        /// <param name="K">Number of best items kept, passed to the parse forest.</param>
        /// <returns>The result of the forest's GetBestParses().</returns>
        public object[,] DecodeProjective(DependencyInstance inst,
                                          FeatureVector[,,] fvs,
                                          double[,,] probs,
                                          FeatureVector[,,,] ntFvs,
                                          double[,,,] ntProbs, int K)
        {
            string[] toks = inst.Sentence;
            bool labeled = m_pipe.Labeled;

            // Edge types are fixed up front from the label scores (labeled models only).
            int[,] staticTypes = null;
            if (labeled)
            {
                staticTypes = GetTypes(ntProbs, toks.Length);
            }

            var pf = new KBestParseForest(0, toks.Length - 1, inst, K);

            // Seed the chart with a zero-score item per word for both values of the third
            // argument (presumably the direction -- confirm against KBestParseForest.Add).
            for (int s = 0; s < toks.Length; s++)
            {
                pf.Add(s, -1, 0, 0.0, new FeatureVector());
                pf.Add(s, -1, 1, 0.0, new FeatureVector());
            }

            // Fill spans [s, t] in order of increasing width j.
            for (int j = 1; j < toks.Length; j++)
            {
                for (int s = 0; s + j < toks.Length; s++)
                {
                    int t = s + j;

                    FeatureVector prodFvSt = fvs[s, t, 0];
                    FeatureVector prodFvTs = fvs[s, t, 1];
                    double prodProbSt = probs[s, t, 0];
                    double prodProbTs = probs[s, t, 1];

                    int type1 = labeled ? staticTypes[s, t] : 0;
                    int type2 = labeled ? staticTypes[t, s] : 0;

                    FeatureVector ntFvS01 = ntFvs[s, type1, 0, 1];
                    FeatureVector ntFvS10 = ntFvs[s, type2, 1, 0];
                    FeatureVector ntFvT00 = ntFvs[t, type1, 0, 0];
                    FeatureVector ntFvT11 = ntFvs[t, type2, 1, 1];
                    double ntProbS01 = ntProbs[s, type1, 0, 1];
                    double ntProbS10 = ntProbs[s, type2, 1, 0];
                    double ntProbT00 = ntProbs[t, type1, 0, 0];
                    double ntProbT11 = ntProbs[t, type2, 1, 1];

                    // Join the items over [s, r] and [r + 1, t] and add an edge between s
                    // and t, once with the [s, t, 0] features and once with [s, t, 1].
                    for (int r = s; r < t; r++)
                    {
                        ParseForestItem[] b1 = pf.GetItems(s, r, 0, 0);
                        ParseForestItem[] c1 = pf.GetItems(r + 1, t, 1, 0);
                        if (b1 == null || c1 == null)
                        {
                            continue;
                        }

                        int[,] pairs = pf.GetKBestPairs(b1, c1);
                        for (int k = 0; k < pairs.GetLength(0); k++)
                        {
                            // -1 marks the end of the usable pairs.
                            if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                            {
                                break;
                            }

                            ParseForestItem left = b1[pairs[k, 0]];
                            ParseForestItem right = c1[pairs[k, 1]];
                            double bc = left.Prob + right.Prob;

                            double probFin = bc + prodProbSt;
                            FeatureVector fvFin = prodFvSt;
                            if (labeled)
                            {
                                fvFin = FeatureVector.Cat(ntFvS01, FeatureVector.Cat(ntFvT00, fvFin));
                                probFin += ntProbS01 + ntProbT00;
                            }
                            pf.Add(s, r, t, type1, 0, 1, probFin, fvFin, left, right);

                            probFin = bc + prodProbTs;
                            fvFin = prodFvTs;
                            if (labeled)
                            {
                                fvFin = FeatureVector.Cat(ntFvT11, FeatureVector.Cat(ntFvS10, fvFin));
                                probFin += ntProbT11 + ntProbS10;
                            }
                            pf.Add(s, r, t, type2, 1, 1, probFin, fvFin, left, right);
                        }
                    }

                    // Combine an item produced by the step above with an adjacent (.., 0)
                    // item into a (.., 0) item over [s, t]; no new features are added.
                    for (int r = s; r <= t; r++)
                    {
                        if (r != s)
                        {
                            ParseForestItem[] b1 = pf.GetItems(s, r, 0, 1);
                            ParseForestItem[] c1 = pf.GetItems(r, t, 0, 0);
                            if (b1 != null && c1 != null)
                            {
                                int[,] pairs = pf.GetKBestPairs(b1, c1);
                                for (int k = 0; k < pairs.GetLength(0); k++)
                                {
                                    if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    {
                                        break;
                                    }

                                    ParseForestItem left = b1[pairs[k, 0]];
                                    ParseForestItem right = c1[pairs[k, 1]];
                                    double bc = left.Prob + right.Prob;

                                    // Stop at the first pair the forest does not accept.
                                    if (!pf.Add(s, r, t, -1, 0, 0, bc, new FeatureVector(), left, right))
                                    {
                                        break;
                                    }
                                }
                            }
                        }

                        if (r != t)
                        {
                            ParseForestItem[] b1 = pf.GetItems(s, r, 1, 0);
                            ParseForestItem[] c1 = pf.GetItems(r, t, 1, 1);
                            if (b1 != null && c1 != null)
                            {
                                int[,] pairs = pf.GetKBestPairs(b1, c1);
                                for (int k = 0; k < pairs.GetLength(0); k++)
                                {
                                    if (pairs[k, 0] == -1 || pairs[k, 1] == -1)
                                    {
                                        break;
                                    }

                                    ParseForestItem left = b1[pairs[k, 0]];
                                    ParseForestItem right = c1[pairs[k, 1]];
                                    double bc = left.Prob + right.Prob;

                                    // Stop at the first pair the forest does not accept.
                                    if (!pf.Add(s, r, t, -1, 1, 0, bc, new FeatureVector(), left, right))
                                    {
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            return pf.GetBestParses();
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Reads the next instance from <paramref name="reader"/> and builds it together
        /// with its feature vector and parse-tree string.
        /// </summary>
        /// <param name="reader">Source handed to ReadLines; expected to yield four rows:
        /// tokens, POS tags, labels and dependency values.</param>
        /// <returns>The populated instance, or null when ReadLines returns null.</returns>
        public DependencyInstance CreateInstance(StreamReader reader)
        {
            string[][] lines = ReadLines(reader);
            if (lines == null)
            {
                return null;
            }

            string[] toks = lines[0];
            string[] pos = lines[1];
            string[] labs = lines[2];
            string[] deps = lines[3];

            var deps1 = new int[deps.Length];
            for (int i = 0; i < deps.Length; i++)
            {
                deps1[i] = int.Parse(deps[i]);
            }

            FeatureVector fv = CreateFeatureVector(toks, pos, labs, deps1);

            var pti = new DependencyInstance(toks, pos, labs, fv);

            // One "deps[i]|i:typeIndex" entry per position, starting at 1 (position 0 is
            // skipped; presumably the artificial root). Joined once instead of growing a
            // string inside the loop.
            var spans = new string[deps.Length > 0 ? deps.Length - 1 : 0];
            for (int i = 1; i < deps.Length; i++)
            {
                spans[i - 1] = deps[i] + "|" + i + ":" + TypeAlphabet.LookupIndex(labs[i]);
            }
            pti.ActParseTree = string.Join(" ", spans).Trim();

            return pti;
        }