Ejemplo n.º 1
0
        /// <summary>
        /// Restores the lemmatizer state (settings, example list and lemma tree root node(s))
        /// by reading it from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized lemmatizer data.</param>
        public void Deserialize(BinaryReader binRead)
        {
            lsett = new LemmatizerSettings(binRead);

            // A boolean flag is stored in the stream right before the main example list.
            bool examplesWereSerialized = binRead.ReadBoolean();
            elExamples = new ExampleList(binRead, lsett);

            // When the flag is set, the rear/front lists are derived from the main list;
            // otherwise they are read from the stream (rear first, then front).
            ExampleList rearExamples = examplesWereSerialized
                ? elExamples.GetFrontRearExampleList(false)
                : new ExampleList(binRead, lsett);
            ExampleList frontExamples = examplesWereSerialized
                ? elExamples.GetFrontRearExampleList(true)
                : new ExampleList(binRead, lsett);

            if (lsett.bBuildFrontLemmatizer)
            {
                // Two trees are stored: the rear one first, followed by the front one.
                ltnRootNode      = new LemmaTreeNode(binRead, lsett, rearExamples, null);
                ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, frontExamples, null);
                return;
            }

            // Single-tree case: the root node is bound to the main example list.
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the lemma tree root node(s) from the current example list. When the settings
        /// enable it and an MSD specification is supplied, every example's MSD is first rewritten
        /// through a newly constructed <see cref="MsdSplitTree"/>.
        /// </summary>
        /// <param name="msdSpec">MSD specification text; the MSD split tree step is skipped when this is null or empty.</param>
        /// <param name="beamSearchOpt">Beam search parameters handed to the <see cref="MsdSplitTree"/> constructor.</param>
        public void BuildModel(string msdSpec, MsdSplitTree.BeamSearchParams beamSearchOpt)
        {
            // A root node already exists, so there is nothing to build.
            if (ltnRootNode != null)
            {
                return;
            }

            // Use the MSD split tree only when it is enabled, MSDs are considered distinct,
            // and a specification was actually provided.
            if (lsett.bUseMsdSplitTreeOptimization && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct && !string.IsNullOrEmpty(msdSpec))
            {
                msdSplitTree = new MsdSplitTree(elExamples.ListExamples, new MsdSpec(msdSpec), beamSearchOpt);

                // Replace the example list with a copy whose MSDs went through the split tree.
                ExampleList originalExamples = elExamples;
                elExamples = new ExampleList(lsett);
                foreach (LemmaExample le in originalExamples.ListExamples)
                {
                    string newMsd = msdSplitTree.TransformMsd(le.Msd);
                    elExamples.AddExample(le.Word, le.Lemma, le.Weight, newMsd);
                }
                // TODO (from the original author): a problem is reported to occur if BuildModel is called twice.
            }
            elExamples.FinalizeAdditions();

            if (!lsett.bBuildFrontLemmatizer)
            {
                ltnRootNode = new LemmaTreeNode(lsett, elExamples);
            }
            else
            {
                // Separate trees are built from the rear and the front example lists.
                ltnRootNode      = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
                ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Loads the lemmatizer state (settings, example list and lemma tree root node(s))
 /// from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to read the lemmatizer data from.</param>
 public void Load(Latino.BinarySerializer binRead)
 {
     lsett      = new LemmatizerSettings(binRead);
     elExamples = new ExampleList(binRead, lsett);

     if (lsett.bBuildFrontLemmatizer)
     {
         // Rear tree first, then front tree; each is bound to its own derived example list.
         ExampleList rearExamples = elExamples.GetFrontRearExampleList(false);
         ltnRootNode = new LemmaTreeNode(binRead, lsett, rearExamples, null);

         ExampleList frontExamples = elExamples.GetFrontRearExampleList(true);
         ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, frontExamples, null);
         return;
     }

     // Single-tree case: the root node is bound to the full example list.
     ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Restores the lemmatizer (settings, examples, lemma tree root node(s) and, when present,
        /// the trailing list of exceptions) from a binary stream.
        /// </summary>
        /// <param name="binRead">
        /// Reader over the serialized lemmatizer data. It is disposed before this method returns.
        /// </param>
        /// <remarks>
        /// Reading the exception section is best-effort: any failure there is traced, not thrown,
        /// because older lemmatizer files do not contain that section.
        /// </remarks>
        public void Deserialize(BinaryReader binRead)
        {
            using (binRead)
            {
                // settings
                Lsett = new LemmatizerSettings(binRead);

                // examples: a boolean flag precedes the main example list
                bool bSerializeExamples = binRead.ReadBoolean();
                ElExamples = new ExampleList(binRead, Lsett);
                ExampleList elExamplesRear;
                ExampleList elExamplesFront;
                if (bSerializeExamples)
                {
                    // rear/front lists are derived from the main list
                    elExamplesRear  = ElExamples.GetFrontRearExampleList(false);
                    elExamplesFront = ElExamples.GetFrontRearExampleList(true);
                }
                else
                {
                    // rear/front lists are stored in the stream (rear first, then front)
                    elExamplesRear  = new ExampleList(binRead, Lsett);
                    elExamplesFront = new ExampleList(binRead, Lsett);
                }

                // root node
                LtnRootNode = new LemmaTreeNode(binRead, Lsett, Lsett.bBuildFrontLemmatizer ? elExamplesRear : ElExamples, null);

                // root node front
                if (Lsett.bBuildFrontLemmatizer)
                {
                    LtnRootNodeFront = new LemmaTreeNode(binRead, Lsett, elExamplesFront, null);
                }

                // exceptions - use try catch for retro compatibility
                // --> this section is missing in the old lemmatizer files
                try
                {
                    var nbOfExceptions = binRead.ReadInt32();
                    for (var i = 0; i < nbOfExceptions; i++)
                    {
                        // each entry is read as one string and split on a space into two parts
                        var exception = binRead.ReadString();
                        var parts     = exception.Split(' ');
                        if (parts.Length < 2)
                        {
                            // A malformed entry must not abort loading of the remaining entries.
                            Trace.WriteLine("Skipping malformed exception entry in Lemmatizer file: '" + exception + "'");
                            continue;
                        }
                        this.AddException(parts[0], parts[1]);
                    }
                }
                catch (Exception ex)
                {
                    // Stay best-effort, but keep the failure detail so that a corrupt file can be
                    // told apart from an old-format file that simply lacks this section.
                    Trace.WriteLine("Couldn't deserialize exceptions in Lemmatizer file: " + ex.GetType().Name + ": " + ex.Message);
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the lemma tree root node(s) from the current example list.
        /// Does nothing when a root node already exists.
        /// </summary>
        public void BuildModel()
        {
            if (ltnRootNode != null)
            {
                return;
            }

            if (lsett.bBuildFrontLemmatizer)
            {
                // Rear tree first, then front tree, each built from its own derived example list.
                ExampleList rearExamples = elExamples.GetFrontRearExampleList(false);
                ltnRootNode = new LemmaTreeNode(lsett, rearExamples);

                ExampleList frontExamples = elExamples.GetFrontRearExampleList(true);
                ltnRootNodeFront = new LemmaTreeNode(lsett, frontExamples);
                return;
            }

            // Single-tree case. The original author left a "TODO remove" note on this
            // FinalizeAdditions call; it is still executed here.
            elExamples.FinalizeAdditions();
            ltnRootNode = new LemmaTreeNode(lsett, elExamples);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Loads the lemmatizer state (settings, example list and lemma tree root node(s))
 /// from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to read the lemmatizer data from.</param>
 public void Load(Latino.BinarySerializer binRead) {
     lsett = new LemmatizerSettings(binRead);
     elExamples = new ExampleList(binRead, lsett);

     if (lsett.bBuildFrontLemmatizer) {
         // Rear tree is read first and bound to the rear example list.
         ExampleList rearExamples = elExamples.GetFrontRearExampleList(false);
         ltnRootNode = new LemmaTreeNode(binRead, lsett, rearExamples, null);

         // Front tree follows and is bound to the front example list.
         ExampleList frontExamples = elExamples.GetFrontRearExampleList(true);
         ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, frontExamples, null);
         return;
     }

     // Single-tree case: the root node is bound to the full example list.
     ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads the lemmatizer state (settings, example list and lemma tree root node(s))
        /// from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized lemmatizer data.</param>
        public void Deserialize(BinaryReader binRead) {
            lsett = new LemmatizerSettings(binRead);

            // The stream holds a boolean flag followed by the main example list.
            bool deriveFromMainList = binRead.ReadBoolean();
            elExamples = new ExampleList(binRead, lsett);

            // Flag set: rear/front lists are derived from the main list.
            // Flag clear: they are read from the stream, rear first and then front.
            ExampleList rearList = deriveFromMainList
                ? elExamples.GetFrontRearExampleList(false)
                : new ExampleList(binRead, lsett);
            ExampleList frontList = deriveFromMainList
                ? elExamples.GetFrontRearExampleList(true)
                : new ExampleList(binRead, lsett);

            if (lsett.bBuildFrontLemmatizer) {
                // Two trees are stored: rear first, then front.
                ltnRootNode = new LemmaTreeNode(binRead, lsett, rearList, null);
                ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, frontList, null);
                return;
            }

            // Single-tree case: the root node is bound to the main example list.
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
        }