예제 #1
0
        /// <summary>
        /// Restores this tree from a binary stream. The stream is read in this order:
        /// a flag telling whether an <c>MsdSpec</c> follows, the optional <c>MsdSpec</c>,
        /// the example count and the examples themselves, and finally the node data
        /// (read by the three-argument <c>Deserialize</c> overload).
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized tree.</param>
        public void Deserialize(BinaryReader binRead)
        {
            // The leading boolean says whether a specification was written.
            bool hasMsdSpec = binRead.ReadBoolean();
            msdSpec = hasMsdSpec ? new MsdSpec(binRead) : null;

            // Each example is registered under its position in the stream so that
            // node data can refer to it by id.
            Dictionary<int, LemmaExample> examplesById = new Dictionary<int, LemmaExample>();

            int storedCount = binRead.ReadInt32();

            if (storedCount >= 0)
            {
                exampleList = new List<LemmaExample>(storedCount);

                int nextId = 0;
                while (nextId < storedCount)
                {
                    LemmaExample example = new LemmaExample(binRead, null, null);
                    examplesById[nextId] = example;
                    exampleList.Add(example);
                    nextId++;
                }
            }
            else
            {
                // A negative count encodes a null example list.
                exampleList = null;
            }

            Deserialize(binRead, examplesById, msdSpec);
        }
예제 #2
0
        /// <summary>
        /// Builds a split tree for the given examples and copies the result into this instance.
        /// </summary>
        /// <param name="examples">Examples to split; they are passed through <c>PrepareExampleList</c> first.</param>
        /// <param name="msdSpec">Specification handed to the splitting routines and stored on the tree.</param>
        /// <param name="beamParams">Beam settings; when null a default <c>BeamSearchParams</c> instance is used.</param>
        public MsdSplitTree(List <LemmaExample> examples, MsdSpec msdSpec, BeamSearchParams beamParams)
        {
            if (beamParams == null)
            {
                beamParams = new BeamSearchParams();
            }

            List<LemmaExample> prepared = PrepareExampleList(examples);

            MsdSplitTree et = Split(prepared, msdSpec, beamParams);

            if (et == null)
            {
                // Split yields null when no candidate split was accepted. Previously that
                // null was dereferenced in CopyVariablesToThis; fall back to a single
                // unsplit node, initialised the same way SplitByMsdAttribute initialises
                // its leaf nodes.
                double ambig = GetListAmbiguities(prepared);

                et                   = new MsdSplitTree(msdSpec);
                et.exampleList       = prepared;
                et.ambigThis         = ambig;
                et.ambigChild        = ambig;
                et.ambigRecurs       = ambig;
                et.subTreeSizeRecurs = 1;
            }

            CopyVariablesToThis(et);
        }
예제 #3
0
 /// <summary>
 /// Makes this node a shallow copy of <paramref name="et"/>: every field is assigned
 /// by reference/value, so collections are shared with the source node.
 /// </summary>
 /// <param name="et">Node whose fields are copied.</param>
 private void CopyVariablesToThis(MsdSplitTree et)
 {
     // Identity and payload.
     msdSpec     = et.msdSpec;
     attrId      = et.attrId;
     exampleList = et.exampleList;

     // Structure.
     subTrees     = et.subTrees;
     beamSiblings = et.beamSiblings;

     // Statistics.
     ambigThis         = et.ambigThis;
     ambigChild        = et.ambigChild;
     ambigRecurs       = et.ambigRecurs;
     subTreeSizeRecurs = et.subTreeSizeRecurs;
 }
예제 #4
0
        /// <summary>
        /// Writes one console line describing <paramref name="et"/> (indented by two spaces
        /// per level) and then recurses into its sub-trees.
        /// </summary>
        /// <param name="et">Node to print.</param>
        /// <param name="msdSpec">Specification used to look up the attribute name for the node's attribute id.</param>
        /// <param name="level">Depth of this node; controls indentation.</param>
        /// <param name="maxLevel">Nodes deeper than this are not printed.</param>
        /// <param name="attrSet">Text describing the attribute values chosen on the path to this node.</param>
        private static void OutputTree(MsdSplitTree et, MsdSpec msdSpec, int level, int maxLevel, string attrSet)
        {
            if (level > maxLevel)
            {
                return;
            }

            int    attrId   = et.attrId;
            string attrName = msdSpec.attrIdToNameMap[attrId];

            // Summary of the split performed at this node: one "|key:count" per sub-tree.
            StringBuilder sbSubGroups = new StringBuilder();

            if (et.subTrees != null)
            {
                sbSubGroups.AppendFormat(",SplitBy={0}({1}) To={2} classes:",
                                         attrName, attrId, et.subTrees.Count);
                foreach (KeyValuePair <char, MsdSplitTree> sub in et.subTrees)
                {
                    sbSubGroups.AppendFormat("|{0}:{1}", sub.Key, sub.Value.exampleList.Count);
                }
            }

            // Summary of the beam siblings. This goes into its own builder; the two
            // builders are printed back to back, so the output text is unchanged.
            StringBuilder sbBeam = new StringBuilder();

            if (et.beamSiblings != null)
            {
                sbBeam.Append(",BeamSibling=");
                foreach (MsdSplitTree beamSibl in et.beamSiblings)
                {
                    sbBeam.AppendFormat("|{0}", beamSibl.subTreeSizeRecurs);
                }
            }

            Console.Write(new string(' ', level * 2));
            Console.WriteLine("Examples={0},AttrSet=({1}),SubTree={2},Ambig:(T={3}/S={4}/R={5}){6}{7}",
                              et.exampleList.Count, attrSet, et.subTreeSizeRecurs,
                              et.ambigThis, et.ambigChild, et.ambigRecurs, sbSubGroups, sbBeam);

            if (et.subTrees != null)
            {
                foreach (KeyValuePair <char, MsdSplitTree> sub in et.subTrees)
                {
                    // Extend the path description with "name='value'", joined by '&'.
                    OutputTree(sub.Value, msdSpec, level + 1, maxLevel, attrSet + (attrSet.Length > 0 ? "&" : "") + attrName + "='" + sub.Key + "'");
                }
            }
        }
예제 #5
0
 /// <summary>
 /// Creates a node by reading its data from a binary stream.
 /// </summary>
 /// <param name="binRead">Reader positioned at the start of the node data.</param>
 /// <param name="exampleMapping">Lookup from example id to example, used to resolve the ids stored for the node.</param>
 /// <param name="msdSpec">Specification stored on the node.</param>
 private MsdSplitTree(BinaryReader binRead, Dictionary <int, LemmaExample> exampleMapping, MsdSpec msdSpec)
 {
     // All of the work is done by the node-level Deserialize overload.
     this.Deserialize(binRead, exampleMapping, msdSpec);
 }
예제 #6
0
        /// <summary>
        /// Reads the data of a single node (and, recursively, of its sub-trees and beam
        /// siblings) from a binary stream. Fields are read in the order: attribute id,
        /// example ids, the three ambiguity values, the recursive sub-tree size,
        /// sub-trees, beam siblings. A negative count for any collection encodes null.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the node data.</param>
        /// <param name="exampleMapping">Lookup from stored example id to the example instance.</param>
        /// <param name="msdSpec">Specification stored on this node and passed on to child nodes.</param>
        private void Deserialize(BinaryReader binRead, Dictionary <int, LemmaExample> exampleMapping, MsdSpec msdSpec)
        {
            this.msdSpec = msdSpec;
            attrId       = binRead.ReadInt32();

            // Examples are stored as ids that are resolved through exampleMapping.
            int storedExamples = binRead.ReadInt32();

            if (storedExamples >= 0)
            {
                exampleList = new List<LemmaExample>(storedExamples);
                for (int n = 0; n < storedExamples; n++)
                {
                    int exampleId = binRead.ReadInt32();
                    exampleList.Add(exampleMapping[exampleId]);
                }
            }
            else
            {
                exampleList = null;
            }

            ambigThis         = binRead.ReadDouble();
            ambigChild        = binRead.ReadDouble();
            ambigRecurs       = binRead.ReadDouble();
            subTreeSizeRecurs = binRead.ReadInt32();

            // Sub-trees: a key character followed by the serialized child node.
            int storedSubTrees = binRead.ReadInt32();

            if (storedSubTrees >= 0)
            {
                subTrees = new Dictionary<char, MsdSplitTree>();
                for (int n = 0; n < storedSubTrees; n++)
                {
                    char         branchKey = binRead.ReadChar();
                    MsdSplitTree child     = new MsdSplitTree(binRead, exampleMapping, msdSpec);
                    subTrees.Add(branchKey, child);
                }
            }
            else
            {
                subTrees = null;
            }

            // Beam siblings: each entry starts with a flag; true means "this very node",
            // otherwise a full serialized node follows.
            int storedSiblings = binRead.ReadInt32();

            if (storedSiblings >= 0)
            {
                beamSiblings = new List<MsdSplitTree>(storedSiblings);
                for (int n = 0; n < storedSiblings; n++)
                {
                    bool         isSelf  = binRead.ReadBoolean();
                    MsdSplitTree sibling = isSelf
                        ? this
                        : new MsdSplitTree(binRead, exampleMapping, msdSpec);
                    beamSiblings.Add(sibling);
                }
            }
            else
            {
                beamSiblings = null;
            }
        }
예제 #7
0
 /// <summary>
 /// Builds a split tree for the given examples without explicit beam settings;
 /// forwards to the three-argument constructor with a null <c>beamParams</c>.
 /// </summary>
 /// <param name="examples">Examples to split.</param>
 /// <param name="msdSpec">Specification used for splitting.</param>
 public MsdSplitTree(List <LemmaExample> examples, MsdSpec msdSpec)
     : this(examples, msdSpec, beamParams: null)
 {
 }
예제 #8
0
 /// <summary>
 /// Creates a node that only records the given specification; no other field is
 /// assigned by this constructor.
 /// </summary>
 /// <param name="msdSpec">Specification stored on the node.</param>
 public MsdSplitTree(MsdSpec msdSpec)
 {
     this.msdSpec = msdSpec;
 }
예제 #9
0
        /// <summary>
        /// Partitions <paramref name="el"/> by the value each example has for the attribute
        /// <paramref name="attrId"/> and returns a node with one leaf sub-tree per value.
        /// </summary>
        /// <param name="el">Examples to partition; stored on the returned node as its example list.</param>
        /// <param name="attrId">Attribute id passed to <c>msdSpec.GetAttrValue</c>.</param>
        /// <param name="msdSpec">Specification used to extract attribute values.</param>
        /// <returns>
        /// A node whose <c>ambigChild</c> and <c>ambigRecurs</c> are the sum of the leaf
        /// ambiguities and whose <c>subTreeSizeRecurs</c> is the number of sub-trees.
        /// <c>ambigThis</c> is not set here.
        /// </returns>
        private static MsdSplitTree SplitByMsdAttribute(List <LemmaExample> el, int attrId, MsdSpec msdSpec)
        {
            MsdSplitTree et = new MsdSplitTree(msdSpec);

            et.attrId      = attrId;
            et.subTrees    = new Dictionary <char, MsdSplitTree>();
            et.exampleList = el;

            // TODO (carried over from the original "FIX IT" note): a '#' sub-tree is always
            // created, even when no example maps to it, and it is counted in subTreeSizeRecurs.
            MsdSplitTree etSubDef = new MsdSplitTree(msdSpec);

            etSubDef.exampleList = new List <LemmaExample>();
            et.subTrees['#']     = etSubDef;

            for (int i = 0; i < el.Count; i++)
            {
                LemmaExample e   = el[i];
                char         cls = msdSpec.GetAttrValue(e.Msd, attrId);

                // Single dictionary lookup: fetch the bucket, creating it on first use.
                MsdSplitTree bucket;
                if (!et.subTrees.TryGetValue(cls, out bucket))
                {
                    bucket             = new MsdSplitTree(msdSpec);
                    bucket.exampleList = new List <LemmaExample>();
                    et.subTrees[cls]   = bucket;
                }
                bucket.exampleList.Add(e);
            }

            double ambigChild = 0;

            // Every sub-tree starts out as a leaf: all three ambiguity values are equal
            // and its recursive size is 1.
            foreach (KeyValuePair <char, MsdSplitTree> sub in et.subTrees)
            {
                MsdSplitTree etSub = sub.Value;
                double       ambig = GetListAmbiguities(etSub.exampleList);
                etSub.ambigThis         = ambig;
                etSub.ambigChild        = ambig;
                etSub.ambigRecurs       = ambig;
                etSub.subTreeSizeRecurs = 1;
                ambigChild += ambig;
            }

            et.ambigChild        = ambigChild;
            et.ambigRecurs       = ambigChild;
            et.subTreeSizeRecurs = et.subTrees.Count;

            return(et);
        }
예제 #10
0
        /// <summary>
        /// Prints a header line followed by one line per candidate split, each indented by
        /// two spaces per level.
        /// </summary>
        /// <param name="splits">Candidate splits to describe.</param>
        /// <param name="el">Examples being split; only the count is printed.</param>
        /// <param name="weightInitial">Ambiguity value before splitting; printed and used to compute the "Res" column.</param>
        /// <param name="msdSpec">Specification used to look up attribute names.</param>
        /// <param name="level">Depth used for indentation.</param>
        private static void OutputSplits(List <MsdSplitTree> splits, List <LemmaExample> el, double weightInitial, MsdSpec msdSpec, int level)
        {
            string indent = new string(' ', level * 2);

            Console.Write(indent);
            Console.WriteLine("Trying to split {0} examples with {1} ambiguities", el.Count, weightInitial);

            foreach (MsdSplitTree candidate in splits)
            {
                int    candidateAttr = candidate.attrId;
                string candidateName = msdSpec.attrIdToNameMap[candidateAttr];

                // One " key:count" entry per sub-tree of the candidate.
                StringBuilder groups = new StringBuilder();
                foreach (KeyValuePair <char, MsdSplitTree> group in candidate.subTrees)
                {
                    groups.AppendFormat(" {0}:{1}", group.Key, group.Value.exampleList.Count);
                }

                double remaining = candidate.ambigChild;

                Console.Write(indent);
                Console.WriteLine("  Attr: {0,2}, Ambig: Res={1,4} Rem={2,4}, AttrName: {3}, SplitTo: {4} classes: {5}",
                                  candidateAttr, weightInitial - remaining, remaining, candidateName, candidate.subTrees.Count, groups);
            }
        }
예제 #11
0
        /// <summary>
        /// Produces one candidate split per attribute id in <c>[0, msdSpec.AttrCount)</c>
        /// and sorts the candidates with <c>CompareTreesAbmibuitiesAsc</c>.
        /// </summary>
        /// <param name="el">Examples to split.</param>
        /// <param name="weightInitial">Not used by this method.</param>
        /// <param name="msdSpec">Specification providing the attribute count.</param>
        /// <returns>The sorted list of candidate splits.</returns>
        private static List <MsdSplitTree> ProduceOrderedSplits(List <LemmaExample> el, double weightInitial, MsdSpec msdSpec)
        {
            List <MsdSplitTree> candidates = new List <MsdSplitTree>();

            int attr = 0;
            while (attr < msdSpec.AttrCount)
            {
                MsdSplitTree candidate = SplitByMsdAttribute(el, attr, msdSpec);
                candidates.Add(candidate);
                attr++;
            }

            candidates.Sort(CompareTreesAbmibuitiesAsc);

            return candidates;
        }
예제 #12
0
        /// <summary>
        /// Tries to split each sub-tree of <paramref name="bestTree"/> further and returns
        /// the resulting sub-tree map. As a side effect, <c>ambigRecurs</c>, <c>ambigChild</c>
        /// and <c>subTreeSizeRecurs</c> of <paramref name="bestTree"/> are reset and
        /// re-accumulated from the resulting sub-trees.
        /// </summary>
        /// <param name="bestTree">Node whose sub-trees are processed and whose totals are updated.</param>
        /// <param name="msdSpec">Specification passed on to the deeper split.</param>
        /// <param name="level">Depth of <paramref name="bestTree"/>; children are split at <c>level + 1</c>.</param>
        /// <param name="beamParams">Beam settings passed on to the deeper split.</param>
        /// <returns>A new dictionary with the same keys, holding either the re-split or the original sub-tree.</returns>
        private static Dictionary <char, MsdSplitTree> RecursiveSplit(MsdSplitTree bestTree, MsdSpec msdSpec, int level, BeamSearchParams beamParams)
        {
            Dictionary <char, MsdSplitTree> result = new Dictionary <char, MsdSplitTree>();

            bestTree.subTreeSizeRecurs = 0;
            bestTree.ambigChild        = 0;
            bestTree.ambigRecurs       = 0;

            foreach (KeyValuePair <char, MsdSplitTree> entry in bestTree.subTrees)
            {
                MsdSplitTree original = entry.Value;
                MsdSplitTree chosen   = null;

                // Only sub-trees that still have ambiguity are worth splitting again.
                if (original.ambigChild > 0)
                {
                    chosen = RecursiveSplitBeam(original.exampleList, original.ambigChild, msdSpec, level + 1, beamParams);
                }

                // Keep the original sub-tree when it was not split or the split returned null.
                if (chosen == null)
                {
                    chosen = original;
                }

                result.Add(entry.Key, chosen);

                bestTree.ambigRecurs       += chosen.ambigRecurs;
                bestTree.ambigChild        += chosen.ambigThis;
                bestTree.subTreeSizeRecurs += chosen.subTreeSizeRecurs;
            }

            return result;
        }
예제 #13
0
        /// <summary>
        /// Evaluates the first <c>beamSize</c> candidate splits of <paramref name="el"/>,
        /// recursively refines each one whose <c>ambigChild</c> is below
        /// <paramref name="weightInitial"/>, and returns the refined candidate that sorts
        /// first under <c>CompareTreesRecurSizeAsc</c>.
        /// </summary>
        /// <param name="el">Examples to split.</param>
        /// <param name="weightInitial">Ambiguity value before splitting; stored as <c>ambigThis</c> on accepted candidates.</param>
        /// <param name="msdSpec">Specification used to produce the candidate splits.</param>
        /// <param name="level">Current depth; used to look up the beam width in <c>beamParams.beamsPerLevel</c>.</param>
        /// <param name="beamParams">Beam settings; when no width is configured for <paramref name="level"/> a width of 1 is used.</param>
        /// <returns>
        /// The chosen split, or null when no candidate was accepted. When more than one
        /// candidate was accepted, the returned node's <c>beamSiblings</c> holds all of
        /// them (including the returned node itself).
        /// </returns>
        private static MsdSplitTree RecursiveSplitBeam(List <LemmaExample> el, double weightInitial, MsdSpec msdSpec, int level, BeamSearchParams beamParams)
        {
            List <MsdSplitTree> splits = ProduceOrderedSplits(el, weightInitial, msdSpec);
            //OutputSplits(splits, el, weightInitial, msdSpec, level);

            List <MsdSplitTree> beamSplits = new List <MsdSplitTree>();

            int beamSize = 1;

            if (beamParams.beamsPerLevel != null && beamParams.beamsPerLevel.ContainsKey(level))
            {
                beamSize = beamParams.beamsPerLevel[level];
            }

            // Clamp in every case, not only when a width is configured: with the default
            // width of 1 and an empty candidate list, splits[0] would otherwise throw.
            beamSize = Math.Min(beamSize, splits.Count);

            for (int beam = 0; beam < beamSize; beam++)
            {
                MsdSplitTree bestTree = splits[beam];
                if (bestTree.ambigChild < weightInitial)
                {
                    Dictionary <char, MsdSplitTree> newSubTrees = RecursiveSplit(bestTree, msdSpec, level, beamParams);
                    bestTree.subTrees  = newSubTrees;
                    bestTree.ambigThis = weightInitial;
                    beamSplits.Add(bestTree);
                }
            }

            if (beamSplits.Count == 0)
            {
                return(null);
            }
            if (beamSplits.Count == 1)
            {
                return(beamSplits[0]);
            }

            beamSplits.Sort(CompareTreesRecurSizeAsc);
            MsdSplitTree best = beamSplits[0];

            best.beamSiblings = beamSplits;
            return(best);
        }
예제 #14
0
        /// <summary>
        /// Entry point of the splitting procedure: computes the ambiguity of the whole
        /// example list and starts the recursive beam split at level 0.
        /// </summary>
        /// <param name="el">Examples to split.</param>
        /// <param name="msdSpec">Specification used for splitting.</param>
        /// <param name="beamParams">Beam settings passed through unchanged.</param>
        /// <returns>Whatever <c>RecursiveSplitBeam</c> returns, which may be null.</returns>
        private static MsdSplitTree Split(List <LemmaExample> el, MsdSpec msdSpec, BeamSearchParams beamParams)
        {
            const int rootLevel = 0;

            return RecursiveSplitBeam(el, GetListAmbiguities(el), msdSpec, rootLevel, beamParams);
        }