/// <summary>
/// Restores this tree from a binary stream: an optional MSD specification,
/// then a table of examples, then the tree nodes (which refer to the table
/// entries by index).
/// </summary>
/// <param name="binRead">Reader positioned at the start of the serialized tree.</param>
public void Deserialize(BinaryReader binRead)
{
    // A leading flag says whether a specification follows in the stream.
    bool hasSpec = binRead.ReadBoolean();
    msdSpec = hasSpec ? new MsdSpec(binRead) : null;

    // Every example is read once and registered under its position in the table.
    Dictionary<int, LemmaExample> examplesById = new Dictionary<int, LemmaExample>();
    int storedCount = binRead.ReadInt32();
    if (storedCount >= 0)
    {
        exampleList = new List<LemmaExample>(storedCount);
        int index = 0;
        while (index < storedCount)
        {
            LemmaExample example = new LemmaExample(binRead, null, null);
            examplesById[index] = example;
            exampleList.Add(example);
            index++;
        }
    }
    else
    {
        // A negative count is read back as a null list.
        exampleList = null;
    }

    // The node-level reader overwrites exampleList with this node's own
    // selection, resolved through the table built above.
    Deserialize(binRead, examplesById, msdSpec);
}
/// <summary>
/// Builds a split tree for the given examples using beam search.
/// </summary>
/// <param name="examples">Examples to partition; they are passed through PrepareExampleList first.</param>
/// <param name="msdSpec">MSD specification stored on the tree and used by the split search.</param>
/// <param name="beamParams">Beam search settings; null selects a default-constructed BeamSearchParams.</param>
/// <remarks>
/// When the search finds no split at all (Split returns null), this instance
/// is initialised as a single leaf holding all prepared examples instead of
/// failing with a NullReferenceException.
/// </remarks>
public MsdSplitTree(List<LemmaExample> examples, MsdSpec msdSpec, BeamSearchParams beamParams)
{
    if (beamParams == null)
    {
        beamParams = new BeamSearchParams();
    }

    List<LemmaExample> prepared = PrepareExampleList(examples);
    MsdSplitTree et = Split(prepared, msdSpec, beamParams);

    if (et == null)
    {
        // RecursiveSplitBeam reports "nothing to split" as null. Mirror the way
        // SplitByMsdAttribute initialises its leaves: all three ambiguity figures
        // equal the list ambiguity and the subtree size is 1.
        double ambig = GetListAmbiguities(prepared);
        this.msdSpec = msdSpec;
        this.exampleList = prepared;
        this.ambigThis = ambig;
        this.ambigChild = ambig;
        this.ambigRecurs = ambig;
        this.subTreeSizeRecurs = 1;
        return;
    }

    CopyVariablesToThis(et);
}
/// <summary>
/// Takes over every field of another node so that this instance represents
/// the same tree. References are copied as-is (no cloning).
/// </summary>
/// <param name="et">Node whose fields are copied into this instance.</param>
private void CopyVariablesToThis(MsdSplitTree et)
{
    // Identity of the node: specification, split attribute, covered examples.
    msdSpec = et.msdSpec;
    attrId = et.attrId;
    exampleList = et.exampleList;

    // Ambiguity figures and recursive size.
    ambigThis = et.ambigThis;
    ambigChild = et.ambigChild;
    ambigRecurs = et.ambigRecurs;
    subTreeSizeRecurs = et.subTreeSizeRecurs;

    // Structure: children and alternative beam candidates.
    subTrees = et.subTrees;
    beamSiblings = et.beamSiblings;
}
/// <summary>
/// Writes a one-line summary of a node to the console, indented by two spaces
/// per level, and then does the same for each of its subtrees.
/// </summary>
/// <param name="et">Node to print.</param>
/// <param name="msdSpec">Specification used to look up the attribute name for the node's attrId.</param>
/// <param name="level">Depth of this node; controls indentation.</param>
/// <param name="maxLevel">Nodes deeper than this are not printed.</param>
/// <param name="attrSet">Textual list of attribute constraints accumulated on the path to this node.</param>
private static void OutputTree(MsdSplitTree et, MsdSpec msdSpec, int level, int maxLevel, string attrSet)
{
    if (level > maxLevel)
    {
        return;
    }

    int attrId = et.attrId;
    string attrName = msdSpec.attrIdToNameMap[attrId];

    // exampleList can be null on a deserialized node (negative stored count),
    // so report such lists as empty rather than throwing.
    int exampleCount = et.exampleList == null ? 0 : et.exampleList.Count;

    StringBuilder sbSubGroups = new StringBuilder();
    if (et.subTrees != null)
    {
        sbSubGroups.AppendFormat(",SplitBy={0}({1}) To={2} classes:", attrName, attrId, et.subTrees.Count);
        foreach (KeyValuePair<char, MsdSplitTree> sub in et.subTrees)
        {
            int subCount = sub.Value.exampleList == null ? 0 : sub.Value.exampleList.Count;
            sbSubGroups.AppendFormat("|{0}:{1}", sub.Key, subCount);
        }
    }

    // Beam information goes into its own builder; it is printed directly after
    // the sub-group text, so the resulting line is unchanged.
    StringBuilder sbBeam = new StringBuilder();
    if (et.beamSiblings != null)
    {
        sbBeam.Append(",BeamSibling=");
        foreach (MsdSplitTree beamSibl in et.beamSiblings)
        {
            sbBeam.AppendFormat("|{0}", beamSibl.subTreeSizeRecurs);
        }
    }

    Console.Write(new string(' ', level * 2));
    Console.WriteLine(
        "Examples={0},AttrSet=({1}),SubTree={2},Ambig:(T={3}/S={4}/R={5}){6}{7}",
        exampleCount,
        attrSet,
        et.subTreeSizeRecurs,
        et.ambigThis,
        et.ambigChild,
        et.ambigRecurs,
        sbSubGroups,
        sbBeam);

    if (et.subTrees != null)
    {
        foreach (KeyValuePair<char, MsdSplitTree> sub in et.subTrees)
        {
            // Extend the path description with this attribute's value for the child.
            string childAttrSet = attrSet + (attrSet.Length > 0 ? "&" : "") + attrName + "='" + sub.Key + "'";
            OutputTree(sub.Value, msdSpec, level + 1, maxLevel, childAttrSet);
        }
    }
}
/// <summary>
/// Creates a node by reading it from a binary stream. Used while
/// deserializing subtrees and beam siblings.
/// </summary>
/// <param name="binRead">Reader positioned at the start of the node.</param>
/// <param name="exampleMapping">Examples addressable by the ids stored in the stream.</param>
/// <param name="msdSpec">Specification assigned to the new node.</param>
private MsdSplitTree(BinaryReader binRead, Dictionary<int, LemmaExample> exampleMapping, MsdSpec msdSpec)
{
    this.Deserialize(binRead, exampleMapping, msdSpec);
}
/// <summary>
/// Reads one node from the stream, including its subtrees and beam siblings.
/// Field order in the stream: attribute id, example ids, the three ambiguity
/// values, recursive size, subtrees, beam siblings.
/// </summary>
/// <param name="binRead">Reader positioned at the start of the node.</param>
/// <param name="exampleMapping">Lookup from stored example id to example instance.</param>
/// <param name="msdSpec">Specification assigned to this node and passed on to nested nodes.</param>
private void Deserialize(BinaryReader binRead, Dictionary<int, LemmaExample> exampleMapping, MsdSpec msdSpec)
{
    this.msdSpec = msdSpec;
    attrId = binRead.ReadInt32();

    // Examples are stored as ids and resolved through the shared mapping.
    int exampleTotal = binRead.ReadInt32();
    if (exampleTotal >= 0)
    {
        exampleList = new List<LemmaExample>(exampleTotal);
        for (int remaining = exampleTotal; remaining > 0; remaining--)
        {
            int storedId = binRead.ReadInt32();
            exampleList.Add(exampleMapping[storedId]);
        }
    }
    else
    {
        // A negative count is read back as a null list.
        exampleList = null;
    }

    ambigThis = binRead.ReadDouble();
    ambigChild = binRead.ReadDouble();
    ambigRecurs = binRead.ReadDouble();
    subTreeSizeRecurs = binRead.ReadInt32();

    // Children: each entry is a key character followed by a nested node.
    int childTotal = binRead.ReadInt32();
    if (childTotal >= 0)
    {
        subTrees = new Dictionary<char, MsdSplitTree>();
        int childrenRead = 0;
        while (childrenRead < childTotal)
        {
            char key = binRead.ReadChar();
            MsdSplitTree child = new MsdSplitTree(binRead, exampleMapping, msdSpec);
            subTrees.Add(key, child);
            childrenRead++;
        }
    }
    else
    {
        subTrees = null;
    }

    // Beam siblings: a flag per entry says whether the entry is this very node
    // (nothing more to read) or a separately stored node.
    int siblingTotal = binRead.ReadInt32();
    if (siblingTotal < 0)
    {
        beamSiblings = null;
        return;
    }

    beamSiblings = new List<MsdSplitTree>(siblingTotal);
    for (int remaining = siblingTotal; remaining > 0; remaining--)
    {
        bool isSelf = binRead.ReadBoolean();
        beamSiblings.Add(isSelf ? this : new MsdSplitTree(binRead, exampleMapping, msdSpec));
    }
}
/// <summary>
/// Builds a split tree with default beam search settings; equivalent to
/// calling the three-argument constructor with a null beamParams.
/// </summary>
/// <param name="examples">Examples to partition.</param>
/// <param name="msdSpec">MSD specification used for splitting.</param>
public MsdSplitTree(List<LemmaExample> examples, MsdSpec msdSpec)
    : this(examples, msdSpec, null)
{
}
/// <summary>
/// Creates an empty node that only knows its MSD specification; all other
/// fields keep their default values.
/// </summary>
/// <param name="msdSpec">Specification stored on the node.</param>
public MsdSplitTree(MsdSpec msdSpec)
{
    this.msdSpec = msdSpec;
}
/// <summary>
/// Partitions the examples by the value of one MSD attribute and returns a
/// node whose subtrees are the resulting classes (one level deep).
/// </summary>
/// <param name="el">Examples to partition; stored on the returned node as-is.</param>
/// <param name="attrId">Attribute whose value decides the class of each example.</param>
/// <param name="msdSpec">Specification used to extract the attribute value from an example's Msd.</param>
/// <returns>
/// A node with attrId, exampleList, subTrees, ambigChild, ambigRecurs and
/// subTreeSizeRecurs set. ambigThis is left at its default.
/// </returns>
private static MsdSplitTree SplitByMsdAttribute(List<LemmaExample> el, int attrId, MsdSpec msdSpec)
{
    MsdSplitTree et = new MsdSplitTree(msdSpec);
    et.attrId = attrId;
    et.subTrees = new Dictionary<char, MsdSplitTree>();
    et.exampleList = el;

    //todo FIX IT
    // A '#' class is always registered up front with an empty example list,
    // so it counts toward subTreeSizeRecurs even when no example maps to it.
    MsdSplitTree etSubDef = new MsdSplitTree(msdSpec);
    etSubDef.exampleList = new List<LemmaExample>();
    et.subTrees['#'] = etSubDef;

    foreach (LemmaExample e in el)
    {
        char cls = msdSpec.GetAttrValue(e.Msd, attrId);

        // Single lookup: fetch the class bucket, creating it on first use.
        MsdSplitTree etSub;
        if (!et.subTrees.TryGetValue(cls, out etSub))
        {
            etSub = new MsdSplitTree(msdSpec);
            etSub.exampleList = new List<LemmaExample>();
            et.subTrees[cls] = etSub;
        }

        etSub.exampleList.Add(e);
    }

    // Every class starts as a leaf: its three ambiguity figures are the
    // ambiguity of its own list, and its recursive size is 1.
    double ambigChild = 0;
    foreach (MsdSplitTree etSub in et.subTrees.Values)
    {
        double ambig = GetListAmbiguities(etSub.exampleList);
        etSub.ambigThis = ambig;
        etSub.ambigChild = ambig;
        etSub.ambigRecurs = ambig;
        etSub.subTreeSizeRecurs = 1;
        ambigChild += ambig;
    }

    et.ambigChild = ambigChild;
    et.ambigRecurs = ambigChild;
    et.subTreeSizeRecurs = et.subTrees.Count;
    return et;
}
/// <summary>
/// Writes a diagnostic listing of candidate splits to the console: a header
/// line, then one line per split with its attribute, the ambiguity it
/// resolves and leaves, and the size of each resulting class.
/// </summary>
/// <param name="splits">Candidate splits to list; each must have subTrees set.</param>
/// <param name="el">Examples being split (only the count is printed).</param>
/// <param name="weightInitial">Ambiguity before splitting.</param>
/// <param name="msdSpec">Specification used to look up attribute names.</param>
/// <param name="level">Depth in the tree; controls indentation (two spaces per level).</param>
private static void OutputSplits(List<MsdSplitTree> splits, List<LemmaExample> el, double weightInitial, MsdSpec msdSpec, int level)
{
    string indent = new string(' ', level * 2);

    Console.Write(indent);
    Console.WriteLine("Trying to split {0} examples with {1} ambiguities", el.Count, weightInitial);

    foreach (MsdSplitTree candidate in splits)
    {
        int attrId = candidate.attrId;
        string attrName = msdSpec.attrIdToNameMap[attrId];

        // Collect "key:count" for every class produced by this split.
        StringBuilder classSizes = new StringBuilder();
        foreach (KeyValuePair<char, MsdSplitTree> cls in candidate.subTrees)
        {
            classSizes.AppendFormat(" {0}:{1}", cls.Key, cls.Value.exampleList.Count);
        }

        double remaining = candidate.ambigChild;
        double resolved = weightInitial - remaining;

        Console.Write(indent);
        Console.WriteLine(
            " Attr: {0,2}, Ambig: Res={1,4} Rem={2,4}, AttrName: {3}, SplitTo: {4} classes: {5}",
            attrId,
            resolved,
            remaining,
            attrName,
            candidate.subTrees.Count,
            classSizes);
    }
}
/// <summary>
/// Produces one candidate split per attribute of the specification and
/// returns them sorted with CompareTreesAbmibuitiesAsc.
/// </summary>
/// <param name="el">Examples to split.</param>
/// <param name="weightInitial">Not used by this method.</param>
/// <param name="msdSpec">Specification providing the attribute count.</param>
/// <returns>The sorted list of candidate splits; empty when the specification has no attributes.</returns>
private static List<MsdSplitTree> ProduceOrderedSplits(List<LemmaExample> el, double weightInitial, MsdSpec msdSpec)
{
    List<MsdSplitTree> candidates = new List<MsdSplitTree>();

    int attrId = 0;
    while (attrId < msdSpec.AttrCount)
    {
        MsdSplitTree candidate = SplitByMsdAttribute(el, attrId, msdSpec);
        candidates.Add(candidate);
        attrId++;
    }

    candidates.Sort(CompareTreesAbmibuitiesAsc);
    return candidates;
}
/// <summary>
/// Tries to split each subtree of <paramref name="bestTree"/> further and
/// recomputes bestTree's aggregate figures from the results.
/// </summary>
/// <param name="bestTree">Node whose subtrees are refined; its ambigRecurs, ambigChild and subTreeSizeRecurs are reset and re-accumulated.</param>
/// <param name="msdSpec">Specification passed on to the beam search.</param>
/// <param name="level">Depth of bestTree; subtrees are searched at level + 1.</param>
/// <param name="beamParams">Beam search settings passed on unchanged.</param>
/// <returns>
/// A new dictionary with the same keys as bestTree.subTrees; each value is the
/// refined subtree, or the original one when no further split was made.
/// bestTree.subTrees itself is not replaced here.
/// </returns>
private static Dictionary<char, MsdSplitTree> RecursiveSplit(MsdSplitTree bestTree, MsdSpec msdSpec, int level, BeamSearchParams beamParams)
{
    // Aggregates are rebuilt from scratch from the (possibly refined) children.
    bestTree.ambigRecurs = 0;
    bestTree.ambigChild = 0;
    bestTree.subTreeSizeRecurs = 0;

    Dictionary<char, MsdSplitTree> refined = new Dictionary<char, MsdSplitTree>();
    foreach (KeyValuePair<char, MsdSplitTree> entry in bestTree.subTrees)
    {
        MsdSplitTree current = entry.Value;

        // Only subtrees that still carry ambiguity are worth splitting again.
        MsdSplitTree replacement = null;
        if (current.ambigChild > 0)
        {
            replacement = RecursiveSplitBeam(current.exampleList, current.ambigChild, msdSpec, level + 1, beamParams);
        }

        // The beam search returns null when it finds no useful split.
        MsdSplitTree chosen = replacement ?? current;
        refined.Add(entry.Key, chosen);

        bestTree.ambigRecurs += chosen.ambigRecurs;
        bestTree.ambigChild += chosen.ambigThis;
        bestTree.subTreeSizeRecurs += chosen.subTreeSizeRecurs;
    }

    return refined;
}
/// <summary>
/// Beam search step: ranks all single-attribute splits of the examples,
/// recursively expands the top candidates, and returns the best expanded one.
/// </summary>
/// <param name="el">Examples to split.</param>
/// <param name="weightInitial">Ambiguity of <paramref name="el"/> before splitting; a candidate is kept only if its ambigChild is lower.</param>
/// <param name="msdSpec">Specification providing the attributes to split by.</param>
/// <param name="level">Current depth; selects the beam width from beamParams.beamsPerLevel.</param>
/// <param name="beamParams">Beam settings; must not be null. Levels without an entry use a width of 1.</param>
/// <returns>
/// The selected split, or null when no candidate lowers the ambiguity
/// (including the case where there are no candidates at all). When more than
/// one candidate was expanded, the returned node's beamSiblings holds all of
/// them, the returned node included.
/// </returns>
private static MsdSplitTree RecursiveSplitBeam(List<LemmaExample> el, double weightInitial, MsdSpec msdSpec, int level, BeamSearchParams beamParams)
{
    List<MsdSplitTree> splits = ProduceOrderedSplits(el, weightInitial, msdSpec);
    // Debug aid: OutputSplits(splits, el, weightInitial, msdSpec, level);

    int beamSize = 1;
    if (beamParams.beamsPerLevel != null && beamParams.beamsPerLevel.ContainsKey(level))
    {
        beamSize = beamParams.beamsPerLevel[level];
    }

    // Clamp in every case: the default width of 1 would otherwise index past
    // the end of an empty candidate list (specification without attributes).
    beamSize = Math.Min(beamSize, splits.Count);

    List<MsdSplitTree> beamSplits = new List<MsdSplitTree>();
    for (int beam = 0; beam < beamSize; beam++)
    {
        MsdSplitTree bestTree = splits[beam];
        if (bestTree.ambigChild < weightInitial)
        {
            // The candidate helps: expand its subtrees and record the
            // ambiguity this node had before the split.
            Dictionary<char, MsdSplitTree> newSubTrees = RecursiveSplit(bestTree, msdSpec, level, beamParams);
            bestTree.subTrees = newSubTrees;
            bestTree.ambigThis = weightInitial;
            beamSplits.Add(bestTree);
        }
    }

    if (beamSplits.Count == 0)
    {
        return null;
    }

    if (beamSplits.Count == 1)
    {
        return beamSplits[0];
    }

    // Several expanded candidates: order them with CompareTreesRecurSizeAsc,
    // take the first, and remember the whole set on it.
    beamSplits.Sort(CompareTreesRecurSizeAsc);
    MsdSplitTree best = beamSplits[0];
    best.beamSiblings = beamSplits;
    return best;
}
/// <summary>
/// Entry point of the split search: measures the ambiguity of the whole
/// example list and starts the beam search at level 0.
/// </summary>
/// <param name="el">Examples to split.</param>
/// <param name="msdSpec">Specification providing the attributes to split by.</param>
/// <param name="beamParams">Beam search settings.</param>
/// <returns>Whatever RecursiveSplitBeam returns for the root, which may be null.</returns>
private static MsdSplitTree Split(List<LemmaExample> el, MsdSpec msdSpec, BeamSearchParams beamParams)
{
    return RecursiveSplitBeam(el, GetListAmbiguities(el), msdSpec, 0, beamParams);
}