/// <summary>
/// Writes the disagreement cases from <c>mItems</c> to a tab-separated text file
/// (presumably Orange's tab-delimited format, judging by the method name and the
/// three-row header - confirm against the consumer).
/// </summary>
/// <remarks>
/// The file starts with three header rows: the column names, the word "discrete" once per
/// column, and a row containing only "class". It is followed by one data row for every item
/// whose two tags differ while the gold tag equals one of them. The first column of a data
/// row is "Tagger1" or "Tagger2", naming the tagger that matched the gold tag. The remaining
/// columns describe the previous, the current and the next item: the first character of each
/// tag plus the value of every attribute in <c>mAttrSet</c>; the current item additionally
/// gets yes/no agreement columns.
/// </remarks>
/// <param name="fileName">Path of the file to create or overwrite.</param>
/// <param name="nullVal">Text written wherever a value is missing (empty tag, absent attribute, no neighbouring item).</param>
public static void WriteDatasetOrange(string fileName, string nullVal)
{
    // "using" releases the file handle even when a tag lookup or a write throws part-way through;
    // the previous explicit Close() was only reached on the success path.
    using (StreamWriter writer = new StreamWriter(fileName))
    {
        // header row 1: column names (columnCount tracks how many were emitted)
        StringBuilder line = new StringBuilder();
        line.Append("Tagger\t");
        line.Append("Prev_POS_1\tPrev_POS_2\t");
        int columnCount = 3;
        foreach (string attr in mAttrSet)
        {
            line.Append(string.Format("Prev_{0}_1\t", attr));
            line.Append(string.Format("Prev_{0}_2\t", attr));
            columnCount += 2;
        }
        line.Append("POS_1\tPOS_2\tAgree_POS\t");
        columnCount += 3;
        foreach (string attr in mAttrSet)
        {
            line.Append(string.Format("{0}_1\t", attr));
            line.Append(string.Format("{0}_2\t", attr));
            line.Append(string.Format("Agree_{0}\t", attr));
            columnCount += 3;
        }
        line.Append("Next_POS_1\tNext_POS_2\t");
        columnCount += 2;
        foreach (string attr in mAttrSet)
        {
            line.Append(string.Format("Next_{0}_1\t", attr));
            line.Append(string.Format("Next_{0}_2\t", attr));
            columnCount += 2;
        }
        WriteOrangeRow(writer, line);

        // header row 2: every column is declared "discrete"
        line = new StringBuilder();
        for (int i = 0; i < columnCount; i++)
        {
            line.Append("discrete\t");
        }
        WriteOrangeRow(writer, line);

        // header row 3: only the first column ("Tagger") carries a flag
        writer.WriteLine("class");

        for (int i = 0; i < mItems.Count; i++)
        {
            MetaTaggerDataEntry entry = mItems[i];
            if (entry.Tag1 != entry.Tag2 && (entry.GoldTag == entry.Tag1 || entry.GoldTag == entry.Tag2)) // the two taggers disagree, one of them is correct
            {
                line = new StringBuilder();
                line.Append(entry.GoldTag == entry.Tag1 ? "Tagger1" : "Tagger2");
                line.Append("\t");

                // previous word (missing for the very first item)
                AppendOrangeNeighborColumns(line, i > 0 ? mItems[i - 1] : null, nullVal);

                // current word
                string pos1 = entry.Tag1.Length > 0 ? entry.Tag1[0].ToString() : nullVal;
                string pos2 = entry.Tag2.Length > 0 ? entry.Tag2[0].ToString() : nullVal;
                line.Append(pos1);
                line.Append("\t");
                line.Append(pos2);
                line.Append("\t");
                line.Append(pos1 == pos2 ? "yes" : "no");
                line.Append("\t");
                Dictionary<string, string> attrVal1 = mAttr[entry.Tag1];
                Dictionary<string, string> attrVal2 = mAttr[entry.Tag2];
                foreach (string attr in mAttrSet)
                {
                    string attr1;
                    string attr2;
                    if (!attrVal1.TryGetValue(attr, out attr1)) { attr1 = nullVal; }
                    if (!attrVal2.TryGetValue(attr, out attr2)) { attr2 = nullVal; }
                    line.Append(attr1);
                    line.Append("\t");
                    line.Append(attr2);
                    line.Append("\t");
                    line.Append(attr1 == attr2 ? "yes" : "no");
                    line.Append("\t");
                }

                // next word (missing for the very last item)
                AppendOrangeNeighborColumns(line, i < mItems.Count - 1 ? mItems[i + 1] : null, nullVal);

                WriteOrangeRow(writer, line);
            }
        }
    }
}

/// <summary>
/// Appends the two first-character tag columns and the two-per-attribute value columns of a
/// neighbouring item to <paramref name="row"/>, each followed by a tab. When
/// <paramref name="neighbor"/> is null, every one of those columns receives <paramref name="nullVal"/>.
/// </summary>
private static void AppendOrangeNeighborColumns(StringBuilder row, MetaTaggerDataEntry neighbor, string nullVal)
{
    if (neighbor == null)
    {
        int missingColumns = 2 + mAttrSet.Count * 2;
        for (int j = 0; j < missingColumns; j++)
        {
            row.Append(nullVal);
            row.Append("\t");
        }
        return;
    }
    string pos1 = neighbor.Tag1.Length > 0 ? neighbor.Tag1[0].ToString() : nullVal;
    string pos2 = neighbor.Tag2.Length > 0 ? neighbor.Tag2[0].ToString() : nullVal;
    row.Append(pos1);
    row.Append("\t");
    row.Append(pos2);
    row.Append("\t");
    Dictionary<string, string> attrVal1 = mAttr[neighbor.Tag1];
    Dictionary<string, string> attrVal2 = mAttr[neighbor.Tag2];
    foreach (string attr in mAttrSet)
    {
        string val;
        row.Append(attrVal1.TryGetValue(attr, out val) ? val : nullVal);
        row.Append("\t");
        row.Append(attrVal2.TryGetValue(attr, out val) ? val : nullVal);
        row.Append("\t");
    }
}

/// <summary>
/// Writes <paramref name="row"/> as one line after removing only its final tab separator.
/// </summary>
private static void WriteOrangeRow(TextWriter writer, StringBuilder row)
{
    // Strip exactly one trailing tab. TrimEnd('\t') would also swallow empty trailing values
    // (e.g. when nullVal is ""), leaving the row with fewer columns than the header.
    if (row.Length > 0 && row[row.Length - 1] == '\t')
    {
        row.Length--;
    }
    writer.WriteLine(row.ToString());
}
/// <summary>
/// Builds the list of (feature name, value) pairs describing the item at position
/// <paramref name="idx"/> of <c>mItems</c> together with its immediate neighbours.
/// </summary>
/// <remarks>
/// Features are emitted in the order previous item, current item, next item. A feature whose
/// value is unavailable (empty tag, attribute not present for the tag, no neighbour at that
/// side) is left out, except for the "Agree_*" features of the current item, which are always
/// present and hold "yes" or "no".
/// </remarks>
/// <param name="idx">Index into <c>mItems</c> of the item to describe.</param>
/// <returns>The feature list for the item.</returns>
public static ArrayList<KeyDat<string, string>> CreateExample(int idx)
{
    MetaTaggerDataEntry before = idx > 0 ? mItems[idx - 1] : null;
    MetaTaggerDataEntry current = mItems[idx];
    MetaTaggerDataEntry after = idx < mItems.Count - 1 ? mItems[idx + 1] : null;
    ArrayList<KeyDat<string, string>> features = new ArrayList<KeyDat<string, string>>();

    // previous word
    AddNeighborFeatures(features, before, "Prev");

    // current word
    Dictionary<string, string> firstAttrs = mAttr[current.Tag1];
    Dictionary<string, string> secondAttrs = mAttr[current.Tag2];
    string firstPos = null;
    string secondPos = null;
    if (current.Tag1.Length > 0)
    {
        firstPos = current.Tag1[0].ToString();
    }
    if (current.Tag2.Length > 0)
    {
        secondPos = current.Tag2[0].ToString();
    }
    if (firstPos != null)
    {
        features.Add(new KeyDat<string, string>("POS_1", firstPos));
    }
    if (secondPos != null)
    {
        features.Add(new KeyDat<string, string>("POS_2", secondPos));
    }
    features.Add(new KeyDat<string, string>("Agree_POS", firstPos == secondPos ? "yes" : "no"));
    foreach (string attr in mAttrSet)
    {
        string firstVal;
        string secondVal;
        // a failed lookup is normalised to null so that two absent values compare as equal below
        if (!firstAttrs.TryGetValue(attr, out firstVal))
        {
            firstVal = null;
        }
        if (!secondAttrs.TryGetValue(attr, out secondVal))
        {
            secondVal = null;
        }
        if (firstVal != null)
        {
            features.Add(new KeyDat<string, string>(string.Format("{0}_1", attr), firstVal));
        }
        if (secondVal != null)
        {
            features.Add(new KeyDat<string, string>(string.Format("{0}_2", attr), secondVal));
        }
        features.Add(new KeyDat<string, string>(string.Format("Agree_{0}", attr), firstVal == secondVal ? "yes" : "no"));
    }

    // next word
    AddNeighborFeatures(features, after, "Next");

    return features;
}

/// <summary>
/// Appends the tag and attribute features of a neighbouring item to <paramref name="features"/>,
/// naming them "<paramref name="prefix"/>_POS_n" and "<paramref name="prefix"/>_attr_n".
/// Does nothing when <paramref name="neighbor"/> is null.
/// </summary>
private static void AddNeighborFeatures(ArrayList<KeyDat<string, string>> features, MetaTaggerDataEntry neighbor, string prefix)
{
    if (neighbor == null)
    {
        return;
    }
    // attribute tables are fetched before the tags are inspected, same as the inline version did
    Dictionary<string, string> firstAttrs = mAttr[neighbor.Tag1];
    Dictionary<string, string> secondAttrs = mAttr[neighbor.Tag2];
    if (neighbor.Tag1.Length > 0)
    {
        features.Add(new KeyDat<string, string>(prefix + "_POS_1", neighbor.Tag1[0].ToString()));
    }
    if (neighbor.Tag2.Length > 0)
    {
        features.Add(new KeyDat<string, string>(prefix + "_POS_2", neighbor.Tag2[0].ToString()));
    }
    foreach (string attr in mAttrSet)
    {
        string val;
        if (firstAttrs.TryGetValue(attr, out val))
        {
            features.Add(new KeyDat<string, string>(string.Format("{0}_{1}_1", prefix, attr), val));
        }
        if (secondAttrs.TryGetValue(attr, out val))
        {
            features.Add(new KeyDat<string, string>(string.Format("{0}_{1}_2", prefix, attr), val));
        }
    }
}