/// <summary>
/// Reads two tree files line by line in lockstep and writes one combined tree per line pair.
/// For every node of the second file's tree, the tag of the node with the same
/// Start/End values in the first file's tree (after <c>RemoveUnaryRule</c>) is appended
/// to the node's own tag. The root tag of each output tree is then forced to "S".
/// </summary>
/// <remarks>
/// Input and output paths are hard-coded. Processing stops as soon as either input
/// reaches end of stream. A node of the second tree whose Start/End pair has no
/// counterpart in the first tree makes the dictionary lookup throw.
/// </remarks>
static void CombineLabel()
{
    string parsePath = @"D:\user\nyang\tmp\train.pcfg.clean.txt";
    string labelPath = @"D:\user\nyang\tmp\train.c.txt";
    string outputPath = @"D:\user\nyang\tmp\train.cb.txt";

    using (StreamReader parseReader = new StreamReader(parsePath))
    using (StreamReader labelReader = new StreamReader(labelPath))
    using (StreamWriter writer = new StreamWriter(outputPath))
    {
        while (!(parseReader.EndOfStream || labelReader.EndOfStream))
        {
            string parseText = parseReader.ReadLine();
            string labelText = labelReader.ReadLine();

            PhrasalTree parseTree = new PhrasalTree(parseText);
            parseTree.RemoveUnaryRule();
            PhrasalTree labelTree = new PhrasalTree(labelText);

            // Each node is keyed by Start * (rootEnd + 1) + End.
            // NOTE(review): this is only collision-free if every End lies in [0, rootEnd] - confirm.
            int stride = parseTree.Root.End + 1;

            var nodesBySpan = new Dictionary<int, PhrasalNode>();
            foreach (var parseNode in parseTree.TreeNodes)
            {
                nodesBySpan[parseNode.Start * stride + parseNode.End] = parseNode;
            }

            foreach (var labelNode in labelTree.TreeNodes)
            {
                int spanKey = labelNode.Start * stride + labelNode.End;
                PhrasalNode match = nodesBySpan[spanKey];
                var parseTag = match.Tag;

                // A leading '@' is stripped and replaced by a trailing "x"
                // (presumably marking intermediate X-bar nodes - verify against the parser output).
                var suffix = parseTag[0] == '@'
                    ? parseTag.Substring(1) + "x"
                    : parseTag;

                labelNode.Tag += suffix;
            }

            labelTree.Root.Tag = "S";
            writer.WriteLine(labelTree.GetParseLine());
        }
    }
}
/// <summary>
/// Converts a parser output file into a file of "(TOP ...)" trees, one per line.
/// Each input line is trimmed; lines that are empty, do not begin with "(", or begin
/// with "()" are skipped. Every remaining line is wrapped as "(TOP " + line + ")",
/// parsed into a <c>PhrasalTree</c>, and the <c>Lex</c> of each childless node is
/// replaced by <c>SimpleTokenizor.ETokenize</c> of its previous value before the tree
/// is written out via <c>GetParseLine</c>.
/// </summary>
/// <remarks>
/// Input and output paths are hard-coded. The bracket prefix checks use ordinal
/// comparison: the test is structural, not linguistic, so it must not depend on the
/// current culture or treat ignorable Unicode characters as absent.
/// </remarks>
static void ProcessBKOutput()
{
    string inputfn = @"D:\user\nyang\tools\bkparser\wsj.23.s1.splitting.viterbi.out";
    string outputfn = @"C:\Users\v-nayang\Downloads\EVALB\EVALB\wsj.23.s1.bk.out";

    using (StreamReader sr = new StreamReader(inputfn))
    using (StreamWriter sw = new StreamWriter(outputfn))
    {
        string raw;
        while ((raw = sr.ReadLine()) != null)
        {
            string line = raw.Trim();

            // An empty line fails the first check, so no separate whitespace test is needed after Trim().
            bool startsWithBracket = line.StartsWith("(", System.StringComparison.Ordinal);
            bool startsWithEmptyPair = line.StartsWith("()", System.StringComparison.Ordinal);
            if (!startsWithBracket || startsWithEmptyPair)
            {
                continue;
            }

            string xline = "(TOP " + line + ")";
            var tree = new PhrasalTree(xline);

            foreach (var node in tree.TreeNodes)
            {
                // Only childless nodes have their Lex re-tokenized.
                if (node.Children.Count == 0)
                {
                    node.Lex = SimpleTokenizor.ETokenize(node.Lex);
                }
            }

            sw.WriteLine(tree.GetParseLine());
        }
    }
}