/// <summary>
/// Creates a search-lattice node by storing each argument in the matching member.
/// </summary>
/// <param name="w">Value stored in <c>word</c>.</param>
/// <param name="T">Value stored in <c>tag</c>.</param>
/// <param name="sc">Value stored in <c>score</c>.</param>
/// <param name="ps">Value stored in <c>PathScore</c>.</param>
/// <param name="prev">Value stored in <c>PrevNode</c>; the decoder treats a null predecessor as "no earlier tag".</param>
public Node(string w, string T, double sc, double ps, Node prev)
{
    this.word = w;
    this.tag = T;
    this.score = sc;
    this.PathScore = ps;
    this.PrevNode = prev;
}
/// <summary>
/// Tags a test file with a beam search over per-word class scores and writes
/// one "instance gold predicted probability" line per input word.
/// </summary>
/// <param name="args">
/// Seven positional arguments: test data path, boundary file path, model file
/// path, output file path, beam size (double), top-N (candidates kept per
/// parent node) and top-K (candidates kept per word across all parents).
/// </param>
/// <remarks>
/// Each non-blank test line is split on spaces; token 0 and token 1 are echoed
/// to the output (token 1 is compared with the predicted tag for accuracy) and
/// tokens from index 2 on are treated as features, skipping tokens equal to "1".
/// Elapsed time and accuracy are printed to the console, after which the
/// program waits for a line on standard input before exiting.
/// </remarks>
static void Main(string[] args)
{
    if (args == null || args.Length < 7)
    {
        Console.Error.WriteLine("Usage: <testPath> <boundaryPath> <modelFile> <outFile> <beamSize> <topN> <topK>");
        Environment.ExitCode = 1;
        return;
    }

    string testPath = args[0];
    string boundaryPath = args[1];
    string modelFile = args[2];
    string outFile = args[3];
    double beamSize = Convert.ToDouble(args[4]);
    int topN = Convert.ToInt32(args[5]);
    int topK = Convert.ToInt32(args[6]);

    // tag -> (feature -> weight); both structures are handed to ReadModelFile to be filled.
    Dictionary<string, Dictionary<string, double>> classFeatureDict = new Dictionary<string, Dictionary<string, double>>();
    List<String> tagList = new List<string>();
    // Reads the model file and builds the required data structures.
    ReadModelFile(classFeatureDict, modelFile, tagList);

    // One entry per test instance; used below as the number of words in that instance.
    List<int> lineBoundaries = new List<int>();
    ReadBoundaryFile(boundaryPath, lineBoundaries);

    Stopwatch stopwatch = Stopwatch.StartNew();

    // From here on we are reading the test data.
    int docCount = 0, boundaryValue = 0, curWordNum = 1;
    double correct = 0, totalLines = 0;
    // Surviving nodes of the previous word; every one is expanded for the current word.
    List<Node> parents = new List<Node>();

    // "using" guarantees the output file is flushed and closed even if decoding throws.
    using (StreamWriter writer = new StreamWriter(outFile))
    {
        writer.WriteLine("%%%%% test data:");

        using (StreamReader reader = new StreamReader(testPath))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (String.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                totalLines++;

                if (curWordNum > boundaryValue)
                {
                    // We have moved on to a new test instance.
                    if (docCount >= lineBoundaries.Count)
                    {
                        throw new InvalidDataException(
                            "Boundary file has only " + lineBoundaries.Count +
                            " entries but the test data continues past the last instance.");
                    }

                    boundaryValue = lineBoundaries[docCount++];
                    curWordNum = 1;

                    // Parents from the previous instance are irrelevant; restart from a
                    // begin-of-sentence root that has no predecessor.
                    parents.Clear();
                    parents.Add(new Node("BOS", "BOS", 1, 0, null));
                }

                string[] words = line.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                // TODO: indexed from 2 because we do not want the instance name and gold standard for now.
                List<String> features = new List<string>();
                string word = "";
                for (int i = 2; i < words.Length; i++)
                {
                    if (words[i].Contains("curW="))
                    {
                        word = words[i].Substring(words[i].IndexOf("=") + 1);
                    }

                    // TODO: check if the feature value is zero.
                    if (words[i].Trim() != "1")
                    {
                        features.Add(words[i]);
                    }
                }

                List<Node> curCandidates = new List<Node>();
                // Features derived from the tags already chosen on the path through each parent.
                List<string> contextFeatures = new List<string>();

                foreach (Node parent in parents)
                {
                    string prevTag = parent.tag;
                    string prevTwoTag = parent.PrevNode != null ? parent.PrevNode.tag : "BOS";

                    contextFeatures.Clear();
                    contextFeatures.Add("prevT=" + prevTag);
                    contextFeatures.Add("prevTwoTags=" + prevTwoTag + "+" + prevTag);

                    // Score every tag under this parent, then keep only the top N.
                    List<Node> candidatesForParent = new List<Node>();
                    double normalizerPerParent = 0;

                    foreach (string classLabel in tagList)
                    {
                        Dictionary<string, double> weights = classFeatureDict[classLabel];
                        double score = 0;
                        double weight;

                        // Weights of the features observed on this line.
                        foreach (string feat in features)
                        {
                            if (weights.TryGetValue(feat, out weight))
                            {
                                score += weight;
                            }
                        }

                        // Weights of the previous-tag features.
                        foreach (string feat in contextFeatures)
                        {
                            if (weights.TryGetValue(feat, out weight))
                            {
                                score += weight;
                            }
                        }

                        score += weights["<default>"];
                        score = System.Math.Exp(score);
                        normalizerPerParent += score;

                        // The path score is filled in once all parents have been expanded.
                        candidatesForParent.Add(new Node(word, classLabel, score, 0, parent));
                    }

                    candidatesForParent = candidatesForParent.OrderByDescending(x => x.score).Take(topN).ToList();

                    // The normalizer covers ALL tags for this parent, not just the kept top N.
                    foreach (Node candidate in candidatesForParent)
                    {
                        candidate.score /= normalizerPerParent;
                        curCandidates.Add(candidate);
                    }
                }

                // Accumulate log10 probabilities along each path and remember the best one
                // so the beam below can prune relative to it.
                double maxScore = double.MinValue;
                foreach (Node candidate in curCandidates)
                {
                    candidate.PathScore = System.Math.Log10(candidate.score) + candidate.PrevNode.PathScore;
                    if (candidate.PathScore > maxScore)
                    {
                        maxScore = candidate.PathScore;
                    }
                }

                // Rebuild the parent list for the next word: order by whole-path score,
                // keep the top K, then prune further with the beam.
                parents.Clear();
                curCandidates = curCandidates.OrderByDescending(x => x.PathScore).Take(topK).ToList();

                // Only the first candidate that holds the best score is reported; without
                // this flag tied candidates would each emit a line and inflate the
                // correct count. Exact comparison is safe because maxScore was copied
                // from one of these PathScore values.
                bool bestWritten = false;
                foreach (Node candidate in curCandidates)
                {
                    if (!bestWritten && candidate.PathScore == maxScore)
                    {
                        writer.WriteLine(words[0] + " " + words[1] + " " + candidate.tag + " " + candidate.score);
                        if (words[1] == candidate.tag)
                        {
                            correct++;
                        }

                        bestWritten = true;
                    }

                    if (candidate.PathScore + beamSize >= maxScore)
                    {
                        parents.Add(candidate);
                    }
                    else
                    {
                        // Candidates are sorted, so everything after this one is outside the beam too.
                        break;
                    }
                }

                curWordNum++;
            }
        }

        stopwatch.Stop();
        Console.WriteLine("Time elapsed: {0}", stopwatch.Elapsed);
        Console.WriteLine("Accuracy : " + Convert.ToString(correct / totalLines));
    }

    // Keeps the console window open until the user presses Enter.
    Console.ReadLine();
}