/// <summary>
/// Creates the forward-backward state for a single sentence: stores the inputs,
/// allocates empty alpha/beta/u/Uab tables, resets <c>Z</c> to zero, builds the
/// weighted feature sum and precomputes every tag bigram in <c>"a@#b"</c> form.
/// </summary>
/// <param name="inputSentence">Words of the sentence to process; its count sizes <c>cList</c> and <c>dList</c>.</param>
/// <param name="wc">Weight vector handed to the <c>WeightedFeatureSum</c>.</param>
/// <param name="tagList">Tags from which the bigram list is generated.</param>
public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
{
    _inputSentence = inputSentence;
    _wc = wc;
    _tagList = tagList;

    _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _uDictionary = new Dictionary<int, Dictionary<string, double>>();
    UabDictionary = new Dictionary<int, Dictionary<string, double>>();
    Z = 0;

    // NOTE(review): the meaning of the third argument (true) is defined by
    // WeightedFeatureSum, which is not visible here.
    _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);

    // Pre-sized (capacity only, still empty) to one slot per word.
    // Presumably the scaling coefficients used when _useScaling is set - confirm.
    cList = new List<double>(_inputSentence.Count);
    dList = new List<double>(_inputSentence.Count);
    _useScaling = true;
    _useLog = false;

    // Collect the bigrams in a growable list and materialise the array once.
    // The previous code resized the array by one element per bigram (copying it
    // every time) and left null padding when there were fewer than four bigrams.
    var ngramTags = new Tags(_tagList);
    var twoGrams = new List<string>();
    foreach (var ngram in ngramTags.GetNGramTags(2))
    {
        // Each bigram arrives as "first:second" and is stored as "first@#second".
        string[] split = ngram.Split(new[] { ':' });
        twoGrams.Add(split[0] + "@#" + split[1]);
    }

    _twoGramsList = twoGrams.ToArray();
}
/// <summary>
/// Sets up the forward-backward state for one sentence: keeps the inputs,
/// wraps the tag list in a <c>Tags</c> helper, starts with empty
/// alpha/beta/u/Uab tables and a zero <c>Z</c>, and builds the weighted
/// feature sum for the sentence.
/// </summary>
/// <param name="inputSentence">Words of the sentence to process.</param>
/// <param name="wc">Weight vector passed on to the <c>WeightedFeatureSum</c>.</param>
/// <param name="tagList">Tags used to build the <c>Tags</c> helper.</param>
public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
{
    // Caller-supplied inputs are stored by reference, not copied.
    _tagList = tagList;
    _wc = wc;
    _inputSentence = inputSentence;

    _tags = new Tags(tagList);

    // Every table starts empty; Z starts at zero.
    UabDictionary = new Dictionary<int, Dictionary<string, double>>();
    _uDictionary = new Dictionary<int, Dictionary<string, double>>();
    _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
    Z = 0;

    // NOTE(review): the meaning of the trailing "true" flag is defined by
    // WeightedFeatureSum, which is not visible here.
    _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
}
/// <summary>
/// Assigns one tag to every word of <paramref name="inputSentance"/>.
/// For each position the tag n-grams from <c>Tags</c> are scored with a
/// <c>WeightedFeatureSum</c> built from <c>WeightVector</c>; the best score per
/// tag pair is stored in <c>Pi</c> and the tag that produced it in <c>Bp</c>.
/// The last two tags come from the best-scoring tag pair at the final position;
/// every earlier tag is the highest-scoring tag seen at its own position
/// (the back-pointer trace is currently disabled).
/// </summary>
/// <param name="inputSentance">Words to tag. An empty list yields an empty result.</param>
/// <param name="debug">When true, the feature debug text of the best-scoring candidate at each position is collected.</param>
/// <param name="debugList">Receives one debug string per position when <paramref name="debug"/> is true; otherwise it stays empty.</param>
/// <returns>The chosen tag for each word, in sentence order.</returns>
/// <exception cref="InvalidOperationException">
/// The best tag pair at the final position does not split into exactly two tags.
/// </exception>
public List<string> Decode(List<string> inputSentance, bool debug, out List<string> debugList)
{
    // Scores must be strictly above this floor to become a per-position maximum.
    const double ScoreFloor = -0xFFFF;

    var outputTags = new string[inputSentance.Count];
    var weightedFeatureSum = new WeightedFeatureSum(WeightVector, inputSentance);

    // NOTE(review): Pi and Bp are appended to and never cleared here, so a second
    // call on the same instance indexes tables left over from the first - confirm
    // that callers use a fresh instance per sentence.
    Pi.Add(new Dictionary<string, double> { { "*:*", 0 } });

    var lastTwo = string.Empty;
    double lastTwoTagsValue = ScoreFloor;
    var lastPosition = inputSentance.Count - 1;
    debugList = new List<string>(inputSentance.Count);

    for (var k = 0; k < inputSentance.Count; k++)
    {
        double max = ScoreFloor;
        Pi.Add(new Dictionary<string, double>());
        Bp.Add(new Dictionary<string, string>());

        // Position 0 enumerates single tags, every later position tag pairs "a:b".
        foreach (var tagStr in Tags.GetNGramTags(k == 0 ? 1 : 2))
        {
            var tagsKey = tagStr;
            double current;

            if (k > 1)
            {
                var split = tagStr.Split(new[] { ':' });
                foreach (var t in Tags.GetNGramTags(1))
                {
                    string debugStr;
                    var trigram = t + ":" + tagsKey;
                    var previousKey = t + ":" + split[0];

                    // Presumably makes sure Pi[k - 1] has an entry for the key
                    // before it is read below - Initialize is defined elsewhere.
                    Initialize(k - 1, previousKey);
                    double newWeight = weightedFeatureSum.GetFeatureValue(trigram, k, debug, out debugStr);
                    current = Pi[k - 1][previousKey] + newWeight;

                    if (current > max)
                    {
                        max = current;
                        outputTags[k] = split[1];
                        if (debug)
                        {
                            // Overwrite slot k instead of inserting: Insert left stale
                            // entries behind and threw when an earlier position had
                            // recorded nothing.
                            while (debugList.Count <= k)
                            {
                                debugList.Add(string.Empty);
                            }

                            debugList[k] = debugStr;
                        }
                    }

                    Initialize(k, tagsKey);
                    if (current > Pi[k][tagsKey])
                    {
                        Pi[k][tagsKey] = current;
                        Bp[k][tagsKey] = t;
                    }
                }
            }
            else
            {
                // The first two positions are padded on the left with the "*" start symbol.
                if (k == 0)
                {
                    tagsKey = "*:" + tagsKey;
                }

                var split = tagsKey.Split(new[] { ':' });
                var trigram = "*:" + tagsKey;
                string debugStr;
                current = weightedFeatureSum.GetFeatureValue(trigram, k, debug, out debugStr);

                Initialize(k, tagsKey);
                if (current > Pi[k][tagsKey])
                {
                    Pi[k][tagsKey] = current;
                    Bp[k][tagsKey] = "*";
                }

                if (current > max)
                {
                    max = current;
                    outputTags[k] = split[1];
                    if (debug)
                    {
                        while (debugList.Count <= k)
                        {
                            debugList.Add(string.Empty);
                        }

                        debugList[k] = debugStr;
                    }
                }
            }

            if (k == lastPosition)
            {
                // Disabled STOP transition:
                //var temp = tagsKey + ":STOP";
                //current = Pi[k][tagsKey] + weightedFeatureSum.GetFeatureValue(temp, k + 1);
                current = Pi[k][tagsKey];

                // ">=" lets a later pair with an equal score replace an earlier one.
                if (current >= lastTwoTagsValue)
                {
                    lastTwo = tagsKey;
                    lastTwoTagsValue = current;
                }
            }
        }
    }

    // Nothing to tag: return the empty result instead of failing the pair check below.
    if (inputSentance.Count == 0)
    {
        return outputTags.ToList();
    }

    var lastTwoSplit = lastTwo.Split(new[] { ':' });
    if (lastTwoSplit.Length != 2)
    {
        throw new InvalidOperationException("count mismatch for lastTwo tags" + lastTwo);
    }

    var n = lastPosition;
    if (n - 1 >= 0)
    {
        outputTags[n - 1] = lastTwoSplit[0];
    }

    outputTags[n] = lastTwoSplit[1];

    // Disabled back-pointer trace:
    //for (k = n - 2; k >= 0; k--)
    //{
    //    outputTags[k] = Bp[k + 2][outputTags[k + 1] + ":" + outputTags[k + 2]];
    //}
    return outputTags.ToList();
}
/// <summary>
/// Tags <paramref name="inputSentance"/> greedily from left to right: at each
/// position every single tag is scored together with the two tags already chosen
/// for the preceding positions ("*" before the sentence start), and the
/// highest-scoring tag is kept.
/// </summary>
/// <param name="inputSentance">Words to tag.</param>
/// <param name="debug">When true, the feature debug text of the winning tag at each position is collected.</param>
/// <param name="debugList">Receives one debug string per position when <paramref name="debug"/> is true; otherwise it stays empty.</param>
/// <returns>The chosen tag for each word, in sentence order.</returns>
public List<string> DecodeNew(List<string> inputSentance, bool debug, out List<string> debugList)
{
    // Scores must be strictly above this floor to be selected.
    const double ScoreFloor = -0xFFFF;

    var outputTags = new string[inputSentance.Count];
    var weightedFeatureSum = new WeightedFeatureSum(WeightVector, inputSentance);

    // NOTE(review): Pi and Bp are appended to and never cleared here - confirm
    // that callers use a fresh instance per sentence.
    Pi.Add(new Dictionary<string, double> { { "*:*", 0 } });
    debugList = new List<string>(inputSentance.Count);

    for (var k = 0; k < inputSentance.Count; k++)
    {
        double max = ScoreFloor;
        Pi.Add(new Dictionary<string, double>());
        Bp.Add(new Dictionary<string, string>());

        // Context is the pair of tags already decided; "*" stands in before the start.
        var previousTag = k > 0 ? outputTags[k - 1] : "*";
        var prePreviousTag = k > 1 ? outputTags[k - 2] : "*";

        foreach (var t in Tags.GetNGramTags(1))
        {
            string debugStr;
            var tagsKey = previousTag + ":" + t;
            var trigram = prePreviousTag + ":" + tagsKey;

            if (k > 0)
            {
                // Initialize is defined elsewhere; presumably it ensures the entry exists.
                Initialize(k - 1, previousTag);
            }

            var newWeight = weightedFeatureSum.GetFeatureValue(trigram, k, debug, out debugStr);
            var current = newWeight;
            Initialize(k, t);

            if (current > max)
            {
                max = current;
                outputTags[k] = t;
                Pi[k][t] = current;
                if (debug)
                {
                    // Overwrite slot k instead of inserting: Insert left stale entries
                    // behind and threw when an earlier position had recorded nothing.
                    while (debugList.Count <= k)
                    {
                        debugList.Add(string.Empty);
                    }

                    debugList[k] = debugStr;
                }
            }
        }
    }

    // second pass over sentence (disabled).
    // fix cases like
    //New LOCATION OTHER
    //Line LOCATION LOCATION
    //Cinema LOCATION LOCATION
    //for (int i = 1; i < outputTags.Length; i++)
    //{
    //    if (outputTags[i].Equals("LOCATION") &&
    //        outputTags[i-1].Equals("OTHER") &&
    //        char.IsUpper(inputSentance[i-1][0]))
    //    {
    //        var preWord = Features.RemoveSymbols(inputSentance[i - 1]);
    //        if (preWord.Length == inputSentance[i - 1].Length)
    //        {
    //            outputTags[i - 1] = "LOCATION";
    //        }
    //    }
    //}
    return outputTags.ToList();
}