public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
{
    _inputSentence = inputSentence;
    _wc = wc;
    _tagList = tagList;
    _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _uDictionary = new Dictionary<int, Dictionary<string, double>>();
    UabDictionary = new Dictionary<int, Dictionary<string, double>>();
    Z = 0;
    _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
    cList = new List<double>(_inputSentence.Count);
    dList = new List<double>(_inputSentence.Count);
    _useScaling = true;
    _useLog = false;

    // Pre-compute every 2-gram tag pair as a packed "first@#second" key,
    // growing the array on demand when there are more than four pairs.
    _twoGramsList = new string[4];
    var ngramTags = new Tags(_tagList);
    int index = 0;
    foreach (var ngram in ngramTags.GetNGramTags(2))
    {
        if (index >= _twoGramsList.Length)
        {
            Array.Resize(ref _twoGramsList, index + 1);
        }
        string[] split = ngram.Split(':');
        _twoGramsList[index] = split[0] + "@#" + split[1];
        index++;
    }
}
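For reference, a minimal self-contained sketch of the 2-gram key packing performed above, using an illustrative two-tag set. The colon-separated "prev:cur" pair format and the start symbol "*" are assumptions inferred from the Split(':') call and from CreateCache() further below; Tags.GetNGramTags itself is stood in for by a plain list.

using System;
using System.Collections.Generic;

static class TwoGramKeyDemo
{
    // Hedged sketch: re-create the "first@#second" packing used above,
    // assuming GetNGramTags(2) yields colon-separated "prev:cur" pairs.
    // The tag set { "O", "I-GENE" } is illustrative only.
    static void Main()
    {
        var ngrams = new List<string>
        {
            "*:O", "*:I-GENE", "O:O", "O:I-GENE", "I-GENE:O", "I-GENE:I-GENE"
        };
        var keys = new string[4]; // starts small, grows on demand like _twoGramsList
        int index = 0;
        foreach (var ngram in ngrams)
        {
            if (index >= keys.Length)
            {
                Array.Resize(ref keys, index + 1);
            }
            var split = ngram.Split(':');
            keys[index++] = split[0] + "@#" + split[1];
        }
        Console.WriteLine(string.Join(", ", keys));
        // *@#O, *@#I-GENE, O@#O, O@#I-GENE, I-GENE@#O, I-GENE@#I-GENE
    }
}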
public void Setup(bool debug)
{
    // Load the trained model and the feature-to-index mapping.
    var readModel = new ReadModel(InputModelFile);
    var temp = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));
    _weightVector = new WeightVector(temp.GetFeatureToKdDictionary());
    foreach (var pair in readModel.ModelIterator())
    {
        _weightVector.Add(pair);
    }
    _tags = new Tags(_tagList);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(_weightVector, _tags);

    // Read the test file and decode it sentence by sentence.
    var inputData = new ReadInputData(InputTestFile);
    var writeModel = new WriteModel(_outputTestFile);
    foreach (var line in inputData.GetSentence())
    {
        List<string> debugList;
        var outputTags = _viterbiForGlobalLinearModel.Decode(line, debug, out debugList);
        if (debug)
        {
            writeModel.WriteDataWithTagDebug(line, outputTags, debugList);
        }
        else
        {
            writeModel.WriteDataWithTag(line, outputTags);
        }
    }
    writeModel.Flush();
}
public ViterbiForGlobalLinearModel(WeightVector weightVector, Tags tags)
{
    WeightVector = weightVector;
    Tags = tags;
    Pi = new List<Dictionary<string, double>>();
    Bp = new List<Dictionary<string, string>>();
}
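To make the Pi/Bp tables concrete: a hedged, self-contained Viterbi sketch over toy scores, assuming the conventions the decoder above implies (Pi[pos][tag] holds the best path score ending in tag at pos, Bp[pos][tag] the previous tag on that path, with "*" as the start symbol). Score() here is a toy stand-in for the real weighted feature sum, not the source's scoring function.

using System;
using System.Collections.Generic;

static class ViterbiSketch
{
    // Toy stand-in for the weighted feature sum used by the real decoder.
    static double Score(string prev, string cur, int pos) =>
        cur == "O" ? 1.0 : 0.5; // arbitrary illustrative scores

    static void Main()
    {
        var tags = new[] { "O", "I-GENE" };
        int n = 3; // sentence length
        var pi = new List<Dictionary<string, double>>();
        var bp = new List<Dictionary<string, string>>();
        for (int pos = 0; pos < n; pos++)
        {
            pi.Add(new Dictionary<string, double>());
            bp.Add(new Dictionary<string, string>());
            foreach (var cur in tags)
            {
                // At position 0 the only possible previous tag is "*".
                var prevTags = pos == 0 ? new[] { "*" } : tags;
                foreach (var prev in prevTags)
                {
                    double prevScore = pos == 0 ? 0 : pi[pos - 1][prev];
                    double s = prevScore + Score(prev, cur, pos);
                    if (!pi[pos].ContainsKey(cur) || s > pi[pos][cur])
                    {
                        pi[pos][cur] = s;  // best score ending in cur at pos
                        bp[pos][cur] = prev; // back-pointer for path recovery
                    }
                }
            }
        }
        Console.WriteLine(pi[n - 1]["O"]); // 3.0: "O" scores 1.0 at each position
    }
}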
public ComputeGradient(List<List<string>> inputSentence, List<List<string>> tagsList,
    List<string> tagList, double lambda, double learningParam, FeatureCache cache, WriteModel logger)
{
    Logger = logger;
    _inputSentence = inputSentence;
    _outputTagsList = tagsList;
    _tagList = tagList;
    _lambda = lambda;
    _learningParam = learningParam;
    _cache = cache;
    forwardBackwordAlgos = new List<ForwardBackwordAlgo>();
    _weightVector = null;

    // Pre-compute every 2-gram tag pair, both as a packed "first@#second" key
    // and as a KeyValuePair, growing the arrays on demand.
    _twoGramsList = new string[4];
    _twoGramPair = new KeyValuePair<string, string>[4];
    var ngramTags = new Tags(_tagList);
    int index = 0;
    foreach (var ngram in ngramTags.GetNGramTags(2))
    {
        if (index >= _twoGramsList.Length)
        {
            Array.Resize(ref _twoGramsList, index + 1);
            Array.Resize(ref _twoGramPair, index + 1);
        }
        string[] split = ngram.Split(':');
        _twoGramsList[index] = split[0] + "@#" + split[1];
        _twoGramPair[index] = new KeyValuePair<string, string>(split[0], split[1]);
        index++;
    }
}
public MapFeaturesToK(string outputFile, List<string> tagList)
{
    _writeModel = new WriteModel(outputFile);
    _tagList = tagList;
    DictFeaturesToK = new Dictionary<string, int>();
    DictKToFeatures = new Dictionary<int, string>();
    FeatureCount = 0;
    _tags = new Tags(tagList);
}
public ForwardBackwordAlgo(List<string> inputSentence, WeightVector wc, List<string> tagList)
{
    _inputSentence = inputSentence;
    _wc = wc;
    _tagList = tagList;
    _tags = new Tags(tagList);
    _alphaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _betaDictionary = new Dictionary<int, Dictionary<string, double>>();
    _uDictionary = new Dictionary<int, Dictionary<string, double>>();
    UabDictionary = new Dictionary<int, Dictionary<string, double>>();
    Z = 0;
    _weightedFeaturesum = new WeightedFeatureSum(wc, inputSentence, true);
}
public Perceptron(string inputFile, string outputFile, List<string> tagList)
{
    _inputFile = inputFile;
    _outputFile = outputFile;
    var tags = new Tags(tagList);
    MapFeatures = new MapFeaturesToK(inputFile, string.Concat(outputFile, ".featuresToK"), tagList);
    MapFeatures.StartMapping();
    WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tags);
    InputSentences = new List<List<string>>();
    TagsList = new List<List<string>>();
    ReadInputs();
}
public Perceptron(List<string> inputFiles, string outputFile, List<string> tagList, bool useAvg = false)
{
    _outputFile = outputFile;
    _useAvg = useAvg;
    var tags = new Tags(tagList);
    MapFeatures = new MapFeaturesToK(string.Concat(outputFile, ".featuresToK"), tagList);
    MapFeatures.StartMapping(inputFiles);
    WeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
    AvgWeightVector = new WeightVector(MapFeatures.DictFeaturesToK, MapFeatures.FeatureCount);
    _viterbiForGlobalLinearModel = new ViterbiForGlobalLinearModel(WeightVector, tags);
    InputSentences = new List<List<string>>();
    TagsList = new List<List<string>>();
    //ReadInputs();
}
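The update rule itself is not shown in these constructors. For orientation, here is a hedged sketch of the standard structured-perceptron update this class appears to implement (decode with Viterbi, then move the weights toward the gold features and away from the predicted ones); the dense toy arrays stand in for WeightVector, and all numbers are made up.

using System;

static class PerceptronUpdateSketch
{
    // Hedged sketch of a structured-perceptron step: after Viterbi decoding,
    // add the gold-sequence feature counts and subtract the predicted ones.
    static void Main()
    {
        double[] w = { 0.0, 0.0, 0.0 };
        double[] goldFeatures = { 1, 0, 2 }; // f(x, gold tags), toy values
        double[] predFeatures = { 0, 1, 2 }; // f(x, Viterbi tags), toy values

        for (int k = 0; k < w.Length; k++)
        {
            w[k] += goldFeatures[k] - predFeatures[k];
        }
        Console.WriteLine(string.Join(", ", w)); // 1, -1, 0
    }
}

Features that fire equally often on both sequences (the third component here) leave their weights untouched; only disagreements move the model.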
public void Init()
{
    // Load the trained perceptron weights and the feature-to-index mapping.
    var readModel = new ReadModel(InputModelFile + ".preceptron");
    var temp = new ReadModel(string.Concat(InputModelFile, ".featuresToK"));
    var dict = temp.GetFeatureToKdDictionary();
    _weightVector = new WeightVector(dict, dict.Count);
    foreach (var pair in readModel.ModelIterator())
    {
        _weightVector.Add(pair);
    }
    _tags = new Tags(_tagList);
    ViterbiForGLM = new ViterbiForGlobalLinearModel(_weightVector, _tags);
}
public void CreateCache()
{
    Console.WriteLine(DateTime.Now + " creating Cache begin");
    var ngramTags = new Tags(_tags);
    foreach (var ngramTag in ngramTags.GetNGramTags(2))
    {
        string[] split = ngramTag.Split(':');
        for (var lineIndex = 0; lineIndex < Sentences.Count; lineIndex++)
        {
            for (var pos = 0; pos < Sentences[lineIndex].Count; pos++)
            {
                // At position 0 the previous tag is always the start symbol "*".
                if (pos == 0)
                {
                    StoreFeature("*", split[1], pos, lineIndex);
                }
                else
                {
                    StoreFeature(split[0], split[1], pos, lineIndex);
                }
            }
        }
    }
    Console.WriteLine(DateTime.Now + " creating Cache end");
}
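The FeatureCache type itself is not shown here; the calls in CreateCache() and GetSecondTerm() suggest a membership structure keyed by (prevTag, curTag, position, line) plus the feature index k. A toy sketch of that assumed contract, omitting k for brevity:

using System.Collections.Generic;

// Hedged sketch only: the real FeatureCache also tracks the feature index k
// (see the Contains(split[0], split[1], k, pos, lineIndex) call below); this
// toy version keys on the tag pair and location alone.
sealed class ToyFeatureCache
{
    private readonly HashSet<string> _keys = new HashSet<string>();

    // Record that the (prevTag, curTag) pair fires at this position and line.
    public void Store(string prevTag, string curTag, int pos, int line) =>
        _keys.Add($"{prevTag}@#{curTag}@#{pos}@#{line}");

    // Membership test used at gradient time.
    public bool Contains(string prevTag, string curTag, int pos, int line) =>
        _keys.Contains($"{prevTag}@#{curTag}@#{pos}@#{line}");
}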
static void PrintFeatureList(List<string> tags)
{
    var featureTags = new Tags(tags);
    featureTags.Dump(3);
}
private double CalculateGradient(List<string> outputTags, int k, Tags ngramTags, int lineIndex)
{
    // First term: the value of feature k summed along the gold tag sequence,
    // read from the cache instead of being recomputed.
    double output = GetAllFeatureKFromCache(outputTags, k, lineIndex);

    // Second term: the model's expectation for feature k, accumulated
    // position by position from the forward-backward marginals.
    double secondTerm = 0;
    for (var pos = 0; pos < outputTags.Count; pos++)
    {
        secondTerm += GetSecondTerm(ngramTags, lineIndex, pos, k);
    }
    return output - secondTerm;
}
private double GetSecondTerm(Tags ngramTags, int lineIndex, int pos, int k)
{
    double sum = 0;
    foreach (var ngramTag in ngramTags.GetNGramTags(2))
    {
        string[] split = ngramTag.Split(':');
        // Only tag pairs for which feature k fires at this position (i.e. the
        // pair is cached) contribute; GetQ returns the forward-backward
        // marginal q(pos, prevTag, curTag).
        if (_cache.Contains(split[0], split[1], k, pos, lineIndex))
        {
            sum += forwardBackwordAlgos[lineIndex].GetQ(pos, split[0], split[1]) *
                   _weightVector.Get(k);
        }
    }
    return sum;
}
private double Compute(int k, WeightVector weightVector)
{
    double output = 0;
    int lineIndex = 0;
    if (_inputSentence.Count != _outputTagsList.Count)
    {
        throw new Exception("counts don't match " + _inputSentence.Count +
            " with " + _outputTagsList.Count);
    }
    var ngramTags = new Tags(_tagList);

    // Accumulate the gradient contribution of every training sentence.
    foreach (var sentence in _inputSentence)
    {
        var outputTags = _outputTagsList[lineIndex];
        if (sentence.Count != outputTags.Count)
        {
            throw new Exception("compute counts don't match " + sentence.Count +
                " with " + outputTags.Count);
        }
        output += CalculateGradient(outputTags, k, ngramTags, lineIndex);
        lineIndex++;
    }

    // Regularization: subtract lambda * w_k.
    output = output - (_lambda * weightVector.Get(k));
    return output;
}
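Putting Compute(), CalculateGradient(), and GetSecondTerm() together, the per-weight value is: (feature k's count on the gold tags) minus (the marginal-weighted sum over positions where k fires) minus lambda * w_k. A toy numeric walk-through of that arithmetic, with made-up marginals standing in for GetQ:

using System;

static class GradientSketch
{
    // Hedged sketch of the per-weight value computed by Compute(k, ...):
    //   g_k = empirical - sum(q(pos, a, b) * w_k where feature k fires) - lambda * w_k
    // All numbers below are illustrative toy data.
    static void Main()
    {
        double empirical = 3.0;          // feature k fired 3 times on the gold tags
        double[] q = { 0.9, 0.4, 0.7 };  // marginals at the positions where k fires
        double wk = 0.5, lambda = 0.1;

        double expected = 0;
        foreach (var qi in q)
        {
            expected += qi * wk;         // mirrors GetSecondTerm above
        }

        double gradient = empirical - expected - lambda * wk;
        Console.WriteLine(gradient);     // 3 - 1.0 - 0.05 = 1.95
    }
}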