/// <summary>
/// Looks up a language-model score for <paramref name="strText"/> by probing the
/// trie with the keys that GenerateNGram produces for positions
/// <paramref name="start"/>..<paramref name="end"/>, and adds back-off weights
/// when the first hit is not at <paramref name="start"/>.
/// </summary>
/// <param name="strText">N-gram text; it is split on whitespace when back-off weights are needed.</param>
/// <param name="start">First position passed to GenerateNGram.</param>
/// <param name="end">Last position (inclusive) passed to GenerateNGram.</param>
/// <param name="probability">
/// Receives the resulting score when 0 is returned; left untouched when 1 is returned.
/// </param>
/// <returns>
/// 1 when none of the probed keys exists in the trie (out of vocabulary), otherwise 0.
/// </returns>
private int lm_ngram_prob(string strText, int start, int end, ref double probability)
{
    // Walk forward from start until some generated key is present in the trie.
    // NOTE(review): presumably larger positions yield shorter n-grams, so the
    // first hit is the longest known n-gram - confirm against GenerateNGram.
    NGram hit = new NGram();
    int pos = start;
    while (pos <= end)
    {
        int hitOffset = daSearch.SearchByPerfectMatch(GenerateNGram(strText, pos));
        if (hitOffset >= 0)
        {
            hit = lm_prob[hitOffset];
            break;
        }

        pos++;
    }

    if (pos > end)
    {
        // Nothing matched anywhere in the range: out of vocabulary.
        return 1;
    }

    if (pos == start)
    {
        // The very first key matched, so no back-off weights are involved.
        probability = hit.prob;
        return 0;
    }

    // Drop the last whitespace-separated word; back-off weights are looked up
    // on the remaining text.
    string[] tokens = strText.Split();
    string history = String.Join(" ", tokens, 0, tokens.Length - 1);

    // Accumulate back-off weights walking back from the hit position towards
    // start, stopping at the first key that is missing from the trie.
    double backoffSum = 0;
    for (int k = pos - 1; k >= start; k--)
    {
        int bowOffset = daSearch.SearchByPerfectMatch(GenerateNGram(history, k));
        if (bowOffset < 0)
        {
            break;
        }

        backoffSum += lm_prob[bowOffset].bow;
    }

    probability = hit.prob + backoffSum;
    return 0;
}
/// <summary>
/// Builds the feature strings for <paramref name="record"/> at position
/// <paramref name="startX"/> and maps them to the ids stored in the trie.
/// </summary>
/// <param name="record">Record passed through to GenerateFeature.</param>
/// <param name="startX">Start position passed through to GenerateFeature.</param>
/// <returns>
/// The ids (in feature order) of every generated feature string whose lookup
/// returned a non-negative value; other features are skipped.
/// </returns>
public List<int> GetFeatureIds(List<string[]> record, int startX)
{
    var features = GenerateFeature(record, startX);

    // A negative lookup result means the feature string is not part of the
    // feature set, so it is filtered out.
    var knownIds = from feature in features
                   select daSearch.SearchByPerfectMatch(feature) into featureId
                   where featureId >= 0
                   select featureId;

    return knownIds.ToList();
}
/// <summary>
/// Builds the feature strings for <paramref name="record"/> at position
/// <paramref name="startX"/> and maps them to the ids stored in the trie.
/// </summary>
/// <param name="record">Record passed through to GenerateFeature.</param>
/// <param name="startX">Start position passed through to GenerateFeature.</param>
/// <returns>
/// The ids (in feature order) of every generated feature string whose lookup
/// returned a non-negative value; other features are skipped.
/// </returns>
public List<int> GetFeatureIds(List<string[]> record, int startX)
{
    List<string> features = GenerateFeature(record, startX);

    // Look every feature string up once, in order.
    List<int> lookupResults = features.ConvertAll(feature => daSearch.SearchByPerfectMatch(feature));

    // Negative results mark features that were never built into the feature
    // set; only the real ids are returned.
    return lookupResults.FindAll(featureId => featureId >= 0);
}
/// <summary>
/// Verifies a double-array trie file against the raw text file it was built from
/// and prints every key whose stored value differs.
/// </summary>
/// <param name="strTextFileName">
/// Raw text file used to build the trie. Each non-empty line has the form
/// <c>key \t value</c>, where key is a string and value is a non-negative integer.
/// A line without a tab or with a non-integer value makes this method throw.
/// </param>
/// <param name="strDAFileName">
/// Binary double-array trie file built from <paramref name="strTextFileName"/>.
/// </param>
private static void Verify(string strTextFileName, string strDAFileName)
{
    DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();

    // The reader is wrapped in a using block so the file handle is released
    // even when loading or parsing throws.
    using (StreamReader sr = new StreamReader(strTextFileName))
    {
        das.Load(strDAFileName);

        while (sr.EndOfStream == false)
        {
            string strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                continue;
            }

            string[] items = strLine.Split('\t');
            string fea = items[0];
            int val = int.Parse(items[1]);

            // The value stored in the trie must match the one in the text file.
            int rval = das.SearchByPerfectMatch(fea);
            if (rval != val)
            {
                Console.WriteLine("Values in raw text file and double array trie-tree is different");
                Console.WriteLine("Key-Value in text file: {0}", strLine);
                Console.WriteLine("Value in DA trie: {0}", rval);
            }
        }
    }

    // Exercise SearchAsKeyPrefix: TestSearchPrefix_case0, TestSearchPrefix_case01 and
    // TestSearchPrefix_case012 should be in the result list. The results are not
    // checked here; the calls only confirm the searches run.
    List<int> resultList = new List<int>();
    das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

    // Exercise SearchByPrefix. Both calls deliberately share one result list,
    // exactly as before.
    resultList = new List<int>();
    das.SearchByPrefix("TestSearchPrefix_case0", resultList);
    das.SearchByPrefix("U04:京", resultList);

    Console.WriteLine("Done!");
}
/// <summary>
/// Verifies a double-array trie file against the raw text file it was built from
/// and prints every key whose stored value differs.
/// </summary>
/// <param name="strTextFileName">
/// Raw text file used to build the trie. Each non-empty line has the form
/// <c>key \t value</c>, where key is a string and value is a non-negative integer.
/// A line without a tab or with a non-integer value makes this method throw.
/// </param>
/// <param name="strDAFileName">
/// Binary double-array trie file built from <paramref name="strTextFileName"/>.
/// </param>
private static void Verify(string strTextFileName, string strDAFileName)
{
    DoubleArrayTrieSearch das = new DoubleArrayTrieSearch();

    // The reader is wrapped in a using block so the file handle is released
    // even when loading or parsing throws.
    using (StreamReader sr = new StreamReader(strTextFileName))
    {
        das.Load(strDAFileName);

        while (sr.EndOfStream == false)
        {
            string strLine = sr.ReadLine();
            if (strLine.Length == 0)
            {
                continue;
            }

            string[] items = strLine.Split('\t');
            string fea = items[0];
            int val = int.Parse(items[1]);

            // The value stored in the trie must match the one in the text file.
            int rval = das.SearchByPerfectMatch(fea);
            if (rval != val)
            {
                Console.WriteLine("Values in raw text file and double array trie-tree is different");
                Console.WriteLine("Key-Value in text file: {0}", strLine);
                Console.WriteLine("Value in DA trie: {0}", rval);
            }
        }
    }

    // Exercise SearchAsKeyPrefix: TestSearchPrefix_case0, TestSearchPrefix_case01 and
    // TestSearchPrefix_case012 should be in the result list. The results are not
    // checked here; the calls only confirm the searches run.
    List <int> resultList = new List <int>();
    das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

    // Exercise SearchByPrefix. Both calls deliberately share one result list,
    // exactly as before.
    resultList = new List <int>();
    das.SearchByPrefix("TestSearchPrefix_case0", resultList);
    das.SearchByPrefix("U04:京", resultList);

    Console.WriteLine("Done!");
}
/// <summary>
/// Gets the feature id that corresponds to the given key.
/// </summary>
/// <param name="str">Key to look up with an exact match.</param>
/// <returns>
/// Whatever <c>da.SearchByPerfectMatch</c> returns for <paramref name="str"/>.
/// NOTE(review): presumably a negative value means the key is unknown - confirm.
/// </returns>
public int get_id(string str)
{
    int featureId = da.SearchByPerfectMatch(str);
    return featureId;
}