예제 #1
0
        // Computes a back-off style score for strText and writes it to probability.
        //
        // Candidates GenerateNGram(strText, j) are tried for j = start..end, in that order;
        // the first one found by daSearch.SearchByPerfectMatch supplies the base "prob" value.
        // If that match is not at j == start, the last whitespace-separated token of strText
        // is dropped and the "bow" values of GenerateNGram(truncated, k) are summed for
        // k = j-1 down to start, stopping at the first candidate that is not found.
        //
        // Returns 0 when probability has been written, 1 when no candidate was found
        // (probability keeps the caller's value in that case).
        // NOTE(review): prob and bow are added, so they are presumably log-domain values - confirm.
        private int lm_ngram_prob(string strText, int start, int end, ref double probability)
        {
            NGram matched = new NGram();

            // Scan the candidates until one is present in the model.
            int level = start;
            bool found = false;
            while (level <= end)
            {
                int hitOffset = daSearch.SearchByPerfectMatch(GenerateNGram(strText, level));
                if (hitOffset >= 0)
                {
                    matched = lm_prob[hitOffset];
                    found   = true;
                    break;
                }

                level++;
            }

            if (!found)
            {
                // OOV: nothing matched in [start, end]
                return 1;
            }

            if (level == start)
            {
                // The very first candidate matched, so no back-off weight is applied.
                probability = matched.prob;
                return 0;
            }

            double baseProb   = matched.prob;
            double backoffSum = 0;

            // Drop the last word; the back-off weights are looked up on the remaining text.
            string[] tokens  = strText.Split();
            string   context = String.Join(" ", tokens, 0, tokens.Length - 1);

            // Accumulate back-off weights from the matched level back towards start.
            for (int k = level - 1; k >= start; k--)
            {
                int contextOffset = daSearch.SearchByPerfectMatch(GenerateNGram(context, k));
                if (contextOffset < 0)
                {
                    break;
                }

                backoffSum += lm_prob[contextOffset].bow;
            }

            probability = baseProb + backoffSum;
            return 0;
        }
예제 #2
0
        //Build the list of known feature ids for the given record at position startX.
        //Feature strings come from GenerateFeature; each one is looked up with
        //daSearch.SearchByPerfectMatch and only non-negative ids are kept, in order.
        public List <int> GetFeatureIds(List <string[]> record, int startX)
        {
            var knownIds = new List <int>();

            foreach (var feature in GenerateFeature(record, startX))
            {
                var featureId = daSearch.SearchByPerfectMatch(feature);
                if (featureId < 0)
                {
                    //Feature string is not in the feature set, skip it
                    continue;
                }

                knownIds.Add(featureId);
            }

            return knownIds;
        }
예제 #3
0
        //Build the list of known feature ids for the given record at position startX.
        //Feature strings come from GenerateFeature; each one is looked up with
        //daSearch.SearchByPerfectMatch and only non-negative ids are kept, in order.
        public List <int> GetFeatureIds(List <string[]> record, int startX)
        {
            List <string> features = GenerateFeature(record, startX);

            //Map every feature string to its lookup result (negative when it is not in the feature set)
            List <int> lookupResults = features.ConvertAll <int>(feature => daSearch.SearchByPerfectMatch(feature));

            //Drop the features that are not in the feature set
            return lookupResults.FindAll(id => id >= 0);
        }
예제 #4
0
        //Verify whether the double array trie-tree is correct.
        //Every "key \t value" line of the raw text file is looked up in the trie and a
        //message is printed to the console for each key whose stored value differs.
        //Afterwards the prefix search functions are exercised (their results are not checked).
        //strTextFileName: raw text file name used to build DA trie-tree
        //  text file format: key \t value
        //  key as string type
        //  value as non-negative integer
        //strDAFileName: double array trie-tree binary file name built from strTextFileName
        private static void Verify(string strTextFileName, string strDAFileName)
        {
            DoubleArrayTrieSearch das;

            //The using block guarantees the text file is closed even if loading or parsing throws
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                das = new DoubleArrayTrieSearch();
                das.Load(strDAFileName);

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    if (strLine.Length == 0)
                    {
                        //Skip empty lines
                        continue;
                    }
                    string[] items = strLine.Split('\t');
                    int val = int.Parse(items[1]);
                    string fea = items[0];

                    int rval = das.SearchByPerfectMatch(fea);
                    if (rval != val)
                    {
                        Console.WriteLine("Values in raw text file and double array trie-tree is different");
                        Console.WriteLine("Key-Value in text file: {0}", strLine);
                        Console.WriteLine("Value in DA trie: {0}", rval);
                    }
                }
            }

            //Test SearchAsKeyPrefix function.
            //TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012 should be in result list
            List<int> resultList = new List<int>();
            das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

            //Test SearchByPrefix
            resultList = new List<int>();
            das.SearchByPrefix("TestSearchPrefix_case0", resultList);
            das.SearchByPrefix("U04:京", resultList);

            Console.WriteLine("Done!");
        }
예제 #5
0
        //Verify whether the double array trie-tree is correct.
        //Every "key \t value" line of the raw text file is looked up in the trie and a
        //message is printed to the console for each key whose stored value differs.
        //Afterwards the prefix search functions are exercised (their results are not checked).
        //strTextFileName: raw text file name used to build DA trie-tree
        //  text file format: key \t value
        //  key as string type
        //  value as non-negative integer
        //strDAFileName: double array trie-tree binary file name built from strTextFileName
        private static void Verify(string strTextFileName, string strDAFileName)
        {
            DoubleArrayTrieSearch das;

            //The using block guarantees the text file is closed even if loading or parsing throws
            using (StreamReader sr = new StreamReader(strTextFileName))
            {
                das = new DoubleArrayTrieSearch();
                das.Load(strDAFileName);

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    if (strLine.Length == 0)
                    {
                        //Skip empty lines
                        continue;
                    }
                    string[] items = strLine.Split('\t');
                    int      val   = int.Parse(items[1]);
                    string   fea   = items[0];

                    int rval = das.SearchByPerfectMatch(fea);
                    if (rval != val)
                    {
                        Console.WriteLine("Values in raw text file and double array trie-tree is different");
                        Console.WriteLine("Key-Value in text file: {0}", strLine);
                        Console.WriteLine("Value in DA trie: {0}", rval);
                    }
                }
            }

            //Test SearchAsKeyPrefix function.
            //TestSearchPrefix_case0, TestSearchPrefix_case01, TestSearchPrefix_case012 should be in result list
            List <int> resultList = new List <int>();
            das.SearchAsKeyPrefix("TestSearchPrefix_case012", resultList);

            //Test SearchByPrefix
            resultList = new List <int>();
            das.SearchByPrefix("TestSearchPrefix_case0", resultList);
            das.SearchByPrefix("U04:京", resultList);

            Console.WriteLine("Done!");
        }
예제 #6
0
 //Get the feature id corresponding to the given key.
 //The result of da.SearchByPerfectMatch is returned unchanged.
 //NOTE(review): a negative result presumably means the key was not found - confirm.
 public int get_id(string str)
 {
     int featureId = da.SearchByPerfectMatch(str);
     return featureId;
 }