示例#1
0
        /// <summary>
        /// Console front end of the query term weight analyzer.
        /// With one argument the queries are read from the console and the results are
        /// printed to the console; with three arguments the queries are read from the
        /// input file and the results are written to the output file (both UTF-8).
        /// Reading stops at end of input or at a line equal to "quit" (case-insensitive).
        /// </summary>
        /// <param name="args">
        /// args[0]: configuration file name (must exist).
        /// args[1], args[2]: optional input and output file names; either both or none.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length != 3 && args.Length != 1)
            {
                Console.WriteLine("QueryTermWeightAnalyzerConsole.exe [configuration file name] <input file name> <output file name>");
                Console.WriteLine("     [configuration file name] : a specified file name contains configuration items for analyzing");
                Console.WriteLine("     <input/output file name> : input/output file name contains input query for analyzing and output result");
                Console.WriteLine("Examples:");
                Console.WriteLine("     QueryTermWeightAnalyzerConsole.exe qt_analyzer.ini input.txt output.txt");
                Console.WriteLine("         Load queries from input.txt file, analyze and save result into output.txt file");
                Console.WriteLine("     QueryTermWeightAnalyzerConsole.exe qt_analyzer.ini");
                Console.WriteLine("         Load queries from console, analyze and output result to console");
                return;
            }

            if (File.Exists(args[0]) == false)
            {
                Console.WriteLine("Configuration file is not existed: {0}", args[0]);
                return;
            }

            // The usage check above guarantees that args.Length is either 1 or 3,
            // so no further "invalid parameter count" branch is needed.
            bool useFiles = (args.Length == 3);
            if (useFiles && File.Exists(args[1]) == false)
            {
                Console.WriteLine("Input file {0} is not existed.", args[1]);
                return;
            }

            StreamReader sr = null;
            StreamWriter sw = null;
            try
            {
                if (useFiles)
                {
                    sr = new StreamReader(args[1], Encoding.UTF8);
                    sw = new StreamWriter(args[2], false, Encoding.UTF8);
                }

                Console.WriteLine("Start to initialize query term weight analyzer...");
                QueryTermWeightAnalyzer.QueryTermWeightAnalyzer analyzer = new QueryTermWeightAnalyzer.QueryTermWeightAnalyzer();
                if (analyzer.Initialize(args[0]) == false)
                {
                    // The finally block below releases the streams on this early exit.
                    Console.WriteLine("Initialize the analyzer failed.");
                    return;
                }
                Console.WriteLine("Done.");

                //Create working instance for each thread
                Instance instance = analyzer.CreateInstance();

                while (true)
                {
                    string strLine = (sr != null) ? sr.ReadLine() : Console.ReadLine();

                    // Ordinal, case-insensitive comparison: the result must not depend
                    // on the current culture (e.g. Turkish casing of 'I').
                    if (strLine == null || string.Equals(strLine, "quit", StringComparison.OrdinalIgnoreCase))
                    {
                        break;
                    }

                    // Only the first tab-separated column is analyzed as the query.
                    string[] columns = strLine.Split('\t');

                    List<Token> tknList = analyzer.Analyze(instance, columns[0]);
                    if (tknList == null)
                    {
                        //Analyze term weight is failed.
                        Console.WriteLine("Failed to analyze {0}", columns[0]);
                        continue;
                    }

                    // Output format per token: term[RANK_<rankId>, <score with 2 decimals>]
                    StringBuilder sbOutput = new StringBuilder();
                    foreach (Token token in tknList)
                    {
                        sbOutput.Append(token.strTerm);
                        sbOutput.Append("[RANK_");
                        sbOutput.Append(token.rankId.ToString());
                        sbOutput.Append(", ");
                        sbOutput.Append(token.rankingscore.ToString("0.00"));
                        sbOutput.Append("] ");
                    }
                    string strOutput = sbOutput.ToString().Trim();

                    if (sw != null)
                    {
                        sw.WriteLine(strOutput);
                    }
                    else
                    {
                        Console.WriteLine(strOutput);
                    }
                }
            }
            finally
            {
                // Always flush and release the files, including on early return or exception.
                if (sw != null)
                {
                    sw.Dispose();
                }

                if (sr != null)
                {
                    sr.Dispose();
                }
            }
        }
示例#2
0
        /// <summary>
        /// Extracts ranking features from a labeled corpus and writes them as
        /// tab-separated training and test files.
        /// Each distinct, non-blank input line is a whitespace-separated list of
        /// "term[tag]" tokens. For every term one row is written in the format
        /// m:Rating\tm:QueryId\tTerm\tQuery\tFeatureSet.
        /// The feature names are additionally written, one per line, to "activefeatures.txt".
        /// </summary>
        /// <param name="args">
        /// args[0]: configuration file name.
        /// args[1]: input corpus file name.
        /// args[2]: output file name prefix (".train" and ".test" are appended).
        /// args[3]: corpus size used to split queries between training and test output.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length != 4)
            {
                Console.WriteLine("RankingFeatureExtractor.exe [configuration file name] [input file name] [output file name] [corpus size]");
                return;
            }

            // Validate the cheap arguments first, before the analyzer is initialized
            // and before any output file is created.
            int maxSize;
            if (int.TryParse(args[3], out maxSize) == false)
            {
                Console.WriteLine("Invalid corpus size: {0}", args[3]);
                return;
            }

            if (File.Exists(args[1]) == false)
            {
                Console.WriteLine("Input file {0} is not existed.", args[1]);
                return;
            }

            QueryTermWeightAnalyzer.QueryTermWeightAnalyzer analyzer = new QueryTermWeightAnalyzer.QueryTermWeightAnalyzer();
            if (analyzer.Initialize(args[0]) == false)
            {
                Console.WriteLine("Initialize the analyzer failed.");
                return;
            }
            Instance instance = analyzer.CreateInstance();

            // Tab-separated feature names; used for the column header and the active feature list.
            string strAF = analyzer.GetFeatureName();
            string strHeader = "m:Rating\tm:QueryId\tTerm\tQuery\t" + strAF;

            // using blocks guarantee the files are flushed and closed even if an exception is thrown.
            using (StreamReader sr = new StreamReader(args[1]))
            using (StreamWriter sw_train = new StreamWriter(args[2] + ".train"))
            using (StreamWriter sw_test = new StreamWriter(args[2] + ".test"))
            {
                //Write column header into file (include feature set name)
                sw_train.WriteLine(strHeader);
                sw_test.WriteLine(strHeader);

                //Write all active feature name into file
                File.WriteAllLines("activefeatures.txt", strAF.Split('\t'));

                HashSet<string> setLine = new HashSet<string>();
                int g_id = 10000;   // query id written to the m:QueryId column
                int cnt = 0;        // number of queries written so far

                string rawLine;
                while ((rawLine = sr.ReadLine()) != null)
                {
                    string strLine = rawLine.Trim();

                    // Skip blank lines and lines that were already processed
                    // (HashSet.Add returns false for duplicates).
                    if (strLine.Length == 0 || setLine.Add(strLine) == false)
                    {
                        continue;
                    }

                    //Parse training corpus
                    // RemoveEmptyEntries tolerates runs of whitespace between tokens.
                    string[] items = strLine.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    List<string> termList = new List<string>(items.Length);
                    List<string> tagList = new List<string>(items.Length);
                    StringBuilder sbQuery = new StringBuilder();
                    bool malformed = false;
                    foreach (string item in items)
                    {
                        // Token layout: term[tag] - the tag sits between the last '[' and the final character.
                        int pos = item.LastIndexOf('[');
                        int tagLength = item.Length - pos - 2;
                        if (pos < 0 || tagLength < 0)
                        {
                            malformed = true;
                            break;
                        }

                        string strTerm = item.Substring(0, pos);
                        string strTag = item.Substring(pos + 1, tagLength);

                        termList.Add(strTerm.ToLower());
                        tagList.Add(strTag);

                        // The query is rebuilt by concatenating the terms without separators.
                        sbQuery.Append(strTerm);
                    }

                    if (malformed)
                    {
                        Console.WriteLine("Failed to parse {0}", strLine);
                        continue;
                    }

                    //Extract each term's features
                    List<string> featureList = analyzer.ExtractFeature(instance, termList);
                    if (featureList == null || featureList.Count != termList.Count)
                    {
                        //Failed to analyze term weight
                        Console.WriteLine("Failed to analyze {0}", strLine);
                        continue;
                    }

                    //The [0, maxSize] queries are for training corpus
                    //The [maxSize + 1, maxSize * 2] queries are for test corpus
                    StreamWriter sw = (cnt <= maxSize) ? sw_train : sw_test;
                    string strQuery = sbQuery.ToString().Trim();

                    //Format: m:Rating\tm:QueryId\tTerm\tQuery\tFeatureSet
                    for (int i = 0; i < featureList.Count; i++)
                    {
                        sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", tagList[i], g_id, termList[i], strQuery, featureList[i]);
                    }
                    g_id++;

                    cnt++;
                    // 2L avoids int overflow for very large corpus sizes.
                    if (cnt > 2L * maxSize)
                    {
                        break;
                    }
                }
            }
        }