/// <summary>
/// Entry point for the query-term-weight analyzer console tool.
/// With 3 arguments (config, input, output) queries are read from the input file
/// and results written to the output file; with 1 argument (config) the tool runs
/// interactively on the console. EOF or the command "quit" stops the loop.
/// </summary>
static void Main(string[] args)
{
    if (args.Length != 3 && args.Length != 1)
    {
        Console.WriteLine("QueryTermWeightAnalyzerConsole.exe [configuration file name] <input file name> <output file name>");
        Console.WriteLine(" [configuration file name] : a specified file name contains configuration items for analyzing");
        Console.WriteLine(" <input/output file name> : input/output file name contains input query for analyzing and output result");
        Console.WriteLine("Examples:");
        Console.WriteLine(" QueryTermWeightAnalyzerConsole.exe qt_analyzer.ini input.txt output.txt");
        Console.WriteLine(" Load queries from input.txt file, analyze and save result into output.txt file");
        Console.WriteLine(" QueryTermWeightAnalyzerConsole.exe qt_analyzer.ini");
        Console.WriteLine(" Load queries from console, analyze and output result to console");
        return;
    }

    if (File.Exists(args[0]) == false)
    {
        // FIX: corrected the error-message grammar ("is not existed" -> "does not exist").
        Console.WriteLine("Configuration file does not exist: {0}", args[0]);
        return;
    }

    StreamReader sr = null;
    StreamWriter sw = null;
    // NOTE: the original also had "else if (args.Length != 1)" here, but that branch
    // was unreachable -- the first guard already forces args.Length to be 1 or 3.
    if (args.Length == 3)
    {
        if (File.Exists(args[1]) == false)
        {
            Console.WriteLine("Input file {0} does not exist.", args[1]);
            return;
        }
        sr = new StreamReader(args[1], Encoding.UTF8);
        sw = new StreamWriter(args[2], false, Encoding.UTF8);
    }

    // try/finally guarantees the streams are closed even if initialization
    // or analysis throws (the original leaked them on any exception).
    try
    {
        Console.WriteLine("Start to initialize query term weight analyzer...");
        QueryTermWeightAnalyzer.QueryTermWeightAnalyzer analyzer = new QueryTermWeightAnalyzer.QueryTermWeightAnalyzer();
        if (analyzer.Initialize(args[0]) == false)
        {
            Console.WriteLine("Initialize the analyzer failed.");
            return;
        }
        Console.WriteLine("Done.");

        // Create working instance for each thread
        Instance instance = analyzer.CreateInstance();

        while (true)
        {
            string strLine;
            if (sr != null)
            {
                strLine = sr.ReadLine();
            }
            else
            {
                strLine = Console.ReadLine();
            }

            // Stop on EOF or an explicit "quit" command. OrdinalIgnoreCase replaces the
            // original ToLower() comparison, which was culture-sensitive (e.g. Turkish 'I').
            if (strLine == null || string.Equals(strLine, "quit", StringComparison.OrdinalIgnoreCase))
            {
                break;
            }

            // Only the first tab-separated column is the query to analyze.
            string[] columns = strLine.Split('\t');
            List<Token> tknList = analyzer.Analyze(instance, columns[0]);
            if (tknList == null)
            {
                //Analyze term weight is failed.
                Console.WriteLine("Failed to analyze {0}", columns[0]);
                continue;
            }

            // StringBuilder replaces the original repeated string concatenation (O(n^2)).
            StringBuilder sbOutput = new StringBuilder();
            foreach (Token token in tknList)
            {
                sbOutput.Append(token.strTerm + "[RANK_" + token.rankId.ToString() + ", " + token.rankingscore.ToString("0.00") + "] ");
            }

            string strOutput = sbOutput.ToString().Trim();
            if (sw != null)
            {
                sw.WriteLine(strOutput);
            }
            else
            {
                Console.WriteLine(strOutput);
            }
        }
    }
    finally
    {
        if (sr != null)
        {
            sr.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
/// <summary>
/// Entry point for the ranking-feature extractor.
/// Reads a tagged query corpus (one query per line, tokens formatted as "term[tag]"),
/// extracts per-term ranking features, and writes the first maxSize unique queries to
/// "<output>.train" and the next maxSize unique queries to "<output>.test".
/// Args: [configuration file] [input file] [output file prefix] [corpus size].
/// </summary>
static void Main(string[] args)
{
    if (args.Length != 4)
    {
        Console.WriteLine("RankingFeatureExtractor.exe [configuration file name] [input file name] [output file name] [corpus size]");
        return;
    }

    QueryTermWeightAnalyzer.QueryTermWeightAnalyzer analyzer = new QueryTermWeightAnalyzer.QueryTermWeightAnalyzer();
    if (analyzer.Initialize(args[0]) == false)
    {
        Console.WriteLine("Initialize the analyzer failed.");
        return;
    }
    Instance instance = analyzer.CreateInstance();

    int maxSize = int.Parse(args[3]);

    // Hoisted: the original called GetFeatureName() three separate times.
    string strAF = analyzer.GetFeatureName();

    // using-statements guarantee the streams are flushed and closed even when an
    // exception escapes the loop (the original leaked all three on any throw).
    using (StreamReader sr = new StreamReader(args[1]))
    using (StreamWriter sw_train = new StreamWriter(args[2] + ".train"))
    using (StreamWriter sw_test = new StreamWriter(args[2] + ".test"))
    {
        //Write column header into file (include feature set name)
        sw_train.WriteLine("m:Rating\tm:QueryId\tTerm\tQuery\t" + strAF);
        sw_test.WriteLine("m:Rating\tm:QueryId\tTerm\tQuery\t" + strAF);

        //Write all active feature name into file
        string[] afitems = strAF.Split('\t');
        File.WriteAllLines("activefeatures.txt", afitems);

        HashSet<string> setLine = new HashSet<string>();
        int g_id = 10000;
        int cnt = 0;
        while (sr.EndOfStream == false)
        {
            string strLine = sr.ReadLine().Trim();
            // Skip blank lines (the original crashed on them: "".Split() yields one
            // empty token, LastIndexOf('[') returns -1, Substring(0, -1) throws)
            // and skip queries seen before.
            if (strLine.Length == 0 || setLine.Contains(strLine) == true)
            {
                continue;
            }
            setLine.Add(strLine);

            //Parse training corpus
            string[] items = strLine.Split();
            List<string> termList = new List<string>();
            List<string> tagList = new List<string>();
            StringBuilder sbQuery = new StringBuilder();
            bool bMalformed = false;
            foreach (string item in items)
            {
                int pos = item.LastIndexOf('[');
                // Guard tokens without a well-formed "[tag]" suffix; the original
                // threw ArgumentOutOfRangeException on such input.
                if (pos < 0 || item.EndsWith("]") == false)
                {
                    bMalformed = true;
                    break;
                }
                string strTerm = item.Substring(0, pos);
                string strTag = item.Substring(pos + 1, item.Length - pos - 2);
                termList.Add(strTerm.ToLower());
                tagList.Add(strTag);
                sbQuery.Append(strTerm);
            }
            if (bMalformed == true)
            {
                Console.WriteLine("Malformed corpus line, skipped: {0}", strLine);
                continue;
            }

            //Extract each term's features
            List<string> featureList = analyzer.ExtractFeature(instance, termList);
            if (featureList == null || featureList.Count != termList.Count)
            {
                //Failed to analyze term weight
                Console.WriteLine("Failed to analyze {0}", strLine);
                continue;
            }

            //Format: m:Rating\tm:QueryId\tTerm\tQuery\tFeatureSet
            for (int i = 0; i < featureList.Count; i++)
            {
                //The [0, maxSize] queries are for training corpus
                //The [maxSize + 1, maxSize * 2] queries are for test corpus
                if (cnt <= maxSize)
                {
                    sw_train.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", tagList[i], g_id, termList[i], sbQuery.ToString().Trim(), featureList[i]);
                }
                else
                {
                    sw_test.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", tagList[i], g_id, termList[i], sbQuery.ToString().Trim(), featureList[i]);
                }
            }

            // Single-threaded loop: a plain increment replaces the original's
            // Interlocked.Increment, which was inconsistent with the adjacent
            // unsynchronized cnt++ and served no purpose here.
            g_id++;
            cnt++;
            if (cnt > maxSize * 2)
            {
                break;
            }
        }
    }
}