/// <summary>
/// Builds a Lucene index from a Twitter text dump (e.g. exported from cosmos) where each
/// line represents one tweet and fields are separated by '\t'.
/// TwitterConfigure.TwitterSchema supplies the name of each field.
/// </summary>
private void BuildFromTwitterTxt()
{
    string inputPath = TwitterConfigure.InputPath;
    string outputPath = TwitterConfigure.OutputPath;
    var schema = TwitterConfigure.TwitterSchema;
    string bodyField = TwitterConfigure.TwitterBodyField;

    var indexWriter = LuceneOperations.GetIndexWriter(outputPath);

    // First pass: count lines so progress can be reported against a known total.
    int lineCnt = 0;
    using (var counterReader = new StreamReader(inputPath))
    {
        while (counterReader.ReadLine() != null)
        {
            lineCnt++;
        }
    }

    // Second pass: parse each tweet line and add it to the index.
    // using guarantees the file handle is released even if a line fails to parse.
    var separator = new char[] { '\t' };
    var progress = new ProgramProgress(lineCnt);
    using (var sr = new StreamReader(inputPath))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var tokens = line.Split(separator);
            if (tokens.Length != schema.Length)
            {
                throw new Exception("Unmatched schema: expected " + schema.Length
                    + " fields but line has " + tokens.Length);
            }
            var document = new Document();
            for (int i = 0; i < tokens.Length; i++)
            {
                // Clean the tweet body field only; other fields are indexed as-is.
                if (schema[i] == bodyField)
                {
                    tokens[i] = RemoveContentNoise.RemoveTweetIndexNoise(tokens[i]);
                }
                document.Add(new Field(schema[i], tokens[i], Field.Store.YES, Field.Index.ANALYZED));
            }
            indexWriter.AddDocument(document);
            progress.PrintIncrementExperiment();
        }
    }
    progress.PrintTotalTime();

    indexWriter.Optimize();
    indexWriter.Close();
}
/// <summary>
/// Scans Spinn3r JSON tweet files and indexes every English tweet whose text contains
/// <paramref name="searchStr"/> (case-insensitive substring match).
/// </summary>
/// <param name="files">Spinn3r JSON files to read.</param>
/// <param name="indexPath">Output Lucene index directory.</param>
/// <param name="searchStr">Substring to look for in the tweet body.</param>
/// <param name="progressEndStr">Optional suffix appended to every progress line.</param>
public void TransformWithFileNameContentSearch(string[] files, string indexPath, string searchStr, string progressEndStr = null)
{
    double tweetCnt = 0;
    var indexWriter = LuceneOperations.GetIndexWriter(indexPath);
    searchStr = searchStr.ToLower();

    var progress = new ProgramProgress(files.Length);
    int docFoundCount = 0;
    int totalDocCount = 0;
    foreach (var file in files)
    {
        FileOperations.ReadJsonFile<Spinn3rTwitterData>(file, (data) =>
        {
            tweetCnt += data.count;
            foreach (var tweet in data.items)
            {
                // Only English tweets are indexed.
                if (tweet.lang != "en")
                {
                    continue;
                }
                if (tweet.main.ToLower().Contains(searchStr))
                {
                    var document = new Document();
                    document.Add(new Field(TweetFields.TweetId, tweet.permalink, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Text, tweet.main, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserScreenName, tweet.author_link, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserName, tweet.author_name, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Tags, StringOperations.ConvertNullStringToEmpty(StringOperations.GetMergedString(tweet.tags)), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.CreatedAt, tweet.published, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Location, tweet.source_location, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserDescription, tweet.source_description, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserFollowersCount, tweet.source_followers.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserFriendsCount, tweet.source_following.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.AddDocument(document);
                    docFoundCount++;
                }
                totalDocCount++;
            }
        });

        // Guard the percentage: totalDocCount is 0 until the first non-empty file,
        // and integer division by zero would throw.
        progress.PrintIncrementExperiment(string.Format("docFound: {0} out of {1} ({2}%) -- {3}",
            docFoundCount, totalDocCount,
            totalDocCount == 0 ? 0 : 100 * docFoundCount / totalDocCount, progressEndStr));
    }
    progress.PrintTotalTime();

    Console.WriteLine("Final docFound: {0} out of {1} ({2}%)", docFoundCount, totalDocCount,
        totalDocCount == 0 ? 0 : 100 * docFoundCount / totalDocCount);

    Console.WriteLine("Start writing index...");
    indexWriter.Commit();
    indexWriter.Close();
    //Util.ProgramFinishHalt();
}
/// <summary>
/// Reads per-document topic distributions from an LDA result JSON file and collects,
/// for each topic, the top <c>topDocCnt</c> documents by topic weight.
/// </summary>
public void AnalyzeDocuments()
{
    // NOTE(review): paths are hard-coded to one machine; parameterize if this is reused.
    string fileName = @"D:\Project\TopicPanorama\data\TopicGraphs\NewCode-Ebola-Test2\Raw\news\result\lda.top.json";
    string indexPath = @"D:\DataProcess\Index\Raw_EbolaEnBingNews_Ebola_0_1_RS_R-1";
    int topDocCnt = 20;

    var indexReader = LuceneOperations.GetIndexReader(indexPath);

    // Read from json and sort. Each record maps a docID property to a topic-weight array.
    HeapSortDouble[] hsd = null;
    int topicNumber = -1;
    ProgramProgress progress = new ProgramProgress(indexReader.NumDocs());
    // using ensures the underlying file handle is released (the original leaked it).
    using (var streamReader = new StreamReader(File.Open(fileName, FileMode.Open)))
    {
        SimpleJsonReader reader = new SimpleJsonReader(streamReader);
        while (reader.IsReadable)
        {
            int docID = int.Parse(reader.ReadPropertyName());
            double[] topicArray = reader.ReadDoubleArray();

            // Lazily size the per-topic heaps from the first record seen.
            if (topicNumber < 0)
            {
                topicNumber = topicArray.Length;
                hsd = new HeapSortDouble[topicNumber];
                for (int i = 0; i < topicNumber; i++)
                {
                    hsd[i] = new HeapSortDouble(topDocCnt);
                }
            }
            for (int i = 0; i < topicNumber; i++)
            {
                hsd[i].Insert(docID, topicArray[i]);
            }
            progress.PrintIncrementExperiment();
        }
    }
    progress.PrintTotalTime();
    indexReader.Close();

    //Statistics
    Console.ReadLine();
}
/// <summary>
/// Loads every document from the configured Lucene index, wraps each one as an
/// <c>XmlDoc</c>, and serializes the whole collection to Configure.OutputPath as XML.
/// </summary>
public void Start()
{
    var reader = LuceneOperations.GetIndexReader(Configure.InputPath);
    var docNum = reader.NumDocs();
    ProgramProgress progress = new ProgramProgress(docNum);

    XmlDoc[] xmlDocs = new XmlDoc[docNum];
    for (int iDoc = 0; iDoc < docNum; iDoc++)
    {
        var doc = reader.Document(iDoc);
        xmlDocs[iDoc] = new XmlDoc(doc);
        progress.PrintIncrementExperiment();
    }
    progress.PrintTotalTime();

    // Serialize the collection to XML. The writer must be disposed: the original code
    // never flushed the StreamWriter, so buffered output could be lost and the file
    // left truncated.
    XmlSerializer serializer = new XmlSerializer(typeof(XmlDocCollection));
    using (var writer = new StreamWriter(Configure.OutputPath))
    {
        serializer.Serialize(writer, new XmlDocCollection() { XmlDocs = xmlDocs });
    }
}
/// <summary>
/// Computes word-distribution statistics over a Lucene index: per-document word and
/// unique-word counts, per-word document frequency and occurrence counts. Prints
/// summary statistics to the console and writes histogram CSV files next to the index.
/// </summary>
/// <param name="inputPath">Path of the Lucene index to analyze; CSVs are written here too.</param>
/// <param name="tokenConfig">Tokenizer configuration used to split document content.</param>
public static void AnalyzeTwitterWordDistribution(string inputPath, TokenizeConfig tokenConfig)
{
    var indexReader = LuceneOperations.GetIndexReader(inputPath);
    var docNum = indexReader.NumDocs();
    int[] docWordCnt = new int[docNum];
    int[] docUniqWordCnt = new int[docNum];
    Dictionary<string, int> wordDocCntDict = new Dictionary<string, int>();
    Dictionary<string, int> wordOccCntDict = new Dictionary<string, int>();

    var fieldWeights = tokenConfig.TokenizerType == TokenizerType.FeatureVector
        ? BingNewsFields.FeatureVectorFieldWeights
        : BingNewsFields.NewsFieldWeights;

    ProgramProgress progress = new ProgramProgress(docNum);
    for (int iDoc = 0; iDoc < docNum; iDoc++)
    {
        var document = indexReader.Document(iDoc);
        var content = LuceneOperations.GetContent(document, fieldWeights);
        var words = NLPOperations.Tokenize(content, tokenConfig);
        var uniqueWords = new HashSet<string>(words);
        docWordCnt[iDoc] = words.Count;
        docUniqWordCnt[iDoc] = uniqueWords.Count;

        // TryGetValue avoids the ContainsKey + Add + indexer triple lookup of the
        // original; a missing key yields cnt == 0, so the increment is identical.
        foreach (var word in uniqueWords)
        {
            int cnt;
            wordDocCntDict.TryGetValue(word, out cnt);
            wordDocCntDict[word] = cnt + 1;
        }
        foreach (var word in words)
        {
            int cnt;
            wordOccCntDict.TryGetValue(word, out cnt);
            wordOccCntDict[word] = cnt + 1;
        }
        progress.PrintIncrementExperiment();
    }
    progress.PrintTotalTime();
    indexReader.Close();

    //Statistics
    DoubleStatistics statDocWordCnt = new DoubleStatistics();
    DoubleStatistics statDocUniqWordCnt = new DoubleStatistics();
    DoubleStatistics statWordDocCnt = new DoubleStatistics();
    DoubleStatistics statWordOccCnt = new DoubleStatistics();

    for (int iDoc = 0; iDoc < docNum; iDoc++)
    {
        statDocWordCnt.AddNumber(docWordCnt[iDoc]);
        statDocUniqWordCnt.AddNumber(docUniqWordCnt[iDoc]);
    }
    foreach (var kvp in wordDocCntDict)
    {
        statWordDocCnt.AddNumber(kvp.Value);
    }
    foreach (var kvp in wordOccCntDict)
    {
        statWordOccCnt.AddNumber(kvp.Value);
    }

    Console.WriteLine(statDocWordCnt.ToString("statDocWordCnt"));
    Console.WriteLine(statDocUniqWordCnt.ToString("statDocUniqWordCnt"));
    Console.WriteLine(statWordDocCnt.ToString("statWordDocCnt"));
    Console.WriteLine(statWordOccCnt.ToString("wordOccCnt"));

    //Hist: bucket width 1 for per-document counts; 1000 and 1 for word-document counts.
    var docWordCntHist = new DoubleHistogram(docWordCnt.Select(i => (double)i), (double)1);
    var docUniqueWordCntList = new DoubleHistogram(docUniqWordCnt.Select(i => (double)i), (double)1);
    var wordDocCntHist = new DoubleHistogram(wordDocCntDict.Select(kvp => (double)kvp.Value), 1000);
    var wordDocCntHist2 = new DoubleHistogram(wordDocCntDict.Select(kvp => (double)kvp.Value), (double)1);

    docWordCntHist.PrintToFile(StringOperations.EnsureFolderEnd(inputPath) + "docWordCntHist.csv");
    docUniqueWordCntList.PrintToFile(StringOperations.EnsureFolderEnd(inputPath) + "docUniqueWordCntList.csv");
    wordDocCntHist.PrintToFile(StringOperations.EnsureFolderEnd(inputPath) + "wordDocCntHist.csv");
    wordDocCntHist2.PrintToFile(StringOperations.EnsureFolderEnd(inputPath) + "wordDocCntHist2.csv");

    Console.Read();
}
/// <summary>
/// Searches <paramref name="field"/> of the index for <paramref name="keywords"/>,
/// runs sentiment analysis on every hit, prints positive/negative/neutral ratios,
/// the top documents per polarity, and (optionally) a value histogram over
/// <paramref name="histogramField"/> for each sentiment class.
/// </summary>
/// <param name="indexPath">Lucene index to search.</param>
/// <param name="field">Field searched and printed for each document.</param>
/// <param name="keywords">Search terms, merged with spaces into one query.</param>
/// <param name="printDocumentCnt">How many top documents to print per polarity.</param>
/// <param name="histogramField">Optional field to histogram per sentiment class; null disables.</param>
public static void AnalyzeSearchWordSentiment(string indexPath, string field, string[] keywords, int printDocumentCnt = 10, string histogramField = null)
{
    var searcher = LuceneOperations.GetIndexSearcher(indexPath);
    var reader = searcher.GetIndexReader();
    var docIDs = LuceneOperations.Search(searcher, StringOperations.GetMergedString(keywords, " "), field);
    Console.WriteLine("Find {0}% ({1}/{2}) documents containing: {3}", (100.0 * docIDs.Count / reader.NumDocs()), docIDs.Count, reader.NumDocs(), StringOperations.GetMergedString(keywords, " "));

    var progress = new ProgramProgress(docIDs.Count);
    var sentiAnalyzer = new SentimentAnalyzer();
    SentimentType sentimentType;
    double sentimentScore;
    HeapSortDouble hsdPos = new HeapSortDouble(printDocumentCnt);
    HeapSortDouble hsdNeg = new HeapSortDouble(printDocumentCnt);
    Counter<string> counterPos = null;
    Counter<string> counterNeg = null;
    Counter<string> counterNeu = null;
    if (histogramField != null)
    {
        counterPos = new Counter<string>();
        counterNeg = new Counter<string>();
        counterNeu = new Counter<string>();
    }

    int posCnt = 0;
    int negCnt = 0;
    int neuCnt = 0;
    foreach (var docID in docIDs)
    {
        var document = reader.Document(docID);
        var content = document.Get(field);
        sentiAnalyzer.GetSentiment(content, out sentimentType, out sentimentScore);

        switch (sentimentType)
        {
            case SentimentType.Positive:
                posCnt++;
                hsdPos.Insert(docID, Math.Abs(sentimentScore));
                if (histogramField != null)
                {
                    counterPos.Add(document.Get(histogramField));
                }
                break;
            case SentimentType.Negative:
                negCnt++;
                hsdNeg.Insert(docID, Math.Abs(sentimentScore));
                if (histogramField != null)
                {
                    counterNeg.Add(document.Get(histogramField));
                }
                break;
            case SentimentType.Neutral:
                neuCnt++;
                if (histogramField != null)
                {
                    counterNeu.Add(document.Get(histogramField));
                }
                break;
            default:
                throw new NotImplementedException();
        }
        progress.PrintIncrementExperiment();
    }

    // Avoid printing "NaN%" when the search matched nothing (0.0 / 0 in double math).
    // When there is at least one hit, hitCnt == docIDs.Count and the ratios are unchanged.
    int hitCnt = Math.Max(docIDs.Count, 1);
    Console.WriteLine("Positive document ratio {0}% ({1}/{2})", Math.Round(100.0 * posCnt / hitCnt), posCnt, docIDs.Count);
    // Fixed typo in the original output: "Negatvie" -> "Negative".
    Console.WriteLine("Negative document ratio {0}% ({1}/{2})", Math.Round(100.0 * negCnt / hitCnt), negCnt, docIDs.Count);
    Console.WriteLine("Neutral document ratio {0}% ({1}/{2})", Math.Round(100.0 * neuCnt / hitCnt), neuCnt, docIDs.Count);

    Console.WriteLine(StringOperations.WrapWithDash("Positive documents"));
    foreach (var kvp in hsdPos.GetSortedDictionary())
    {
        Console.WriteLine(kvp.Value + "\t" + reader.Document(kvp.Key).Get(field));
    }
    Console.WriteLine(StringOperations.WrapWithDash("Negative documents"));
    foreach (var kvp in hsdNeg.GetSortedDictionary())
    {
        Console.WriteLine(kvp.Value + "\t" + reader.Document(kvp.Key).Get(field));
    }
    progress.PrintTotalTime();

    if (histogramField != null)
    {
        string[] featureStrings = new[] { "Pos", "Neg", "Neu" };
        Counter<string>[] counters = new[] { counterPos, counterNeg, counterNeu };
        for (int i = 0; i < featureStrings.Length; i++)
        {
            Console.WriteLine(StringOperations.WrapWithDash(histogramField + " " + featureStrings[i]));
            int index = 0;
            // Print at most the 100 most frequent values per class.
            foreach (var kvp in counters[i].GetCountDictionary().OrderByDescending(kvp => kvp.Value))
            {
                Console.WriteLine(kvp.Key + "\t" + kvp.Value);
                if (++index >= 100)
                {
                    break;
                }
            }
        }
    }
    Console.ReadKey();
}
/// <summary>
/// Scans tweet .txt files under <paramref name="inputFolder"/> (tab-separated, fixed
/// 9-field schema), keeps tweets whose tokenized text contains any of
/// <paramref name="keywords"/>, de-duplicates them on (CreatedAt, UserId, Text), and
/// writes the survivors to a Lucene index at <paramref name="indexPath"/>.
/// Any existing index at that path is deleted first.
/// </summary>
public void Transform(string inputFolder, string indexPath, HashSet<string> keywords)
{
    Console.WriteLine("Start to search words: " + StringOperations.GetMergedString(keywords));
    Console.WriteLine("InputFolder: " + inputFolder + "\n");

    // Files whose path contains this marker are skipped.
    string notParseSpecString = "Temp-DoNotParse";
    inputFolder = StringOperations.EnsureFolderEnd(inputFolder);

    string[] schema = new[]
    {
        "CreatedAt", "Text", "IsRetweet", "Retweeted", "RetweetCount",
        "UserScreenName", "UserId", "UserFollowersCount", "UserFriendsCount"
    };
    var schemeDict = Util.GetInvertedDictionary(schema);
    var textFieldIndex = schemeDict["Text"];
    var createdTimeFieldIndex = schemeDict["CreatedAt"];
    var userIdFieldIndex = schemeDict["UserId"];

    if (Directory.Exists(indexPath))
    {
        Directory.Delete(indexPath, true);
    }

    var files = Directory.GetFiles(inputFolder, "*.*", SearchOption.AllDirectories);

    //Preprocess: count lines so parsing progress can be estimated against a total.
    Console.WriteLine("Start preprocesing...");
    ProgramProgress progress = new ProgramProgress(files.Length);
    int estiDocCnt = 0;
    foreach (var file in files)
    {
        estiDocCnt += FileOperations.GetLineCount(file);
        progress.PrintIncrementExperiment();
    }
    progress.PrintTotalTime();
    Console.WriteLine("Estimate tweet count: " + estiDocCnt + "\n");

    //Parse
    Console.WriteLine("Start parsing...");
    var indexWriter = LuceneOperations.GetIndexWriter(indexPath);
    TokenizeConfig tokenizeConfig = new TokenizeConfig(TokenizerType.Twitter);
    progress = new ProgramProgress(estiDocCnt);
    var sep = new char[] { '\t' };
    int uniqDocFoundCnt = 0;
    int docFoundCnt = 0;
    int docCnt = 0;
    ThreeLayerHashSet<string, long, string> hash3Layer = new ThreeLayerHashSet<string, long, string>();
    int notUsedDocCnt = 0;
    foreach (var file in files)
    {
        if (file.Contains(notParseSpecString))
        {
            continue;
        }
        if (file.EndsWith(".txt"))
        {
            // using guarantees the file handle is released even if parsing throws.
            using (var sr = new StreamReader(file))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    var tokens = line.Split(sep, StringSplitOptions.None);
                    if (tokens.Length != schema.Length)
                    {
                        // Malformed line: count it and keep going instead of aborting.
                        notUsedDocCnt++;
                        continue;
                    }
                    var words = NLPOperations.Tokenize(tokens[textFieldIndex], tokenizeConfig);
                    bool isContainSearch = false;
                    foreach (var word in words)
                    {
                        if (keywords.Contains(word))
                        {
                            isContainSearch = true;
                            break;
                        }
                    }
                    if (isContainSearch)
                    {
                        string createdAt = tokens[createdTimeFieldIndex];
                        long userId = long.Parse(tokens[userIdFieldIndex]);
                        string text = tokens[textFieldIndex];
                        // De-duplicate on (createdAt, userId, text).
                        if (!hash3Layer.Contains(createdAt, userId, text))
                        {
                            var document = new Document();
                            for (int i = 0; i < schema.Length; i++)
                            {
                                document.Add(new Field(schema[i], tokens[i], Field.Store.YES, Field.Index.ANALYZED));
                            }
                            indexWriter.AddDocument(document);
                            hash3Layer.Add(createdAt, userId, text);
                            uniqDocFoundCnt++;
                        }
                        docFoundCnt++;
                    }
                    docCnt++;
                    // docCnt >= 1 here (incremented just above), so only docFoundCnt needs a guard.
                    progress.PrintIncrementExperiment(string.Format(
                        "uniqDocFound: {0} out of {1} ({2}%), docFoundUnqiueRatio: {3}%",
                        uniqDocFoundCnt, docCnt, 100 * uniqDocFoundCnt / docCnt,
                        (docFoundCnt == 0 ? 0 : (100 * uniqDocFoundCnt / docFoundCnt))));
                }
            }
        }
    }
    progress.PrintTotalTime();

    // Guard both divisions: docCnt is 0 when no .txt lines were read, and docFoundCnt
    // is 0 when nothing matched — the original summary line crashed in both cases.
    Console.WriteLine(string.Format(
        "uniqDocFound: {0} out of {1} ({2}%), docFoundUnqiueRatio: {3}%",
        uniqDocFoundCnt, docCnt,
        docCnt == 0 ? 0 : 100 * uniqDocFoundCnt / docCnt,
        docFoundCnt == 0 ? 0 : 100 * uniqDocFoundCnt / docFoundCnt));
    Console.WriteLine("Not used doc count: " + notUsedDocCnt);

    Console.WriteLine("Start writing index...");
    indexWriter.Commit();
    indexWriter.Close();

    Console.WriteLine("Finish");
    Console.ReadKey();
}
/// <summary>
/// Scans Spinn3r JSON tweet files and indexes English tweets that match the search set:
/// either any tokenized word of the tweet body is in <paramref name="searchHashSet"/>
/// (<c>SearchSpinn3rType.Main</c>) or the lowercased author link is
/// (<c>SearchSpinn3rType.User</c>).
/// </summary>
public void TransformWithFileNames(string[] files, string indexPath, HashSet<string> searchHashSet, SearchSpinn3rType searchType)
{
    double tweetCnt = 0;
    TokenizeConfig tokenizeConfig = new TokenizeConfig(TokenizerType.Twitter);
    var indexWriter = LuceneOperations.GetIndexWriter(indexPath);

    var progress = new ProgramProgress(files.Length);
    int docFoundCount = 0;
    int totalDocCount = 0;
    foreach (var file in files)
    {
        FileOperations.ReadJsonFile<Spinn3rTwitterData>(file, (data) =>
        {
            tweetCnt += data.count;
            foreach (var tweet in data.items)
            {
                // Only English tweets are considered.
                if (tweet.lang != "en")
                {
                    continue;
                }
                bool isContainSearch = false;
                switch (searchType)
                {
                    case SearchSpinn3rType.Main:
                        var words = NLPOperations.Tokenize(tweet.main, tokenizeConfig);
                        foreach (var word in words)
                        {
                            if (searchHashSet.Contains(word))
                            {
                                isContainSearch = true;
                                break;
                            }
                        }
                        break;
                    case SearchSpinn3rType.User:
                        isContainSearch = searchHashSet.Contains(tweet.author_link.ToLower());
                        break;
                    default:
                        throw new ArgumentException();
                }
                if (isContainSearch)
                {
                    var document = new Document();
                    document.Add(new Field(TweetFields.TweetId, tweet.permalink, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Text, tweet.main, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserScreenName, tweet.author_link, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserName, tweet.author_name, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Tags, StringOperations.ConvertNullStringToEmpty(StringOperations.GetMergedString(tweet.tags)), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.CreatedAt, tweet.published, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.Location, tweet.source_location, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserDescription, tweet.source_description, Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserFollowersCount, tweet.source_followers.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field(TweetFields.UserFriendsCount, tweet.source_following.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.AddDocument(document);
                    docFoundCount++;
                }
                totalDocCount++;
            }
        });

        // Guard the percentage: totalDocCount stays 0 until the first non-empty file,
        // and integer division by zero would throw.
        progress.PrintIncrementExperiment(string.Format("docFound: {0} out of {1} ({2}%)",
            docFoundCount, totalDocCount,
            totalDocCount == 0 ? 0 : 100 * docFoundCount / totalDocCount));
    }
    progress.PrintTotalTime();

    Console.WriteLine("Final docFound: {0} out of {1} ({2}%)", docFoundCount, totalDocCount,
        totalDocCount == 0 ? 0 : 100 * docFoundCount / totalDocCount);

    Console.WriteLine("Start writing index...");
    indexWriter.Commit();
    indexWriter.Close();

    Util.ProgramFinishHalt();
}
//public void StartEbola(int[] focusSeeds)
/// <summary>
/// Runs the Ebola scalability experiment sweep: for every combination of
/// (focusSeed, copyFactor, focusCount, treeCount) it writes a configuration file,
/// copies it next to RoseRiver.exe, launches the executable and blocks until it
/// exits. Combinations whose "RunTimeExperiment\{index}.txt" file already exists
/// are skipped, so an interrupted sweep can be resumed.
/// </summary>
public void StartEbola()
{
    // -- node counts --
    // Locate tree data and executable; fall back across known machine-specific paths.
    string folder = @"D:\Project\StreamingRoseRiver\EbolaCaseStudyFinal\Trees3\";
    string exeFolder = @"D:\Project\StreamingRoseRiver\EbolaCaseStudyFinal\RoseRiver\RoseRiver\bin\x64\Release\";
    if (!Directory.Exists(folder))
    {
        folder = @"H:\Xiting\StreamingRoseRiver\ScalabilityExperiment\Data\Trees3\";
        exeFolder = @"H:\Xiting\StreamingRoseRiver\ScalabilityExperiment\RoseRiverExe\";
    }
    if (!Directory.Exists(folder))
    {
        folder = @"D:\Documents\roseriver\RoseRiver\RoseRiver\Data\Ebola\Trees3\";
        exeFolder = @"D:\Documents\roseriver\RoseRiver\RoseRiver\Data\Ebola\ScalabilityExperiment\RoseRiver\RoseRiver\bin\x64\Release\";
    }
    // Read the node count of each of the 30 tree files (0.gv .. 29.gv).
    List<int> nodeCounts = new List<int>();
    for (int i = 0; i < 30; i++)
    {
        var fileName = folder + i + ".gv";
        var tree = BRTAnalysis.ReadTree(fileName);
        nodeCounts.Add(tree.BFS(tree.Root).Count());
    }

    // -- experiment --
    // Parameter grid; the commented alternatives are ranges used in earlier sweeps.
    var copyFactors = new[] { 1 }; //Util.GetIntArray(1, 9, 2); //new[] {1, 2, 5, 10, 20, 50};
    var focusCounts = new[] { 1, 3, 5 }; //DataProcess.Utils.Util.GetIntArray(1, 5);
    //var focusSampleCount = 1;//50;
    var focusSeeds = Util.GetIntArray(51, 100); //Util.GetIntArray(1, 50); //new[] { 1 };//Util.GetIntArray(1, 50);
    //var minMaxTreeCount = 10;
    //var maxMaxTreeCount = 30;
    var treeCounts = Util.GetIntArray(5, 30); //new int[] { 5, 10 };//new[] {10, 20};

    // index numbers runs sequentially across the whole grid and names the output file.
    int index = 0;
    ProgramProgress progress = new ProgramProgress(copyFactors.Length * focusCounts.Length * focusSeeds.Length * treeCounts.Length);
    var configure = new TopicStreamConfigure();
    foreach (int focusSeed in focusSeeds)
    {
        foreach (var copyFactor in copyFactors)
        {
            configure.CopyFactor = copyFactor;
            foreach (var focusCount in focusCounts)
            {
                configure.FocusCount = focusCount;
                // The tree cut is computed once per (focusCount, focusSeed) using the
                // minimum tree count, then shared by every treeCount in the inner loop.
                configure.DefaultTreeCut = GetRandomManuallyTreeCut(focusCount, treeCounts.Min(), focusSeed, nodeCounts, 1);
                configure.DefaultTreeCutRandomSeed = focusSeed;
                foreach (var treeCount in treeCounts)
                {
                    // Resume support: skip runs whose output file already exists.
                    // index must still advance so later runs keep their numbering.
                    if (File.Exists("RunTimeExperiment\\" + index + ".txt"))
                    {
                        Console.WriteLine("Skip index = " + index);
                        index++;
                        progress.PrintSkipExperiment();
                        continue;
                    }
                    configure.TreeCount = treeCount;
                    configure.Index = index;
                    configure.Write();
                    // Ship the freshly written configuration next to the executable.
                    File.Copy(TopicStreamConfigure.ConfigureFileName, exeFolder + TopicStreamConfigure.ConfigureFileName, true);

                    ProcessStartInfo startInfo = new ProcessStartInfo();
                    startInfo.ErrorDialog = false;
                    startInfo.CreateNoWindow = false;
                    startInfo.UseShellExecute = false;
                    startInfo.FileName = exeFolder + @"RoseRiver.exe";
                    startInfo.WindowStyle = ProcessWindowStyle.Normal;
                    // One experiment at a time: block until the process exits.
                    using (Process exeProcess = Process.Start(startInfo))
                    {
                        exeProcess.WaitForExit();
                    }
                    progress.PrintIncrementExperiment("\n");
                    index++;
                }
            }
        }
    }
    progress.PrintTotalTime();
}
/// <summary>
/// Runs the KDD experiment sweep: for every (copyFactor, focusCount, focus sample,
/// tree count) combination it writes a configuration file, copies it next to
/// RoseRiver.exe, launches the executable hidden, and blocks until it exits.
/// </summary>
public void StartKDD()
{
    // -- node counts --
    string folder = @"D:\Project\StreamingRoseRiver\EbolaCaseStudyFinal\RoseRiver\Data\KddInfovisGraphicsIndex_Lucene_a=0.003_sm=1\";
    string exeFolder = @"D:\Project\StreamingRoseRiver\EbolaCaseStudyFinal\RoseRiver\RoseRiver\bin\x64\Release\";

    // Node count of each of the 11 tree files (0.gv .. 10.gv).
    var nodeCounts = new List<int>();
    for (int treeIndex = 0; treeIndex < 11; treeIndex++)
    {
        var tree = BRTAnalysis.ReadTree(folder + treeIndex + ".gv");
        nodeCounts.Add(tree.BFS(tree.Root).Count());
    }

    // -- experiment --
    var copyFactors = new[] { 2, 1 };
    var focusCounts = DataProcess.Utils.Util.GetIntArray(1, 5);
    var focusSampleCount = 5;
    var minMaxTreeCount = 6;
    var maxMaxTreeCount = 8;

    // runIndex numbers runs sequentially across the whole grid.
    int runIndex = 0;
    var progress = new ProgramProgress(
        copyFactors.Length * focusCounts.Length * focusSampleCount * (maxMaxTreeCount - minMaxTreeCount + 1));

    var configure = new TopicStreamConfigure();
    configure.DataType = "kdd";
    foreach (var copyFactor in copyFactors)
    {
        configure.CopyFactor = copyFactor;
        foreach (var focusCount in focusCounts)
        {
            for (int sample = 0; sample < focusSampleCount; sample++)
            {
                configure.FocusCount = focusCount;
                // One tree cut per (focusCount, sample), built from the minimum tree count.
                configure.DefaultTreeCut = GetRandomManuallyTreeCut(focusCount, minMaxTreeCount, sample, nodeCounts, 1);
                configure.DefaultTreeCutRandomSeed = sample;
                for (int treeCount = minMaxTreeCount; treeCount <= maxMaxTreeCount; treeCount++)
                {
                    configure.TreeCount = treeCount;
                    configure.Index = runIndex;
                    configure.Write();
                    // Ship the configuration next to the executable before launching it.
                    File.Copy(TopicStreamConfigure.ConfigureFileName, exeFolder + TopicStreamConfigure.ConfigureFileName, true);

                    var startInfo = new ProcessStartInfo
                    {
                        CreateNoWindow = true,
                        UseShellExecute = false,
                        FileName = exeFolder + @"RoseRiver.exe",
                        WindowStyle = ProcessWindowStyle.Hidden,
                    };
                    // One experiment at a time: block until the process exits.
                    using (var exeProcess = Process.Start(startInfo))
                    {
                        exeProcess.WaitForExit();
                    }

                    progress.PrintIncrementExperiment("\n");
                    runIndex++;
                }
            }
        }
    }
    progress.PrintTotalTime();
}