/// <summary>
/// Batch entry point. Parses the articles (and, because <c>isAnnotated</c> is
/// set, their annotations) from <c>training_news.xml</c>, preprocesses every
/// article into tokens and who/when/where/what/why candidates, and then lets an
/// <c>Identifier</c> label each article between <c>WhatWhyTrainer.startTrain()</c>
/// and <c>WhatWhyTrainer.endTrain()</c>.
/// </summary>
/// <remarks>
/// When the parsed input is unusable (no article list, an empty article list, or,
/// in annotated mode, an annotation list that is missing, empty or of a different
/// length than the article list) a message is written to standard error and the
/// method returns without preprocessing, training or labelling anything.
/// The GUI start-up path and the <c>ResultWriter</c> stage are currently disabled
/// (kept as comments).
/// </remarks>
public static void Main()
{
    /* Disabled GUI start-up path:
     * #if DEBUG
     * Application.EnableVisualStyles();
     * Application.SetCompatibleTextRenderingDefault(false);
     * Application.Run(new Main());
     * #else
     */
    Boolean isAnnotated = true;
    FileParser fileparserFP = new FileParser();

    String sourcePath = @"..\..\training_news.xml";
    // The destination paths are only consumed by the ResultWriter stage, which is
    // commented out at the end of this method.
    String destinationPath = @"..\..\result.xml";
    String invertedDestinationPath = @"..\..\result_inverted_index.xml";
    String formatDateDestinationPath = @"..\..\result_format_date.xml";

    List<Article> listCurrentArticles = fileparserFP.parseFile(sourcePath);
    List<Annotation> listCurrentTrainingAnnotations = new List<Annotation>();
    if (isAnnotated)
    {
        listCurrentTrainingAnnotations = fileparserFP.parseAnnotations(sourcePath);
    }

    // Every later stage indexes the article list, the annotation list and the
    // per-article result lists with the same index, so the input has to be
    // validated once, up front, rather than only around the preprocessing loop.
    Boolean inputIsUsable =
        listCurrentArticles != null
        && listCurrentArticles.Count > 0
        && (!isAnnotated
            || (listCurrentTrainingAnnotations != null
                && listCurrentTrainingAnnotations.Count > 0
                && listCurrentArticles.Count == listCurrentTrainingAnnotations.Count));
    if (!inputIsUsable)
    {
        Console.Error.WriteLine(
            "Nothing to process: '" + sourcePath + "' yielded no articles, or the number of "
            + "annotations does not match the number of articles.");
        return;
    }

    List<List<Token>> listTokenizedArticles = new List<List<Token>>();
    List<List<Candidate>> listAllWhoCandidates = new List<List<Candidate>>();
    List<List<Candidate>> listAllWhenCandidates = new List<List<Candidate>>();
    List<List<Candidate>> listAllWhereCandidates = new List<List<Candidate>>();
    List<List<List<Token>>> listAllWhatCandidates = new List<List<List<Token>>>();
    List<List<List<Token>>> listAllWhyCandidates = new List<List<List<Token>>>();

    List<List<String>> listAllWhoAnnotations = new List<List<String>>();
    List<List<String>> listAllWhenAnnotations = new List<List<String>>();
    List<List<String>> listAllWhereAnnotations = new List<List<String>>();
    List<String> listAllWhatAnnotations = new List<String>();
    List<String> listAllWhyAnnotations = new List<String>();

    Preprocessor preprocessor = new Preprocessor();

    // Stage 1: preprocess every article and collect its tokens and candidates.
    for (int nI = 0; nI < listCurrentArticles.Count; nI++)
    {
        preprocessor.setCurrentArticle(listCurrentArticles[nI]);
        preprocessor.preprocess();

        if (isAnnotated)
        {
            preprocessor.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
            preprocessor.performAnnotationAssignment();
        }

        listTokenizedArticles.Add(preprocessor.getLatestTokenizedArticle());
        listAllWhoCandidates.Add(preprocessor.getWhoCandidates());
        listAllWhenCandidates.Add(preprocessor.getWhenCandidates());
        listAllWhereCandidates.Add(preprocessor.getWhereCandidates());
        listAllWhatCandidates.Add(preprocessor.getWhatCandidates());
        listAllWhyCandidates.Add(preprocessor.getWhyCandidates());
    }

    if (isAnnotated)
    {
        /* Disabled who/when/where training:
         * Trainer trainer = new Trainer();
         * trainer.trainMany("who", listTokenizedArticles, listAllWhoCandidates);
         * trainer.trainMany("when", listTokenizedArticles, listAllWhenCandidates);
         * trainer.trainMany("where", listTokenizedArticles, listAllWhereCandidates);
         */
    }

    #region Candidate Selection Printer
    // Candidate Selection Printer (disabled debugging aid).
    /*try
     * {
     *     var whoCandidatesPath = @"..\..\candidates_who.txt";
     *     var whenCandidatesPath = @"..\..\candidates_when.txt";
     *     var whereCandidatesPath = @"..\..\candidates_where.txt";
     *
     *     if (File.Exists(whoCandidatesPath)) File.Delete(whoCandidatesPath);
     *     if (File.Exists(whenCandidatesPath)) File.Delete(whenCandidatesPath);
     *     if (File.Exists(whereCandidatesPath)) File.Delete(whereCandidatesPath);
     *
     *     using (StreamWriter sw = File.CreateText(whoCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhoCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhoCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     *     using (StreamWriter sw = File.CreateText(whenCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhenCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhenCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     *     using (StreamWriter sw = File.CreateText(whereCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhereCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhereCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     * }
     * catch (Exception e)
     * {
     *     System.Console.WriteLine("Error with writing initial line of training dataset.");
     * }*/
    #endregion

    // Stage 2: label every article while the what/why trainer session is open.
    WhatWhyTrainer wwt = new WhatWhyTrainer();
    wwt.startTrain();

    Identifier annotationIdentifier = new Identifier(isAnnotated, wwt);
    for (int nI = 0; nI < listCurrentArticles.Count; nI++)
    {
        annotationIdentifier.setCurrentArticle(listTokenizedArticles[nI]);
        annotationIdentifier.setWhoCandidates(listAllWhoCandidates[nI]);
        annotationIdentifier.setWhenCandidates(listAllWhenCandidates[nI]);
        annotationIdentifier.setWhereCandidates(listAllWhereCandidates[nI]);
        annotationIdentifier.setWhatCandidates(listAllWhatCandidates[nI]);
        annotationIdentifier.setWhyCandidates(listAllWhyCandidates[nI]);
        annotationIdentifier.setTitle(listCurrentArticles[nI].Title);

        if (isAnnotated)
        {
            annotationIdentifier.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
        }

        annotationIdentifier.labelAnnotations();

        listAllWhoAnnotations.Add(annotationIdentifier.getWho());
        listAllWhenAnnotations.Add(annotationIdentifier.getWhen());
        listAllWhereAnnotations.Add(annotationIdentifier.getWhere());
        listAllWhatAnnotations.Add(annotationIdentifier.getWhat());
        listAllWhyAnnotations.Add(annotationIdentifier.getWhy());
    }

    wwt.endTrain();

    /* Disabled output stage:
     * ResultWriter rw = new ResultWriter(destinationPath, formatDateDestinationPath, invertedDestinationPath, listCurrentArticles, listAllWhoAnnotations, listAllWhenAnnotations, listAllWhereAnnotations, listAllWhatAnnotations, listAllWhyAnnotations);
     * rw.generateOutput();
     * rw.generateOutputFormatDate();
     * rw.generateInvertedIndexOutput();
     */
    //#endif
}
/// <summary>
/// Handles the Import button. Takes the path typed into the text box of the
/// currently selected tab and, when it names an existing <c>.xml</c> file,
/// records it in <c>sourcePaths</c>. For tabs after the first one the file is
/// also parsed into articles and annotations: tab 1 hands them to the viewer
/// and calls <c>loadArticles()</c>; tab 2 hands them to the navigator and loads
/// the five reverse indices from the sibling <c>*_inverted_index.xml</c> file.
/// On success the selected tab's entry in <c>secondBoxes</c> is enabled.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnImport_Click(object sender, EventArgs e)
{
    int tabIndex = tabControl1.SelectedIndex;

    String typedPath = textBoxes[tabIndex].Text;
    if (String.IsNullOrEmpty(typedPath))
    {
        return;
    }

    FileInfo fi = new FileInfo(typedPath);
    if (!File.Exists(fi.FullName) || !fi.Extension.Equals(".xml"))
    {
        return;
    }

    sourcePaths[tabIndex] = fi.FullName;

    if (tabIndex > 0)
    {
        List<Article> listArticles = fileparserFP.parseFile(sourcePaths[tabIndex]);
        List<Annotation> listAnnotations = fileparserFP.parseAnnotations(sourcePaths[tabIndex]);

        if (listArticles == null || listArticles.Count <= 0)
        {
            MessageBox.Show("No articles found!");
            return;
        }
        if (listAnnotations == null)
        {
            listAnnotations = new List<Annotation>();
        }

        for (int i = 0; i < listAnnotations.Count; i++)
        {
            listAnnotations[i].Index = i;
            // There may be more annotations than articles; only print titles that exist.
            if (i < listArticles.Count)
            {
                Console.WriteLine(listArticles[i].Title + " " + i);
            }
        }

        if (tabIndex == 1)
        {
            listViewerArticles = listArticles;
            listViewerAnnotations = listAnnotations;
            loadArticles();
        }
        else if (tabIndex == 2)
        {
            // "<name>.xml" -> "<name>_inverted_index.xml"
            String invertedIndexPath = fi.FullName.Insert(
                fi.FullName.Length - fi.Extension.Length, "_inverted_index");
            if (!File.Exists(invertedIndexPath))
            {
                MessageBox.Show("Inverted index file not found!");
                return;
            }

            XmlDocument doc = new XmlDocument();
            doc.Load(invertedIndexPath);

            // Parse everything first so that a malformed index file leaves the
            // navigator state untouched instead of half-updated.
            Dictionary<String, List<int>> whoEntries = parseReverseIndex(doc, "/data/who/entry");
            Dictionary<String, List<int>> whenEntries = parseReverseIndex(doc, "/data/when/entry");
            Dictionary<String, List<int>> whereEntries = parseReverseIndex(doc, "/data/where/entry");
            Dictionary<String, List<int>> whatEntries = parseReverseIndex(doc, "/data/what/entry");
            Dictionary<String, List<int>> whyEntries = parseReverseIndex(doc, "/data/why/entry");

            listNavigatorArticles = listArticles;
            listNavigatorAnnotations = listAnnotations;

            // The navigator lists were just replaced, so indices kept from an
            // earlier import would point into the wrong list (and re-adding the
            // same keys would throw); replace the index contents wholesale.
            replaceEntries(whoReverseIndex, whoEntries);
            replaceEntries(whenReverseIndex, whenEntries);
            replaceEntries(whereReverseIndex, whereEntries);
            replaceEntries(whatReverseIndex, whatEntries);
            replaceEntries(whyReverseIndex, whyEntries);
        }
    }

    //firstBoxes[tabControl1.SelectedIndex].Enabled = false;
    secondBoxes[tabIndex].Enabled = true;
}

/// <summary>
/// Reads every node matched by <paramref name="entryXPath"/> and maps the inner
/// text of its <c>text</c> child to the integers found in its
/// <c>articleIndex</c> children. Entries without a <c>text</c> child are
/// skipped; entries that repeat a text have their indices merged.
/// </summary>
private static Dictionary<String, List<int>> parseReverseIndex(XmlDocument doc, String entryXPath)
{
    Dictionary<String, List<int>> parsed = new Dictionary<String, List<int>>();

    foreach (XmlNode entry in doc.DocumentElement.SelectNodes(entryXPath))
    {
        XmlNode textNode = entry.SelectSingleNode("text");
        if (textNode == null)
        {
            continue;
        }

        List<int> indices = new List<int>();
        foreach (XmlNode index in entry.SelectNodes("articleIndex"))
        {
            indices.Add(Convert.ToInt32(index.InnerText));
        }

        List<int> existing;
        if (parsed.TryGetValue(textNode.InnerText, out existing))
        {
            existing.AddRange(indices);
        }
        else
        {
            parsed.Add(textNode.InnerText, indices);
        }
    }

    return parsed;
}

/// <summary>
/// Empties <paramref name="target"/> and copies every pair of
/// <paramref name="source"/> into it.
/// </summary>
private static void replaceEntries(IDictionary<String, List<int>> target, Dictionary<String, List<int>> source)
{
    target.Clear();
    foreach (KeyValuePair<String, List<int>> pair in source)
    {
        target.Add(pair.Key, pair.Value);
    }
}
/// <summary>
/// Program entry point.
/// In DEBUG builds it starts the Windows Forms UI (<c>Application.Run(new Main())</c>).
/// Otherwise it runs the batch pipeline: parses the articles (and, because
/// <c>isAnnotated</c> is set, their annotations) from <c>training_news.xml</c>,
/// preprocesses every article into tokens and who/when/where/what/why
/// candidates, lets an <c>Identifier</c> label each article (inside a
/// <c>WhyTrainer</c> session when annotated), and finally has
/// <c>ResultWriter</c> generate the three output files.
/// </summary>
/// <remarks>
/// When the parsed input is unusable (no article list, an empty article list, or,
/// in annotated mode, an annotation list that is missing, empty or of a different
/// length than the article list) a message is written to standard error and the
/// batch pipeline returns without training or writing any output.
/// </remarks>
public static void Main()
{
#if DEBUG
    Application.EnableVisualStyles();
    Application.SetCompatibleTextRenderingDefault(false);
    Application.Run(new Main());
#else
    Boolean isAnnotated = true;
    FileParser fileparserFP = new FileParser();

    String sourcePath = @"..\..\training_news.xml";
    String destinationPath = @"..\..\result.xml";
    String invertedDestinationPath = @"..\..\result_inverted_index.xml";
    String formatDateDestinationPath = @"..\..\result_format_date.xml";

    List<Article> listCurrentArticles = fileparserFP.parseFile(sourcePath);
    List<Annotation> listCurrentTrainingAnnotations = new List<Annotation>();
    if (isAnnotated)
    {
        listCurrentTrainingAnnotations = fileparserFP.parseAnnotations(sourcePath);
    }

    // Every later stage (labelling and result writing included) indexes the
    // article list, the annotation list and the per-article result lists with
    // the same index, so the input is validated once, up front, rather than
    // only around the preprocessing loop.
    Boolean inputIsUsable =
        listCurrentArticles != null
        && listCurrentArticles.Count > 0
        && (!isAnnotated
            || (listCurrentTrainingAnnotations != null
                && listCurrentTrainingAnnotations.Count > 0
                && listCurrentArticles.Count == listCurrentTrainingAnnotations.Count));
    if (!inputIsUsable)
    {
        Console.Error.WriteLine(
            "Nothing to process: '" + sourcePath + "' yielded no articles, or the number of "
            + "annotations does not match the number of articles.");
        return;
    }

    List<List<Token>> listTokenizedArticles = new List<List<Token>>();
    List<List<Candidate>> listAllWhoCandidates = new List<List<Candidate>>();
    List<List<Candidate>> listAllWhenCandidates = new List<List<Candidate>>();
    List<List<Candidate>> listAllWhereCandidates = new List<List<Candidate>>();
    List<List<List<Token>>> listAllWhatCandidates = new List<List<List<Token>>>();
    List<List<List<Token>>> listAllWhyCandidates = new List<List<List<Token>>>();

    List<List<String>> listAllWhoAnnotations = new List<List<String>>();
    List<List<String>> listAllWhenAnnotations = new List<List<String>>();
    List<List<String>> listAllWhereAnnotations = new List<List<String>>();
    List<String> listAllWhatAnnotations = new List<String>();
    List<String> listAllWhyAnnotations = new List<String>();

    Preprocessor preprocessor = new Preprocessor();

    // Stage 1: preprocess every article and collect its tokens and candidates.
    for (int nI = 0; nI < listCurrentArticles.Count; nI++)
    {
        preprocessor.setCurrentArticle(listCurrentArticles[nI]);
        preprocessor.preprocess();

        if (isAnnotated)
        {
            preprocessor.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
            preprocessor.performAnnotationAssignment();
        }

        listTokenizedArticles.Add(preprocessor.getLatestTokenizedArticle());
        listAllWhoCandidates.Add(preprocessor.getWhoCandidates());
        listAllWhenCandidates.Add(preprocessor.getWhenCandidates());
        listAllWhereCandidates.Add(preprocessor.getWhereCandidates());
        listAllWhatCandidates.Add(preprocessor.getWhatCandidates());
        listAllWhyCandidates.Add(preprocessor.getWhyCandidates());
    }

    if (isAnnotated)
    {
        /* Disabled who/when/where training:
         * Trainer trainer = new Trainer();
         * trainer.trainMany("who", listTokenizedArticles, listAllWhoCandidates);
         * trainer.trainMany("when", listTokenizedArticles, listAllWhenCandidates);
         * trainer.trainMany("where", listTokenizedArticles, listAllWhereCandidates);
         */
    }

    #region Candidate Selection Printer
    // Candidate Selection Printer (disabled debugging aid).
    /*try
     * {
     *     var whoCandidatesPath = @"..\..\candidates_who.txt";
     *     var whenCandidatesPath = @"..\..\candidates_when.txt";
     *     var whereCandidatesPath = @"..\..\candidates_where.txt";
     *
     *     if (File.Exists(whoCandidatesPath)) File.Delete(whoCandidatesPath);
     *     if (File.Exists(whenCandidatesPath)) File.Delete(whenCandidatesPath);
     *     if (File.Exists(whereCandidatesPath)) File.Delete(whereCandidatesPath);
     *
     *     using (StreamWriter sw = File.CreateText(whoCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhoCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhoCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     *     using (StreamWriter sw = File.CreateText(whenCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhenCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhenCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     *     using (StreamWriter sw = File.CreateText(whereCandidatesPath))
     *     {
     *         for (int nI = 0; nI < listAllWhereCandidates.Count; nI++)
     *         {
     *             sw.WriteLine("#{0}:", nI);
     *             foreach (var candidate in listAllWhereCandidates[nI])
     *             {
     *                 sw.Write(candidate.Value + ", ");
     *             }
     *             sw.WriteLine("\n");
     *         }
     *     }
     * }
     * catch (Exception e)
     * {
     *     System.Console.WriteLine("Error with writing initial line of training dataset.");
     * }*/
    #endregion

    // Stage 2: label every article; the why-trainer session is only opened
    // (and later closed) when working with annotated data.
    WhyTrainer wt = new WhyTrainer();
    if (isAnnotated)
    {
        wt.startTrain();
    }

    Identifier annotationIdentifier = new Identifier(isAnnotated, wt);
    for (int nI = 0; nI < listCurrentArticles.Count; nI++)
    {
        annotationIdentifier.setCurrentArticle(listTokenizedArticles[nI]);
        annotationIdentifier.setWhoCandidates(listAllWhoCandidates[nI]);
        annotationIdentifier.setWhenCandidates(listAllWhenCandidates[nI]);
        annotationIdentifier.setWhereCandidates(listAllWhereCandidates[nI]);
        annotationIdentifier.setWhatCandidates(listAllWhatCandidates[nI]);
        annotationIdentifier.setWhyCandidates(listAllWhyCandidates[nI]);
        annotationIdentifier.setTitle(listCurrentArticles[nI].Title);

        if (isAnnotated)
        {
            annotationIdentifier.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
        }

        annotationIdentifier.labelAnnotations();

        listAllWhoAnnotations.Add(annotationIdentifier.getWho());
        listAllWhenAnnotations.Add(annotationIdentifier.getWhen());
        listAllWhereAnnotations.Add(annotationIdentifier.getWhere());
        listAllWhatAnnotations.Add(annotationIdentifier.getWhat());
        listAllWhyAnnotations.Add(annotationIdentifier.getWhy());
    }

    if (isAnnotated)
    {
        wt.endTrain();
    }

    // Stage 3: write the labelled results in all three output formats.
    ResultWriter rw = new ResultWriter(
        destinationPath,
        formatDateDestinationPath,
        invertedDestinationPath,
        listCurrentArticles,
        listAllWhoAnnotations,
        listAllWhenAnnotations,
        listAllWhereAnnotations,
        listAllWhatAnnotations,
        listAllWhyAnnotations);
    rw.generateOutput();
    rw.generateOutputFormatDate();
    rw.generateInvertedIndexOutput();
#endif
}