Exemplo n.º 1
0
        /// <summary>
        /// Entry point. Parses the articles (and, while <c>isAnnotated</c> is true, their
        /// annotations) from the training file, runs the <c>Preprocessor</c> over every article
        /// to collect its tokens and who/when/where/what/why candidates, and then lets the
        /// <c>Identifier</c> label each preprocessed article between the trainer's
        /// <c>startTrain</c> and <c>endTrain</c> calls.
        /// </summary>
        /// <remarks>
        /// Preprocessing only happens when the article list is non-empty and, for annotated
        /// runs, has exactly one annotation per article. The labelling loop is bounded by the
        /// number of articles that were actually preprocessed, so a missing or mismatched input
        /// is reported on standard error instead of failing with a null reference or an
        /// out-of-range index.
        /// </remarks>
        public static void Main()
        {
            /*#if DEBUG
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            Application.Run(new Main());
            #else*/
            Boolean isAnnotated = true;
            FileParser fileparserFP = new FileParser();
            String sourcePath = @"..\..\training_news.xml";
            // The three destination paths are only referenced by the disabled ResultWriter
            // block at the end of this method.
            String destinationPath = @"..\..\result.xml";
            String invertedDestinationPath = @"..\..\result_inverted_index.xml";
            String formatDateDestinationPath = @"..\..\result_format_date.xml";

            List<Article> listCurrentArticles = fileparserFP.parseFile(sourcePath);
            List<Annotation> listCurrentTrainingAnnotations = new List<Annotation>();
            if (isAnnotated)
            {
                 listCurrentTrainingAnnotations = fileparserFP.parseAnnotations(sourcePath);
            }

            // Per-article results; index i in each list belongs to the i-th preprocessed article.
            List<List<Token>> listTokenizedArticles = new List<List<Token>>();
            List<List<Candidate>> listAllWhoCandidates = new List<List<Candidate>>();
            List<List<Candidate>> listAllWhenCandidates = new List<List<Candidate>>();
            List<List<Candidate>> listAllWhereCandidates = new List<List<Candidate>>();
            List<List<List<Token>>> listAllWhatCandidates = new List<List<List<Token>>>();
            List<List<List<Token>>> listAllWhyCandidates = new List<List<List<Token>>>();
            List<List<String>> listAllWhoAnnotations = new List<List<String>>();
            List<List<String>> listAllWhenAnnotations = new List<List<String>>();
            List<List<String>> listAllWhereAnnotations = new List<List<String>>();
            List<String> listAllWhatAnnotations = new List<String>();
            List<String> listAllWhyAnnotations = new List<String>();

            Preprocessor preprocessor = new Preprocessor();

            // Articles must exist and, for annotated runs, pair up one-to-one with annotations.
            // The null check comes first so the later Count accesses are short-circuited.
            bool canPreprocess =
                listCurrentArticles != null && listCurrentArticles.Count > 0 &&
                (!isAnnotated || (listCurrentTrainingAnnotations != null && listCurrentTrainingAnnotations.Count > 0 &&
                listCurrentArticles.Count == listCurrentTrainingAnnotations.Count));

            if (canPreprocess)
            {
                // Processes every parsed article; no cap on the article count is applied here.
                for (int nI = 0; nI < listCurrentArticles.Count; nI++)
                {
                    preprocessor.setCurrentArticle(listCurrentArticles[nI]);
                    preprocessor.preprocess();

                    if (isAnnotated)
                    {
                        preprocessor.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
                        preprocessor.performAnnotationAssignment();
                    }

                    listTokenizedArticles.Add(preprocessor.getLatestTokenizedArticle());
                    listAllWhoCandidates.Add(preprocessor.getWhoCandidates());
                    listAllWhenCandidates.Add(preprocessor.getWhenCandidates());
                    listAllWhereCandidates.Add(preprocessor.getWhereCandidates());
                    listAllWhatCandidates.Add(preprocessor.getWhatCandidates());
                    listAllWhyCandidates.Add(preprocessor.getWhyCandidates());
                }

                if (isAnnotated)
                {
                    /*Trainer trainer = new Trainer();
                    trainer.trainMany("who", listTokenizedArticles, listAllWhoCandidates);
                    trainer.trainMany("when", listTokenizedArticles, listAllWhenCandidates);
                    trainer.trainMany("where", listTokenizedArticles, listAllWhereCandidates);*/
                }
            }
            else
            {
                System.Console.Error.WriteLine(
                    "No articles were preprocessed: the article list is missing or empty, or it does not match the annotation list.");
            }

            #region Candidate Selection Printer
            /*Candidate Selection Printer*/
            /*try
            {
                var whoCandidatesPath = @"..\..\candidates_who.txt";
                var whenCandidatesPath = @"..\..\candidates_when.txt";
                var whereCandidatesPath = @"..\..\candidates_where.txt";

                if (File.Exists(whoCandidatesPath)) File.Delete(whoCandidatesPath);
                if (File.Exists(whenCandidatesPath)) File.Delete(whenCandidatesPath);
                if (File.Exists(whereCandidatesPath)) File.Delete(whereCandidatesPath);

                using (StreamWriter sw = File.CreateText(whoCandidatesPath))
                {
                    for (int nI = 0; nI < listAllWhoCandidates.Count; nI++)
                    {
                        sw.WriteLine("#{0}:", nI);
                        foreach (var candidate in listAllWhoCandidates[nI])
                        {
                            sw.Write(candidate.Value + ", ");
                        }
                        sw.WriteLine("\n");
                    }
                }
                using (StreamWriter sw = File.CreateText(whenCandidatesPath))
                {
                    for (int nI = 0; nI < listAllWhenCandidates.Count; nI++)
                    {
                        sw.WriteLine("#{0}:", nI);
                        foreach (var candidate in listAllWhenCandidates[nI])
                        {
                            sw.Write(candidate.Value + ", ");
                        }
                        sw.WriteLine("\n");
                    }
                }
                using (StreamWriter sw = File.CreateText(whereCandidatesPath))
                {
                    for (int nI = 0; nI < listAllWhereCandidates.Count; nI++)
                    {
                        sw.WriteLine("#{0}:", nI);
                        foreach (var candidate in listAllWhereCandidates[nI])
                        {
                            sw.Write(candidate.Value + ", ");
                        }
                        sw.WriteLine("\n");
                    }
                }
            }
            catch (Exception e)
            {
                System.Console.WriteLine("Error with writing initial line of training dataset.");
            }*/
            #endregion

            WhatWhyTrainer wwt = new WhatWhyTrainer();
            wwt.startTrain();
            Identifier annotationIdentifier = new Identifier(isAnnotated, wwt);

            // Bounded by the number of preprocessed articles rather than the raw article list:
            // the per-article lists are only filled when preprocessing ran, in which case both
            // counts are equal; otherwise they are empty and this loop is skipped.
            for (int nI = 0; nI < listTokenizedArticles.Count; nI++)
            {
                annotationIdentifier.setCurrentArticle(listTokenizedArticles[nI]);
                annotationIdentifier.setWhoCandidates(listAllWhoCandidates[nI]);
                annotationIdentifier.setWhenCandidates(listAllWhenCandidates[nI]);
                annotationIdentifier.setWhereCandidates(listAllWhereCandidates[nI]);
                annotationIdentifier.setWhatCandidates(listAllWhatCandidates[nI]);
                annotationIdentifier.setWhyCandidates(listAllWhyCandidates[nI]);
                annotationIdentifier.setTitle(listCurrentArticles[nI].Title);
                if (isAnnotated)
                {
                    annotationIdentifier.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
                }
                annotationIdentifier.labelAnnotations();
                listAllWhoAnnotations.Add(annotationIdentifier.getWho());
                listAllWhenAnnotations.Add(annotationIdentifier.getWhen());
                listAllWhereAnnotations.Add(annotationIdentifier.getWhere());
                listAllWhatAnnotations.Add(annotationIdentifier.getWhat());
                listAllWhyAnnotations.Add(annotationIdentifier.getWhy());
            }
            wwt.endTrain();

            /*ResultWriter rw = new ResultWriter(destinationPath, formatDateDestinationPath, invertedDestinationPath, listCurrentArticles, listAllWhoAnnotations, listAllWhenAnnotations, listAllWhereAnnotations, listAllWhatAnnotations, listAllWhyAnnotations);
            rw.generateOutput();
            rw.generateOutputFormatDate();
            rw.generateInvertedIndexOutput();*/
            //#endif
        }
Exemplo n.º 2
0
        /// <summary>
        /// Entry point. In DEBUG builds it starts the Windows Forms UI. In other builds it
        /// parses the articles (and, while <c>isAnnotated</c> is true, their annotations) from
        /// the training file, runs the <c>Preprocessor</c> over every article to collect its
        /// tokens and who/when/where/what/why candidates, lets the <c>Identifier</c> label each
        /// preprocessed article, and hands the labels to the <c>ResultWriter</c>.
        /// </summary>
        /// <remarks>
        /// Preprocessing only happens when the article list is non-empty and, for annotated
        /// runs, has exactly one annotation per article. The labelling loop is bounded by the
        /// number of articles that were actually preprocessed, so a missing or mismatched input
        /// is reported on standard error instead of failing with a null reference or an
        /// out-of-range index.
        /// </remarks>
        public static void Main()
        {
            #if DEBUG
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            Application.Run(new Main());
            #else
            Boolean isAnnotated = true;
            FileParser fileparserFP = new FileParser();
            String sourcePath = @"..\..\training_news.xml";
            String destinationPath = @"..\..\result.xml";
            String invertedDestinationPath = @"..\..\result_inverted_index.xml";
            String formatDateDestinationPath = @"..\..\result_format_date.xml";

            List<Article> listCurrentArticles = fileparserFP.parseFile(sourcePath);
            List<Annotation> listCurrentTrainingAnnotations = new List<Annotation>();
            if (isAnnotated)
            {
                listCurrentTrainingAnnotations = fileparserFP.parseAnnotations(sourcePath);
            }

            // Per-article results; index i in each list belongs to the i-th preprocessed article.
            List<List<Token>> listTokenizedArticles = new List<List<Token>>();
            List<List<Candidate>> listAllWhoCandidates = new List<List<Candidate>>();
            List<List<Candidate>> listAllWhenCandidates = new List<List<Candidate>>();
            List<List<Candidate>> listAllWhereCandidates = new List<List<Candidate>>();
            List<List<List<Token>>> listAllWhatCandidates = new List<List<List<Token>>>();
            List<List<List<Token>>> listAllWhyCandidates = new List<List<List<Token>>>();
            List<List<String>> listAllWhoAnnotations = new List<List<String>>();
            List<List<String>> listAllWhenAnnotations = new List<List<String>>();
            List<List<String>> listAllWhereAnnotations = new List<List<String>>();
            List<String> listAllWhatAnnotations = new List<String>();
            List<String> listAllWhyAnnotations = new List<String>();

            Preprocessor preprocessor = new Preprocessor();

            // Articles must exist and, for annotated runs, pair up one-to-one with annotations.
            // The null check comes first so the later Count accesses are short-circuited.
            bool canPreprocess =
                listCurrentArticles != null && listCurrentArticles.Count > 0 &&
                (!isAnnotated || (listCurrentTrainingAnnotations != null && listCurrentTrainingAnnotations.Count > 0 &&
                                  listCurrentArticles.Count == listCurrentTrainingAnnotations.Count));

            if (canPreprocess)
            {
                // Processes every parsed article; no cap on the article count is applied here.
                for (int nI = 0; nI < listCurrentArticles.Count; nI++)
                {
                    preprocessor.setCurrentArticle(listCurrentArticles[nI]);
                    preprocessor.preprocess();

                    if (isAnnotated)
                    {
                        preprocessor.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
                        preprocessor.performAnnotationAssignment();
                    }

                    listTokenizedArticles.Add(preprocessor.getLatestTokenizedArticle());
                    listAllWhoCandidates.Add(preprocessor.getWhoCandidates());
                    listAllWhenCandidates.Add(preprocessor.getWhenCandidates());
                    listAllWhereCandidates.Add(preprocessor.getWhereCandidates());
                    listAllWhatCandidates.Add(preprocessor.getWhatCandidates());
                    listAllWhyCandidates.Add(preprocessor.getWhyCandidates());
                }

                if (isAnnotated)
                {
                    /*Trainer trainer = new Trainer();
                     * trainer.trainMany("who", listTokenizedArticles, listAllWhoCandidates);
                     * trainer.trainMany("when", listTokenizedArticles, listAllWhenCandidates);
                     * trainer.trainMany("where", listTokenizedArticles, listAllWhereCandidates);*/
                }
            }
            else
            {
                System.Console.Error.WriteLine(
                    "No articles were preprocessed: the article list is missing or empty, or it does not match the annotation list.");
            }

            #region Candidate Selection Printer
            /*Candidate Selection Printer*/

            /*try
             * {
             *  var whoCandidatesPath = @"..\..\candidates_who.txt";
             *  var whenCandidatesPath = @"..\..\candidates_when.txt";
             *  var whereCandidatesPath = @"..\..\candidates_where.txt";
             *
             *  if (File.Exists(whoCandidatesPath)) File.Delete(whoCandidatesPath);
             *  if (File.Exists(whenCandidatesPath)) File.Delete(whenCandidatesPath);
             *  if (File.Exists(whereCandidatesPath)) File.Delete(whereCandidatesPath);
             *
             *  using (StreamWriter sw = File.CreateText(whoCandidatesPath))
             *  {
             *      for (int nI = 0; nI < listAllWhoCandidates.Count; nI++)
             *      {
             *          sw.WriteLine("#{0}:", nI);
             *          foreach (var candidate in listAllWhoCandidates[nI])
             *          {
             *              sw.Write(candidate.Value + ", ");
             *          }
             *          sw.WriteLine("\n");
             *      }
             *  }
             *  using (StreamWriter sw = File.CreateText(whenCandidatesPath))
             *  {
             *      for (int nI = 0; nI < listAllWhenCandidates.Count; nI++)
             *      {
             *          sw.WriteLine("#{0}:", nI);
             *          foreach (var candidate in listAllWhenCandidates[nI])
             *          {
             *              sw.Write(candidate.Value + ", ");
             *          }
             *          sw.WriteLine("\n");
             *      }
             *  }
             *  using (StreamWriter sw = File.CreateText(whereCandidatesPath))
             *  {
             *      for (int nI = 0; nI < listAllWhereCandidates.Count; nI++)
             *      {
             *          sw.WriteLine("#{0}:", nI);
             *          foreach (var candidate in listAllWhereCandidates[nI])
             *          {
             *              sw.Write(candidate.Value + ", ");
             *          }
             *          sw.WriteLine("\n");
             *      }
             *  }
             * }
             * catch (Exception e)
             * {
             *  System.Console.WriteLine("Error with writing initial line of training dataset.");
             * }*/
            #endregion

            WhyTrainer wt = new WhyTrainer();
            if (isAnnotated)
            {
                wt.startTrain();
            }
            Identifier annotationIdentifier = new Identifier(isAnnotated, wt);

            // Bounded by the number of preprocessed articles rather than the raw article list:
            // the per-article lists are only filled when preprocessing ran, in which case both
            // counts are equal; otherwise they are empty and this loop is skipped.
            for (int nI = 0; nI < listTokenizedArticles.Count; nI++)
            {
                annotationIdentifier.setCurrentArticle(listTokenizedArticles[nI]);
                annotationIdentifier.setWhoCandidates(listAllWhoCandidates[nI]);
                annotationIdentifier.setWhenCandidates(listAllWhenCandidates[nI]);
                annotationIdentifier.setWhereCandidates(listAllWhereCandidates[nI]);
                annotationIdentifier.setWhatCandidates(listAllWhatCandidates[nI]);
                annotationIdentifier.setWhyCandidates(listAllWhyCandidates[nI]);
                annotationIdentifier.setTitle(listCurrentArticles[nI].Title);
                if (isAnnotated)
                {
                    annotationIdentifier.setCurrentAnnotation(listCurrentTrainingAnnotations[nI]);
                }
                annotationIdentifier.labelAnnotations();
                listAllWhoAnnotations.Add(annotationIdentifier.getWho());
                listAllWhenAnnotations.Add(annotationIdentifier.getWhen());
                listAllWhereAnnotations.Add(annotationIdentifier.getWhere());
                listAllWhatAnnotations.Add(annotationIdentifier.getWhat());
                listAllWhyAnnotations.Add(annotationIdentifier.getWhy());
            }
            if (isAnnotated)
            {
                wt.endTrain();
            }

            // Write results only when there is exactly one labelled entry per article. This
            // still covers an empty article list (zero articles, zero labels) but skips a null
            // or unmatched article list.
            // NOTE(review): presumably ResultWriter walks the article and annotation lists in
            // parallel; its implementation is not visible here - confirm.
            if (listCurrentArticles != null && listCurrentArticles.Count == listAllWhoAnnotations.Count)
            {
                ResultWriter rw = new ResultWriter(destinationPath, formatDateDestinationPath, invertedDestinationPath, listCurrentArticles, listAllWhoAnnotations, listAllWhenAnnotations, listAllWhereAnnotations, listAllWhatAnnotations, listAllWhyAnnotations);
                rw.generateOutput();
                rw.generateOutputFormatDate();
                rw.generateInvertedIndexOutput();
            }
            #endif
        }