Beispiel #1
0
        /// <summary>
        /// Splits the text of a news XML file into phrases by running the external
        /// "splitta" Python sentence splitter and post-processing each output line.
        /// </summary>
        /// <param name="myXmlFile">Path of the XML file handed to <c>ReadNews</c>.</param>
        /// <returns>
        /// The phrases in output order, numbered from 1. Lines that are empty, or whose
        /// processed text is empty, are skipped and do not consume a position number.
        /// If anything fails, the error is written to the console and the phrases
        /// collected so far (possibly none) are returned.
        /// </returns>
        public static List <PhraseData> GetPhrases(string myXmlFile)
        {
            var phrases = new List <PhraseData>();

            try
            {
                var completeText = ReadNews(myXmlFile);

                // The managed thread id is part of each name so that concurrent calls on
                // different threads do not share the same scratch files. The names are
                // relative, so the files live in the current working directory.
                var idThread         = Thread.CurrentThread.ManagedThreadId;
                var fileNameOriginal = "Text" + idThread + ".txt";
                var fileNamePhrases  = "Phrases" + idThread + ".txt";
                var fileNameCommand  = "Split" + idThread + ".bat";
                var temporaryFiles   = new[] { fileNameOriginal, fileNamePhrases, fileNameCommand };

                try
                {
                    // Remove leftovers from an earlier run that did not clean up.
                    foreach (var temporaryFile in temporaryFiles)
                    {
                        if (File.Exists(temporaryFile))
                        {
                            File.Delete(temporaryFile);
                        }
                    }

                    File.WriteAllText(fileNameOriginal, completeText);
                    var command = @"c:\python27\python splitta\sbd.py -m splitta\model_nb " + fileNameOriginal + " > " + fileNamePhrases;
                    File.WriteAllText(fileNameCommand, command);

                    var infoProcess = new ProcessStartInfo(fileNameCommand)
                    {
                        WindowStyle = ProcessWindowStyle.Hidden
                    };

                    // Process is IDisposable; dispose it so its handle is released promptly.
                    using (var myProcess = new Process { StartInfo = infoProcess })
                    {
                        myProcess.Start();
                        myProcess.WaitForExit();
                    }

                    var lines    = File.ReadAllLines(fileNamePhrases);
                    var position = 1;

                    foreach (var line in lines)
                    {
                        if (line.Length == 0)
                        {
                            continue;
                        }
                        var processed = LuceneIndexer.TextProcessing(line);
                        if (processed.Length == 0)
                        {
                            continue;
                        }
                        phrases.Add(new PhraseData(line, processed, position));
                        position++;
                    }
                }
                finally
                {
                    // Always remove the scratch files, including when the splitter or the
                    // post-processing failed; otherwise they would accumulate on disk.
                    foreach (var temporaryFile in temporaryFiles)
                    {
                        if (File.Exists(temporaryFile))
                        {
                            File.Delete(temporaryFile);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Deliberately best-effort: report the problem and return what we have.
                Console.WriteLine("ERROR XMLFilePreProcessing.GetPhrases : " + e.Message);
            }
            return(phrases);
        }