コード例 #1
0
ファイル: Test_Unit_RegexValues.cs プロジェクト: 24/source_04
        //public static FindDate_old FindDate(FindDateManager findDateManager, string text)
        /// <summary>
        /// Runs <c>findDateManager.Find</c> on <paramref name="text"/> and traces the outcome.
        /// </summary>
        /// <param name="findDateManager">Manager used to perform the search.</param>
        /// <param name="text">Text passed to the search.</param>
        /// <returns>The <c>FindDate</c> result of the search, whether or not a date was found.</returns>
        public static FindDate FindDate(FindDateManager_v1 findDateManager, string text)
        {
            FindDate result = findDateManager.Find(text);

            Trace.WriteLine("search date in  : \"{0}\"", text);

            // Nothing matched: trace that fact, emit the blank separator line and leave.
            if (!result.Found)
            {
                Trace.WriteLine("    date not found ");
                Trace.WriteLine();
                return result;
            }

            // Trace the matched value, the result of Replace("_"), then the date and its type.
            Trace.WriteLine("    found date  : \"{0}\"", result.matchValues.Match.Value);
            Trace.WriteLine("    remain text : \"{0}\"", result.matchValues.Replace("_"));
            Trace.WriteLine("    date        : {0:dd-MM-yyyy} type {1}", result.Date, result.DateType);

            // Trace the values exposed by the match on a single line.
            Trace.Write("    values      : ");
            result.matchValues.GetValues().zTrace();
            Trace.WriteLine();

            // Blank separator line between two searches.
            Trace.WriteLine();
            return result;
        }
コード例 #2
0
ファイル: PrintTitle.cs プロジェクト: 24/source_04
        /// <summary>
        /// Searches <paramref name="title"/> for a date using the print title manager's date finder
        /// and the expected date held by this instance.
        /// </summary>
        /// <param name="title">Title to search.</param>
        /// <returns>
        /// <paramref name="title"/> unchanged when no date is found; otherwise the result of
        /// <c>matchValues.Replace(" $$date$$ ")</c>.
        /// </returns>
        private string FindDate(string title)
        {
            FindDate search = _printTitleManager.FindDateManager.Find(title, _expectedDate);

            if (!search.Found)
            {
                return title;
            }

            // Record what was found on the instance.
            _date      = search.Date;
            _dateType  = search.DateType;
            _dateMatch = search.matchValues;

            // The replacement is computed before the flag is raised, as in the original statement order.
            string titleWithPlaceholder = search.matchValues.Replace(" $$date$$ ");
            _foundDate = true;

            return titleWithPlaceholder;
        }
コード例 #3
0
ファイル: Test_Unit_RegexValues.cs プロジェクト: 24/source_04
        /// <summary>
        /// Reads BSON documents from <paramref name="file"/> (resolved against <c>GetDirectoryDate()</c>),
        /// searches a date in the "text" element of each document and traces the results plus a summary
        /// (number of texts, number of dates found, elapsed time).
        /// </summary>
        /// <param name="findDateManager">
        /// Manager used for the searches. A "year" regex (previous, current and next year) is added to its
        /// <c>DateRegexList</c> by this method.
        /// </param>
        /// <param name="file">File name, combined with the directory returned by <c>GetDirectoryDate()</c>.</param>
        public static void Test_FindDate_02(FindDateManager_v1 findDateManager, string file)
        {
            file = zPath.Combine(GetDirectoryDate(), file);
            int year = Date.Today.Year;

            // NOTE(review): the "year" entry is added on every call; presumably a second call on the same
            // manager would add a duplicate key - confirm against the DateRegexList type.
            findDateManager.DateRegexList.Add("year", new RegexValues("year", "year", string.Format(@"(?:^|[_\s])({0}|{1}|{2})(?:$|[_\s])", year - 1, year, year + 1), "IgnoreCase", "year", compileRegex: true));
            string traceFile = zpath.PathSetFileNameWithoutExtension(file, zPath.GetFileNameWithoutExtension(file) + "_out");

            Trace.WriteLine("Test_FindDate \"{0}\" (nb date regex {1})", file, findDateManager.DateRegexList.Count);
            Trace.CurrentTrace.DisableViewer = true;
            // NOTE(review): the WriteToFile instance created here is never explicitly closed in this method;
            // confirm whether RemoveOnWrite (or WriteToFile itself) releases the underlying file.
            Trace.CurrentTrace.AddOnWrite("Test_FindDate_02", WriteToFile.Create(traceFile, FileOption.RazFile).Write);
            try
            {
                // Stopwatch instead of DateTime.Now subtraction: elapsed time must not depend on wall-clock
                // adjustments. Fully qualified so that no using directive is needed and the Trace type
                // used by this file stays unambiguous.
                System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
                TraceRegexValuesList(findDateManager.DateRegexList);
                Trace.WriteLine();
                int nb          = 0;
                int nbDateFound = 0;
                foreach (BsonDocument document in zMongo.BsonRead <BsonDocument>(file))
                {
                    string text = document["text"].AsString;

                    FindDate findDate = FindDate(findDateManager, text);

                    if (findDate.Found)
                    {
                        nbDateFound++;
                    }

                    nb++;
                }
                Trace.WriteLine();
                Trace.WriteLine();
                Trace.WriteLine("search date in {0} text", nb);
                Trace.WriteLine("found date in {0} text", nbDateFound);
                Trace.WriteLine("test duration {0}", stopwatch.Elapsed);
            }
            finally
            {
                // Undo the trace redirection set up before the try block.
                Trace.CurrentTrace.RemoveOnWrite("Test_FindDate_02");
                Trace.CurrentTrace.DisableViewer = false;
            }
        }
コード例 #4
0
ファイル: Test_Unit_RegexValues.cs プロジェクト: 24/source_04
        /// <summary>
        /// Runs <c>findDateManager.Find</c> on <paramref name="text"/> and copies the outcome into a
        /// <c>TestFindDate</c>.
        /// </summary>
        /// <param name="findDateManager">Manager used to perform the search.</param>
        /// <param name="text">Text passed to the search.</param>
        /// <returns>
        /// A <c>TestFindDate</c> whose <c>text</c> and <c>foundDate</c> are always set; the remaining
        /// members are only assigned when a date was found.
        /// </returns>
        public static TestFindDate FindDateNew(FindDateManager findDateManager, string text)
        {
            FindDate search = findDateManager.Find(text);

            TestFindDate outcome = new TestFindDate
            {
                text      = text,
                foundDate = search.Found
            };

            if (!search.Found)
            {
                return outcome;
            }

            // Details are only available on a successful search.
            outcome.dateFound   = search.matchValues.Match.Value;
            outcome.date        = search.Date;
            outcome.dateType    = search.DateType;
            outcome.remainText  = search.matchValues.Replace("_");
            outcome.namedValues = search.matchValues.GetValues();

            return outcome;
        }
コード例 #5
0
ファイル: Test_Unit_RegexValues.cs プロジェクト: 24/source_04
 /// <summary>
 /// Lazily runs <c>findDateManager.Find</c> on the <c>text</c> of every element of
 /// <paramref name="textList"/> and yields one <c>TestFindDate</c> per element.
 /// </summary>
 /// <param name="textList">Source texts.</param>
 /// <param name="findDateManager">Manager used to perform each search.</param>
 /// <returns>
 /// One <c>TestFindDate</c> per input element; members other than <c>text</c> and <c>foundDate</c>
 /// are only assigned when a date was found.
 /// </returns>
 public static IEnumerable <TestFindDate> zFindDate(this IEnumerable <TestText> textList, FindDateManager_v1 findDateManager)
 {
     foreach (TestText item in textList)
     {
         FindDate search = findDateManager.Find(item.text);

         TestFindDate outcome = new TestFindDate
         {
             text      = item.text,
             foundDate = search.Found
         };

         if (search.Found)
         {
             // Details are only available on a successful search.
             outcome.dateFound   = search.matchValues.Match.Value;
             outcome.date        = search.Date;
             outcome.dateType    = search.DateType;
             outcome.remainText  = search.matchValues.Replace("_");
             outcome.namedValues = search.matchValues.GetValues();
         }

         yield return outcome;
     }
 }
コード例 #6
0
ファイル: PrintTitleManager.cs プロジェクト: 24/source_04
        //private PrintTitleInfo _GetPrintTitleInfo_v1(string title)
        //{
        //    PrintTitleInfo titleInfo = new PrintTitleInfo();
        //    titleInfo.originalTitle = title;
        //    string titleStructure = title;

        //    FindText findSpecial = _findSpecial.Find(titleStructure);
        //    if (findSpecial.found)
        //    {
        //        titleInfo.special = true;
        //        titleInfo.specialMatch = findSpecial.matchValues;
        //        titleStructure = findSpecial.matchValues.Replace(" $$special$$ ");
        //    }

        //    FindNumber findNumber = _findNumber.Find(titleStructure);
        //    if (findNumber.found)
        //    {
        //        titleInfo.number = findNumber.number;
        //        titleInfo.numberMatch = findNumber.matchValues;
        //        titleStructure = findNumber.matchValues.Replace(" $$number$$ ");
        //    }

        //    FindDate findDate = _findDateManager_v1.Find(titleStructure);
        //    if (findDate.found)
        //    {
        //        titleInfo.date = findDate.date;
        //        titleInfo.dateType = findDate.dateType;
        //        titleInfo.dateMatch = findDate.matchValues;
        //        titleStructure = findDate.matchValues.Replace(" $$date$$ ");
        //    }
        //    titleInfo.dateOtherMatchList = findDate.matchValuesList;

        //    Match match = __rgSpecialLabel.Match(titleStructure);
        //    if (match.Success)
        //    {
        //        titleInfo.specialText = match.Groups[1].Value.Trim(__trimChars);
        //        titleInfo.specialText = GetFormatedText(titleInfo.specialText);
        //        titleStructure = match.zReplace(titleStructure, "");
        //    }

        //    int i = titleStructure.IndexOf("$$");
        //    if (i != -1)
        //    {
        //        titleInfo.title = titleStructure.Substring(0, i).Trim(__trimChars);
        //        titleStructure = titleStructure.Substring(i);
        //    }
        //    else
        //        titleInfo.title = titleStructure;

        //    titleInfo.titleStructure = titleStructure;
        //    titleInfo.formatedTitle = GetFormatedText(titleInfo.title);
        //    titleInfo.name = GetName(titleInfo.formatedTitle);
        //    titleInfo.remainText = __rgTitleStructureName.Replace(titleStructure, "").Trim(__trimChars);

        //    titleInfo.file = GetFile(titleInfo);

        //    return titleInfo;
        //}

        /// not used
        //private PrintTitleInfo _GetPrintTitleInfo_v2(string title)
        //{
        //    PrintTitleRequest titleRequest = new PrintTitleRequest();

        //    titleRequest.originalTitle = title;

        //    PrintSplitedTitle splitedTitle = SplitTitle(title);

        //    FindText findSpecial = _findSpecial.Find(splitedTitle.titlePart1);
        //    if (findSpecial.found)
        //    {
        //        titleRequest.special = true;
        //        splitedTitle.titlePart1 = findSpecial.matchValues.Replace(" $$special$$ ");
        //    }

        //    FindNumber findNumber = _findNumber.Find(splitedTitle.titlePart1);
        //    if (findNumber.found)
        //    {
        //        titleRequest.number = findNumber.number;
        //        splitedTitle.titlePart1 = findNumber.matchValues.Replace(" $$number$$ ");
        //    }

        //    FindDate findDate = _findDateManager_new.Find(splitedTitle.titlePart2);
        //    if (findDate.found)
        //    {
        //        titleRequest.date = findDate.date;
        //        titleRequest.dateType = findDate.dateType;
        //        splitedTitle.titlePart2 = findDate.matchValues.Replace(" $$date$$ ");
        //    }

        //    Match match = __rgSpecialLabel.Match(splitedTitle.titlePart1);
        //    if (match.Success)
        //    {
        //        titleRequest.specialText = match.Groups[1].Value.Trim(__trimChars);
        //        titleRequest.specialText = GetFormatedText(titleRequest.specialText);
        //        splitedTitle.titlePart1 = match.zReplace(splitedTitle.titlePart1, "");
        //    }

        //    //Trace.WriteLine("titleRequest :");
        //    //Trace.WriteLine(titleRequest.zToJson());
        //    //Trace.WriteLine("splitedTitle :");
        //    //Trace.WriteLine(splitedTitle.zToJson());

        //    string concatenatedTitle = splitedTitle.titlePart1;
        //    if (!string.IsNullOrEmpty(splitedTitle.realTitlePart2))
        //        concatenatedTitle += " - " + splitedTitle.realTitlePart2;
        //    int i = concatenatedTitle.IndexOf("$$");
        //    if (i != -1)
        //    {
        //        titleRequest.title = concatenatedTitle.Substring(0, i).Trim(__trimChars);
        //        concatenatedTitle = concatenatedTitle.Substring(i);
        //    }
        //    else
        //    {
        //        titleRequest.title = concatenatedTitle;
        //        concatenatedTitle = null;
        //    }

        //    titleRequest.titleStructure = concatenatedTitle;
        //    titleRequest.formatedTitle = GetFormatedText(titleRequest.title);
        //    titleRequest.name = GetName(titleRequest.formatedTitle);
        //    if (concatenatedTitle != null)
        //        titleRequest.remainText = __rgTitleStructureName.Replace(concatenatedTitle, "").Trim(__trimChars);

        //    titleRequest.file = GetFile(titleRequest);

        //    PrintTitleInfo titleInfo = new PrintTitleInfo
        //        {
        //            originalTitle = titleRequest.originalTitle,
        //            title = titleRequest.title,
        //            formatedTitle = titleRequest.formatedTitle,
        //            name = titleRequest.name,
        //            special = titleRequest.special,
        //            specialText = titleRequest.specialText,
        //            number = titleRequest.number,
        //            date = titleRequest.date,
        //            dateType = titleRequest.dateType,
        //            titleStructure = titleRequest.titleStructure,
        //            remainText = titleRequest.remainText,
        //            file = titleRequest.file
        //        };
        //    return titleInfo;
        //}

        /// <summary>
        /// Breaks a print title down into a <c>PrintTitleInfo</c>: date, "special" flag, number,
        /// special label text, title, formatted title, name, title structure and remaining text.
        /// Each recognised part is replaced in the working title by a <c>$$...$$</c> placeholder;
        /// the text before the first placeholder becomes the title.
        /// </summary>
        /// <param name="title">Original title; stored unchanged in <c>OriginalTitle</c>.</param>
        /// <param name="splitTitle">
        /// When true, the date is first searched only in the tail of the title: the title is cut at the
        /// last " du " or "- " (whichever is further right) and, if that yields no date, at the first "- ".
        /// </param>
        /// <param name="expectedDate">Expected date handed to the date finder; may be null.</param>
        /// <returns>The populated <c>PrintTitleInfo</c>.</returns>
        private PrintTitleInfo _GetPrintTitleInfo(string title, bool splitTitle, Date? expectedDate)
        {
            // why split :
            //     "Le Parisien + Votre été du dimanche 24 août 2014"                               date = "été du dimanche 24 août 2014"
            //     "Le Parisien + Votre été (la France en fête) du dimanche 20 juillet 2014"        date = "été"
            //     "Le Monde + Eco&Entreprise + journal de 1994 du mardi 08 avril 2014"             date = "de 1994"
            //     "Le Monde de l'Image 84  - 2013"                                                 date not found
            //     "Le Monde de L'Intelligence 29 - Février-Mars 2013"                              date not found
            //     "Le Monde des Sciences 7 - Février-Mars 2013"                                    date = "7 - Février-Mars 2013"

            PrintTitleRequest titleRequest = new PrintTitleRequest();

            titleRequest.OriginalTitle = title;

            title = ReplaceCharacters(title);

            bool foundDate = false;

            if (splitTitle)
            {
                // "- " is a literal marker, so it is searched with an ordinal comparison.
                int lastDu    = title.LastIndexOf(" du ", StringComparison.InvariantCultureIgnoreCase);
                int lastDash  = title.LastIndexOf("- ", StringComparison.Ordinal);
                int firstDash = title.IndexOf("- ", StringComparison.Ordinal);

                // Candidate split positions, in order of preference: first the rightmost " du " / "- ",
                // then the first "- ". title is only modified once a date has been found, so computing
                // firstDash up front is equivalent to computing it after a failed first attempt.
                int[] splitIndexes = { Math.Max(lastDu, lastDash), firstDash };

                foreach (int splitIndex in splitIndexes)
                {
                    if (splitIndex == -1)
                    {
                        continue;
                    }

                    // The tail keeps the separator itself (it starts at splitIndex).
                    string head = title.Substring(0, splitIndex);
                    string tail = title.Substring(splitIndex);

                    FindDate findDate = _findDateManager.Find(tail, expectedDate);
                    if (findDate.Found)
                    {
                        titleRequest.Date      = findDate.Date;
                        titleRequest.DateType  = findDate.DateType;
                        titleRequest.DateMatch = findDate.matchValues;
                        title     = head + findDate.matchValues.Replace(" $$date$$ ");
                        foundDate = true;
                        break;
                    }
                }
            }

            FindText_v2 findSpecial = _findSpecial.Find(title);

            if (findSpecial.Success)
            {
                titleRequest.Special = true;
                title = findSpecial.Replace(" $$special$$ ");
            }

            FindNumber findNumber = _findNumberManager.Find(title);

            if (findNumber.found)
            {
                titleRequest.Number      = findNumber.number;
                titleRequest.NumberMatch = findNumber.matchValues;
                title = findNumber.matchValues.Replace(" $$number$$ ");
            }

            // No date found in a split tail (or no split requested): search the whole working title,
            // after the special and number parts have been replaced by their placeholders.
            if (!foundDate)
            {
                FindDate findDate = _findDateManager.Find(title, expectedDate);
                if (findDate.Found)
                {
                    titleRequest.Date      = findDate.Date;
                    titleRequest.DateType  = findDate.DateType;
                    titleRequest.DateMatch = findDate.matchValues;
                    title = findDate.matchValues.Replace(" $$date$$ ");
                }
            }

            Match match = __rgSpecialLabel.Match(title);

            if (match.Success)
            {
                titleRequest.SpecialText = match.Groups[1].Value.Trim(__trimChars);
                titleRequest.SpecialText = GetFormatedText(titleRequest.SpecialText);
                title = match.zReplace(title, "");
            }

            // Everything before the first placeholder is the title; the rest is the title structure.
            int i = title.IndexOf("$$", StringComparison.Ordinal);

            if (i != -1)
            {
                titleRequest.Title = title.Substring(0, i).Trim(__trimChars);
                title = title.Substring(i);
            }
            else
            {
                // No placeholder at all: the whole text is the title and there is no structure.
                titleRequest.Title = title;
                title = null;
            }

            titleRequest.TitleStructure = title;
            titleRequest.FormatedTitle  = GetFormatedText(titleRequest.Title);
            titleRequest.Name           = GetName(titleRequest.FormatedTitle);
            if (title != null)
            {
                titleRequest.RemainText = __rgTitleStructureName.Replace(title, "").Trim(__trimChars);
            }

            PrintTitleInfo titleInfo = new PrintTitleInfo
            {
                OriginalTitle  = titleRequest.OriginalTitle,
                Title          = titleRequest.Title,
                FormatedTitle  = titleRequest.FormatedTitle,
                Name           = titleRequest.Name,
                Special        = titleRequest.Special,
                SpecialText    = titleRequest.SpecialText,
                Number         = titleRequest.Number,
                NumberMatch    = titleRequest.NumberMatch,
                Date           = titleRequest.Date,
                DateType       = titleRequest.DateType,
                DateMatch      = titleRequest.DateMatch,
                TitleStructure = titleRequest.TitleStructure,
                RemainText     = titleRequest.RemainText
            };

            return titleInfo;
        }