/// <summary>
/// Searches <paramref name="text"/> for a date with the given manager and traces
/// the outcome (matched text, remaining text, date, date type and named values).
/// </summary>
/// <param name="findDateManager">Manager whose <c>Find</c> method performs the search.</param>
/// <param name="text">Text to search.</param>
/// <returns>The result object returned by <c>findDateManager.Find(text)</c>.</returns>
public static FindDate FindDate(FindDateManager_v1 findDateManager, string text)
{
    FindDate result = findDateManager.Find(text);
    Trace.WriteLine("search date in : \"{0}\"", text);

    if (!result.Found)
    {
        Trace.WriteLine(" date not found ");
    }
    else
    {
        // Matched text, then the input with the match replaced by "_".
        Trace.WriteLine(" found date : \"{0}\"", result.matchValues.Match.Value);
        Trace.WriteLine(" remain text : \"{0}\"", result.matchValues.Replace("_"));
        Trace.WriteLine(" date : {0:dd-MM-yyyy} type {1}", result.Date, result.DateType);

        // Values of the match, written after the " values : " prefix.
        Trace.Write(" values : ");
        result.matchValues.GetValues().zTrace();
        Trace.WriteLine();
    }

    // Blank line separating this entry from the next one in the trace.
    Trace.WriteLine();
    return result;
}
/// <summary>
/// Searches <paramref name="title"/> for a date (using <c>_expectedDate</c>) and, when one
/// is found, stores the date, its type and its match in the instance fields.
/// </summary>
/// <param name="title">Title to search.</param>
/// <returns>
/// The title with the matched date replaced by <c>" $$date$$ "</c>, or the unchanged
/// title when no date is found.
/// </returns>
private string FindDate(string title)
{
    FindDate result = _printTitleManager.FindDateManager.Find(title, _expectedDate);
    if (!result.Found)
    {
        return title;
    }

    _date = result.Date;
    _dateType = result.DateType;
    _dateMatch = result.matchValues;

    // The replacement is computed before the flag is raised, as in the original ordering.
    string titleWithPlaceholder = result.matchValues.Replace(" $$date$$ ");
    _foundDate = true;
    return titleWithPlaceholder;
}
/// <summary>
/// Runs the date search over every document of a BSON file and traces the results.
/// A "year" regex matching the previous, current and next year is added to the manager
/// first. While the test runs, trace output is also written to a "_out" file derived
/// from the input file name and the trace viewer is disabled.
/// </summary>
/// <param name="findDateManager">Manager under test; its <c>DateRegexList</c> is modified.</param>
/// <param name="file">File name, combined with the directory returned by <c>GetDirectoryDate()</c>.</param>
public static void Test_FindDate_02(FindDateManager_v1 findDateManager, string file)
{
    file = zPath.Combine(GetDirectoryDate(), file);

    // Extra regex: a standalone year (previous, current or next) delimited by
    // start/end of text, underscore or whitespace.
    int currentYear = Date.Today.Year;
    string yearPattern = string.Format(@"(?:^|[_\s])({0}|{1}|{2})(?:$|[_\s])", currentYear - 1, currentYear, currentYear + 1);
    findDateManager.DateRegexList.Add("year", new RegexValues("year", "year", yearPattern, "IgnoreCase", "year", compileRegex: true));

    string outputFile = zpath.PathSetFileNameWithoutExtension(file, zPath.GetFileNameWithoutExtension(file) + "_out");
    Trace.WriteLine("Test_FindDate \"{0}\" (nb date regex {1})", file, findDateManager.DateRegexList.Count);

    Trace.CurrentTrace.DisableViewer = true;
    Trace.CurrentTrace.AddOnWrite("Test_FindDate_02", WriteToFile.Create(outputFile, FileOption.RazFile).Write);
    try
    {
        DateTime startTime = DateTime.Now;
        TraceRegexValuesList(findDateManager.DateRegexList);
        Trace.WriteLine();

        int textCount = 0;
        int dateFoundCount = 0;
        foreach (BsonDocument document in zMongo.BsonRead<BsonDocument>(file))
        {
            FindDate result = FindDate(findDateManager, document["text"].AsString);
            if (result.Found)
            {
                dateFoundCount += 1;
            }
            textCount += 1;
        }

        Trace.WriteLine();
        Trace.WriteLine();
        Trace.WriteLine("search date in {0} text", textCount);
        Trace.WriteLine("found date in {0} text", dateFoundCount);
        Trace.WriteLine("test duration {0}", DateTime.Now - startTime);
    }
    finally
    {
        // Undo the trace redirection set up before the try block.
        Trace.CurrentTrace.RemoveOnWrite("Test_FindDate_02");
        Trace.CurrentTrace.DisableViewer = false;
    }
}
/// <summary>
/// Searches <paramref name="text"/> for a date and packages the outcome in a
/// <c>TestFindDate</c>.
/// </summary>
/// <param name="findDateManager">Manager whose <c>Find</c> method performs the search.</param>
/// <param name="text">Text to search.</param>
/// <returns>
/// A <c>TestFindDate</c> holding the text and the found flag; when a date is found it
/// also holds the matched text, the date, the date type, the text with the match
/// replaced by "_" and the values of the match.
/// </returns>
public static TestFindDate FindDateNew(FindDateManager findDateManager, string text)
{
    FindDate result = findDateManager.Find(text);
    TestFindDate testResult = new TestFindDate
    {
        text = text,
        foundDate = result.Found
    };

    if (!result.Found)
    {
        return testResult;
    }

    testResult.dateFound = result.matchValues.Match.Value;
    testResult.date = result.Date;
    testResult.dateType = result.DateType;
    testResult.remainText = result.matchValues.Replace("_");
    testResult.namedValues = result.matchValues.GetValues();
    return testResult;
}
/// <summary>
/// Lazily runs the date search on each item of <paramref name="textList"/> and yields one
/// <c>TestFindDate</c> per item, in the same order.
/// </summary>
/// <param name="textList">Items whose <c>text</c> member is searched.</param>
/// <param name="findDateManager">Manager whose <c>Find</c> method performs the search.</param>
/// <returns>
/// One <c>TestFindDate</c> per input item; the date-related members are filled only
/// when a date is found.
/// </returns>
public static IEnumerable<TestFindDate> zFindDate(this IEnumerable<TestText> textList, FindDateManager_v1 findDateManager)
{
    foreach (TestText item in textList)
    {
        FindDate result = findDateManager.Find(item.text);
        TestFindDate testResult = new TestFindDate
        {
            text = item.text,
            foundDate = result.Found
        };

        if (result.Found)
        {
            testResult.dateFound = result.matchValues.Match.Value;
            testResult.date = result.Date;
            testResult.dateType = result.DateType;
            testResult.remainText = result.matchValues.Replace("_");
            testResult.namedValues = result.matchValues.GetValues();
        }

        yield return testResult;
    }
}
//private PrintTitleInfo _GetPrintTitleInfo_v1(string title) //{ // PrintTitleInfo titleInfo = new PrintTitleInfo(); // titleInfo.originalTitle = title; // string titleStructure = title; // FindText findSpecial = _findSpecial.Find(titleStructure); // if (findSpecial.found) // { // titleInfo.special = true; // titleInfo.specialMatch = findSpecial.matchValues; // titleStructure = findSpecial.matchValues.Replace(" $$special$$ "); // } // FindNumber findNumber = _findNumber.Find(titleStructure); // if (findNumber.found) // { // titleInfo.number = findNumber.number; // titleInfo.numberMatch = findNumber.matchValues; // titleStructure = findNumber.matchValues.Replace(" $$number$$ "); // } // FindDate findDate = _findDateManager_v1.Find(titleStructure); // if (findDate.found) // { // titleInfo.date = findDate.date; // titleInfo.dateType = findDate.dateType; // titleInfo.dateMatch = findDate.matchValues; // titleStructure = findDate.matchValues.Replace(" $$date$$ "); // } // titleInfo.dateOtherMatchList = findDate.matchValuesList; // Match match = __rgSpecialLabel.Match(titleStructure); // if (match.Success) // { // titleInfo.specialText = match.Groups[1].Value.Trim(__trimChars); // titleInfo.specialText = GetFormatedText(titleInfo.specialText); // titleStructure = match.zReplace(titleStructure, ""); // } // int i = titleStructure.IndexOf("$$"); // if (i != -1) // { // titleInfo.title = titleStructure.Substring(0, i).Trim(__trimChars); // titleStructure = titleStructure.Substring(i); // } // else // titleInfo.title = titleStructure; // titleInfo.titleStructure = titleStructure; // titleInfo.formatedTitle = GetFormatedText(titleInfo.title); // titleInfo.name = GetName(titleInfo.formatedTitle); // titleInfo.remainText = __rgTitleStructureName.Replace(titleStructure, "").Trim(__trimChars); // titleInfo.file = GetFile(titleInfo); // return titleInfo; //} /// not used //private PrintTitleInfo _GetPrintTitleInfo_v2(string title) //{ // PrintTitleRequest titleRequest = new 
PrintTitleRequest(); // titleRequest.originalTitle = title; // PrintSplitedTitle splitedTitle = SplitTitle(title); // FindText findSpecial = _findSpecial.Find(splitedTitle.titlePart1); // if (findSpecial.found) // { // titleRequest.special = true; // splitedTitle.titlePart1 = findSpecial.matchValues.Replace(" $$special$$ "); // } // FindNumber findNumber = _findNumber.Find(splitedTitle.titlePart1); // if (findNumber.found) // { // titleRequest.number = findNumber.number; // splitedTitle.titlePart1 = findNumber.matchValues.Replace(" $$number$$ "); // } // FindDate findDate = _findDateManager_new.Find(splitedTitle.titlePart2); // if (findDate.found) // { // titleRequest.date = findDate.date; // titleRequest.dateType = findDate.dateType; // splitedTitle.titlePart2 = findDate.matchValues.Replace(" $$date$$ "); // } // Match match = __rgSpecialLabel.Match(splitedTitle.titlePart1); // if (match.Success) // { // titleRequest.specialText = match.Groups[1].Value.Trim(__trimChars); // titleRequest.specialText = GetFormatedText(titleRequest.specialText); // splitedTitle.titlePart1 = match.zReplace(splitedTitle.titlePart1, ""); // } // //Trace.WriteLine("titleRequest :"); // //Trace.WriteLine(titleRequest.zToJson()); // //Trace.WriteLine("splitedTitle :"); // //Trace.WriteLine(splitedTitle.zToJson()); // string concatenatedTitle = splitedTitle.titlePart1; // if (!string.IsNullOrEmpty(splitedTitle.realTitlePart2)) // concatenatedTitle += " - " + splitedTitle.realTitlePart2; // int i = concatenatedTitle.IndexOf("$$"); // if (i != -1) // { // titleRequest.title = concatenatedTitle.Substring(0, i).Trim(__trimChars); // concatenatedTitle = concatenatedTitle.Substring(i); // } // else // { // titleRequest.title = concatenatedTitle; // concatenatedTitle = null; // } // titleRequest.titleStructure = concatenatedTitle; // titleRequest.formatedTitle = GetFormatedText(titleRequest.title); // titleRequest.name = GetName(titleRequest.formatedTitle); // if (concatenatedTitle != null) // 
//        titleRequest.remainText = __rgTitleStructureName.Replace(concatenatedTitle, "").Trim(__trimChars);
//    titleRequest.file = GetFile(titleRequest);
//    PrintTitleInfo titleInfo = new PrintTitleInfo
//    {
//        originalTitle = titleRequest.originalTitle,
//        title = titleRequest.title,
//        formatedTitle = titleRequest.formatedTitle,
//        name = titleRequest.name,
//        special = titleRequest.special,
//        specialText = titleRequest.specialText,
//        number = titleRequest.number,
//        date = titleRequest.date,
//        dateType = titleRequest.dateType,
//        titleStructure = titleRequest.titleStructure,
//        remainText = titleRequest.remainText,
//        file = titleRequest.file
//    };
//    return titleInfo;
//}

/// <summary>
/// Parses a print title into a <c>PrintTitleInfo</c> (the "new split" version).
/// Recognized elements are replaced in the working title by placeholders
/// (<c>" $$date$$ "</c>, <c>" $$special$$ "</c>, <c>" $$number$$ "</c>); the text before the
/// first <c>"$$"</c> becomes the title and the rest becomes the title structure.
/// </summary>
/// <param name="title">Raw title; stored unchanged as <c>OriginalTitle</c>.</param>
/// <param name="splitTitle">
/// When true, the date is first searched only in the tail of the title: after the last
/// " du " or "- " (whichever is further right), then after the first "- ".
/// </param>
/// <param name="expectedDate">Passed through to <c>_findDateManager.Find</c>.</param>
/// <returns>The populated <c>PrintTitleInfo</c>.</returns>
private PrintTitleInfo _GetPrintTitleInfo(string title, bool splitTitle, Date? expectedDate)
{
    // Why split — sample titles and the date text noted for them in the original comment
    // (presumably the text matched when the whole title is searched — TODO confirm):
    //   "Le Parisien + Votre été du dimanche 24 août 2014"                          date = "été du dimanche 24 août 2014"
    //   "Le Parisien + Votre été du dimanche 24 août 2014"                          date = "été du dimanche 24 août 2014"
    //   "Le Parisien + Votre été (la France en fête) du dimanche 20 juillet 2014"   date = "été"
    //   "Le Monde + Eco&Entreprise + journal de 1994 du mardi 08 avril 2014"        date = "de 1994"
    //   "Le Monde de l'Image 84 - 2013"                                             date not found
    //   "Le Monde de L'Intelligence 29 - Février-Mars 2013"                         date not found
    //   "Le Monde des Sciences 7 - Février-Mars 2013"                               date = "7 - Février-Mars 2013"
    PrintTitleRequest titleRequest = new PrintTitleRequest();
    titleRequest.OriginalTitle = title;

    // The original source carries a note "new, 11/08/2015" next to this call.
    title = ReplaceCharacters(title);

    bool foundDate = false;
    if (splitTitle)
    {
        // First attempt: split at the right-most of the last " du " and the last "- ".
        int lastDuIndex = title.LastIndexOf(" du ", StringComparison.InvariantCultureIgnoreCase);
        int lastDashIndex = title.LastIndexOf("- ");
        foundDate = TryReplaceDateAfterSplit(ref title, Math.Max(lastDuIndex, lastDashIndex), expectedDate, titleRequest);

        if (!foundDate)
        {
            // Second attempt: split at the first "- ".
            // (The original also computed Math.Min of the two indexes here, but the value
            // was overwritten immediately; that dead assignment has been removed.)
            int firstDashIndex = title.IndexOf("- ");
            foundDate = TryReplaceDateAfterSplit(ref title, firstDashIndex, expectedDate, titleRequest);
        }
    }

    FindText_v2 findSpecial = _findSpecial.Find(title);
    if (findSpecial.Success)
    {
        titleRequest.Special = true;
        title = findSpecial.Replace(" $$special$$ ");
    }

    FindNumber findNumber = _findNumberManager.Find(title);
    if (findNumber.found)
    {
        titleRequest.Number = findNumber.number;
        titleRequest.NumberMatch = findNumber.matchValues;
        title = findNumber.matchValues.Replace(" $$number$$ ");
    }

    if (!foundDate)
    {
        // Last resort: search the whole working title (special/number already replaced).
        TryReplaceDate(ref title, expectedDate, titleRequest);
    }

    Match match = __rgSpecialLabel.Match(title);
    if (match.Success)
    {
        titleRequest.SpecialText = match.Groups[1].Value.Trim(__trimChars);
        titleRequest.SpecialText = GetFormatedText(titleRequest.SpecialText);
        title = match.zReplace(title, "");
    }

    // Everything before the first placeholder is the title; the remainder (starting at
    // the placeholder) is the title structure. Without any placeholder the structure is null.
    int i = title.IndexOf("$$");
    if (i != -1)
    {
        titleRequest.Title = title.Substring(0, i).Trim(__trimChars);
        title = title.Substring(i);
    }
    else
    {
        titleRequest.Title = title;
        title = null;
    }

    titleRequest.TitleStructure = title;
    titleRequest.FormatedTitle = GetFormatedText(titleRequest.Title);
    titleRequest.Name = GetName(titleRequest.FormatedTitle);
    if (title != null)
    {
        titleRequest.RemainText = __rgTitleStructureName.Replace(title, "").Trim(__trimChars);
    }

    PrintTitleInfo titleInfo = new PrintTitleInfo
    {
        OriginalTitle = titleRequest.OriginalTitle,
        Title = titleRequest.Title,
        FormatedTitle = titleRequest.FormatedTitle,
        Name = titleRequest.Name,
        Special = titleRequest.Special,
        SpecialText = titleRequest.SpecialText,
        Number = titleRequest.Number,
        NumberMatch = titleRequest.NumberMatch,
        Date = titleRequest.Date,
        DateType = titleRequest.DateType,
        DateMatch = titleRequest.DateMatch,
        TitleStructure = titleRequest.TitleStructure,
        RemainText = titleRequest.RemainText
    };
    return titleInfo;
}

/// <summary>
/// Splits <paramref name="title"/> at <paramref name="splitIndex"/> and searches only the
/// tail for a date. On success the tail's match is replaced by the date placeholder and
/// <paramref name="title"/> is rebuilt as head + modified tail.
/// </summary>
/// <returns>True when a date was found; false when not found or when the index is -1.</returns>
private bool TryReplaceDateAfterSplit(ref string title, int splitIndex, Date? expectedDate, PrintTitleRequest titleRequest)
{
    if (splitIndex == -1)
    {
        return false;
    }

    string head = title.Substring(0, splitIndex);
    string tail = title.Substring(splitIndex);
    if (!TryReplaceDate(ref tail, expectedDate, titleRequest))
    {
        return false;
    }

    title = head + tail;
    return true;
}

/// <summary>
/// Searches <paramref name="text"/> for a date. On success records the date, its type and
/// its match in <paramref name="titleRequest"/> and replaces the match in
/// <paramref name="text"/> by <c>" $$date$$ "</c>.
/// </summary>
/// <returns>True when a date was found; otherwise false and nothing is modified.</returns>
private bool TryReplaceDate(ref string text, Date? expectedDate, PrintTitleRequest titleRequest)
{
    FindDate findDate = _findDateManager.Find(text, expectedDate);
    if (!findDate.Found)
    {
        return false;
    }

    titleRequest.Date = findDate.Date;
    titleRequest.DateType = findDate.DateType;
    titleRequest.DateMatch = findDate.matchValues;
    text = findDate.matchValues.Replace(" $$date$$ ");
    return true;
}