/// <summary>
/// Lazily reads the clippings file at <paramref name="path"/> line by line and yields one
/// clipping every time a separator line is read.
/// </summary>
/// <param name="path">Path to the clippings .txt file.</param>
/// <param name="format">Format description forwarded unchanged to ParseClipping.</param>
/// <returns>
/// A deferred sequence: the file is opened when enumeration starts and closed when it ends
/// or the enumerator is disposed. Lines after the last separator are parsed but the clipping
/// they belong to is never yielded.
/// </returns>
protected virtual IEnumerable<Clipping> ParseStreamFromPath(string path, FormatType format)
{
    // Ask for read access only. FileStream(path, FileMode.Open) requests read/write access,
    // which fails on read-only files even though nothing is written here.
    using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var sr = new StreamReader(stream))
    {
        string line;
        int clippingLineNumber = 0;
        Clipping clipping = new Clipping();

        while ((line = sr.ReadLine()) != null)
        {
            if (line == ClippingSeparator)
            {
                yield return clipping;
                clippingLineNumber = 0;
                clipping = new Clipping();
            }
            else
            {
                clippingLineNumber++;
            }

            // Separator lines are forwarded too, with line number 0 and the fresh clipping.
            // Kept so that overrides of ParseClipping still see every line.
            ParseClipping(clippingLineNumber, line, clipping, format);
        }
    }
}
//TODO Too many parameters, simplify
/// <summary>
/// Builds a date string from selected tokens of <paramref name="splitLine"/> and stores it in
/// <c>clipping.DateAdded</c> when it matches one of <paramref name="dateFormats"/> and is not
/// in the future. In every other case <c>Defaults.DateAdded</c> is stored.
/// </summary>
/// <param name="splitLine">The space-separated tokens of the line that carries the date.</param>
/// <param name="clipping">Clipping that receives the date.</param>
/// <param name="dateIndex">Index of the first date token; tokens at offsets 0, 1, 3, 5 and 6 are used.</param>
/// <param name="dateFormats">Exact formats accepted by the parse.</param>
/// <param name="culture">Culture used to interpret the date text.</param>
protected virtual void ParseDateExact(string[] splitLine, Clipping clipping, int dateIndex, string[] dateFormats, CultureInfo culture)
{
    // The highest token read is dateIndex + 6; check it up front so a short line becomes a
    // default date instead of an IndexOutOfRangeException escaping to the caller.
    if (splitLine == null || dateIndex < 0 || dateIndex + 6 >= splitLine.Length)
    {
        clipping.DateAdded = Defaults.DateAdded;
        System.Diagnostics.Debug.WriteLine("Error encountered adding date: too few tokens for date index " + dateIndex + ".");
        return;
    }

    //Formatting help: https://msdn.microsoft.com/en-us/library/8kb3ddd4.aspx
    string dateAdded = String.Join(" ",
        splitLine[dateIndex],
        splitLine[dateIndex + 1],
        splitLine[dateIndex + 3],
        splitLine[dateIndex + 5],
        splitLine[dateIndex + 6]);

    //Removing single quotes. There might be other "noise" characters, this one is especially important in typeRub.
    string input = dateAdded.Replace("'", string.Empty);

    try
    {
        /* Dates have to be parsed and converted to a DateTime. TryParseExact should do the
         * trick as long as the proper format is added to the dateFormats array. */
        DateTime dt;
        bool parsed = DateTime.TryParseExact(input, dateFormats, culture, DateTimeStyles.None, out dt);

        // Dates in the future are rejected as implausible.
        if (parsed && dt < DateTime.Now)
        {
            clipping.DateAdded = dt;
        }
        else
        {
            // Same fallback ParseDate applies when it cannot read a date.
            clipping.DateAdded = Defaults.DateAdded;
        }
    }
    catch (Exception ex)
    {
        clipping.DateAdded = Defaults.DateAdded;
        // Best effort: record the failure (debug builds) rather than aborting the parse.
        System.Diagnostics.Debug.WriteLine("Error encountered adding date: " + ex.Message);
    }
}
/// <summary>
/// Stores a clipping and hands back the key under which it was stored.
/// </summary>
/// <param name="clipping">The clipping to store.</param>
/// <returns>The key assigned to the clipping: the number of keys held before this call.</returns>
public int AddClipping(Clipping clipping)
{
    // The next key is simply the current number of keys.
    int nextId = numberedClippings.Keys.Count;

    numberedClippings.Add(nextId, clipping);

    return nextId;
}
/// <summary>
/// Feeds the parsed clippings into the clipping storage, then copies every stored clipping
/// into the storage's final list. <c>rawClippingCount</c> ends up holding the number of
/// clippings seen, whether or not they were stored.
/// </summary>
/// <param name="clippings">Clippings to process; enumerated once.</param>
public void GenerateClippingList(IEnumerable<Clipping> clippings)
{
    rawClippingCount = 0;

    foreach (Clipping candidate in clippings)
    {
        // Keep anything with text; empty clippings are kept only when they are bookmarks.
        bool hasContent = !Clipping.IsNullOrEmpty(candidate);
        if (hasContent || Clipping.IsBookMark(candidate))
        {
            ClippingStorage.AddClipping(candidate);
        }

        rawClippingCount++;
    }

    // Copy the stored clippings, by key 0..count-1, into the list database.
    int storedCount = ClippingStorage.numberedClippings.Count;
    for (int id = 0; id < storedCount; id++)
    {
        ClippingStorage.finalClippingsList.Add(ClippingStorage.GetClipping(id));
    }

    //TODO What if there is no valid clippings at all?
}
/// <summary>
/// Stores the trimmed text line in <c>clipping.Text</c>; a null line yields <c>Defaults.Text</c>.
/// </summary>
/// <param name="line">Raw text line of the clipping; may be null.</param>
/// <param name="clipping">Clipping that receives the text.</param>
protected virtual void ParseLine4(string line, Clipping clipping)
{
    // Explicit null check instead of catching every exception: the only expected failure
    // is a missing line, and unrelated errors should not be silently swallowed.
    if (line == null)
    {
        clipping.Text = Defaults.Text;
        return;
    }

    clipping.Text = line.Trim();
}
/// <summary>
/// Creates the <c>Defaults</c> clipping with Spanish placeholder texts for book name and
/// author, empty text, location and page, and a default <c>DateTime</c>.
/// </summary>
protected override void InitDefaults()
{
    Defaults = new Clipping();
    // Literal repaired: it was stored as mojibake (the UTF-8 bytes of the accented "i"
    // decoded as Latin-1), which would surface verbatim wherever the default is shown.
    Defaults.BookName = "Título desconocido";
    Defaults.Author = "Autor desconocido";
    Defaults.Text = "";
    Defaults.Location = "";
    Defaults.Page = "";
    Defaults.DateAdded = new DateTime();
}
/// <summary>
/// Creates the <c>Defaults</c> clipping with English placeholder texts for book name and
/// author, empty location, text and page, and a default <c>DateTime</c>.
/// </summary>
protected override void InitDefaults()
{
    Defaults = new Clipping
    {
        BookName = "Unknown book",
        Author = "Unknown author",
        Location = string.Empty,
        Text = string.Empty,
        Page = string.Empty,
        DateAdded = default(DateTime)
    };
}
//TODO quick hardcoded solution in order to exclude bookmarks from empty clipping removal.
//Quick and dirty, can be way better.
/// <summary>
/// Tells whether a clipping is a bookmark, in either the English or the Spanish enum value.
/// </summary>
/// <param name="item">Clipping to inspect; may be null.</param>
/// <returns>
/// True when the clipping type is <c>Bookmark</c> or <c>Marcador</c>; false otherwise,
/// including when <paramref name="item"/> is null.
/// </returns>
public static bool IsBookMark(Clipping item)
{
    // A null clipping is not a bookmark. Without this guard a null item throws here,
    // and GenerateClippingList calls this for items that IsNullOrEmpty reports as null.
    if (item == null)
    {
        return false;
    }

    return item.ClippingType == ClippingTypeEnum.Bookmark
        || item.ClippingType == ClippingTypeEnum.Marcador;
}
/// <summary>
/// Lazily parses clippings out of an in-memory string and yields one clipping every time a
/// separator line is reached.
/// </summary>
/// <param name="content">Full text of a clippings file; split on "\n" and "\r\n".</param>
/// <param name="format">Format description forwarded unchanged to ParseClipping.</param>
/// <returns>
/// A deferred sequence of clippings. Lines after the last separator are processed but the
/// clipping they belong to is never yielded.
/// </returns>
protected virtual IEnumerable<Clipping> ParseFromString(string content, FormatType format)
{
    Clipping clipping = new Clipping();

    //TODO Hardcoded line break to solve?
    string[] result = content.Split(new string[] { "\n", "\r\n" }, StringSplitOptions.None);

    string lines = null;
    int clippingLineNumber = 0;

    for (int index = 0; index < result.Length; index++)
    {
        string current = result[index];
        clippingLineNumber++;

        if (current == ClippingSeparator)
        {
            yield return clipping;
            clippingLineNumber = 0;
            clipping = new Clipping();
            continue;
        }

        // Lines 1-3 are title, metadata and the blank spacer; anything later is text.
        bool isTextLine = clippingLineNumber > 3;

        // Look ahead only when a next line exists. Reading past the end used to throw
        // IndexOutOfRangeException for input that does not end right after a separator
        // (trailing blank lines, truncated files). End of input now closes the text.
        bool moreTextFollows = index + 1 < result.Length
            && result[index + 1] != ClippingSeparator;

        //TODO Simplify this foul smelling conditional logic
        if (isTextLine && moreTextFollows)
        {
            // Buffer the text line (no separator is inserted between buffered lines).
            lines += current;
        }
        else if (clippingLineNumber == 3)
        {
            lines = null;
        }
        else
        {
            // NOTE(review): when earlier text lines were buffered, the current (last) line is
            // not appended and the line number passed is greater than 4, which the
            // ParseClipping switch in this source does not handle. Confirm whether multi-line
            // text is meant to be supported.
            if (lines == null)
            {
                lines = current;
            }

            ParseClipping(clippingLineNumber, lines, clipping, format);
            lines = null;
        }
    }
}
/// <summary>
/// Extracts book name and author from the first line of a clipping using
/// <c>Line1RegexPattern</c> (capture group 1 is the book name, group 2 the author).
/// </summary>
/// <param name="line">First line of the clipping.</param>
/// <param name="clipping">Clipping that receives <c>BookName</c> and <c>Author</c>.</param>
/// <remarks>
/// Whenever a value cannot be obtained (the line does not match, a captured value is empty,
/// or matching throws) the corresponding value from <c>Defaults</c> is used.
/// </remarks>
protected virtual void ParseLine1(string line, Clipping clipping)
{
    string bookName = null;
    string author = null;
    bool matched = false;

    try
    {
        Match match = Regex.Match(line, Line1RegexPattern);
        if (match.Success)
        {
            matched = true;
            bookName = match.Groups[1].Value.Trim();
            author = match.Groups[2].Value.Trim();
        }
    }
    catch (Exception)
    {
        // Best effort: a null line or a bad pattern is treated like a non-matching line.
        matched = false;
    }

    if (!matched)
    {
        System.Diagnostics.Debug.WriteLine("Clipping Line 1 did not match regex pattern, using default values for Author and Bookname.");
    }

    // Applied on every path, so a non-matching line gets defaults too instead of leaving
    // both properties untouched.
    clipping.BookName = !string.IsNullOrEmpty(bookName) ? bookName : Defaults.BookName;
    clipping.Author = !string.IsNullOrEmpty(author) ? author : Defaults.Author;
}
/// <summary>
/// Maps a Spanish clipping-type keyword (compared in lower case) to its enum value and
/// stores it in <c>clipping.ClippingType</c>.
/// </summary>
/// <param name="clippingType">Keyword taken from the clipping's metadata line.</param>
/// <param name="clipping">Clipping that receives the type.</param>
private void SetClippingType(string clippingType, Clipping clipping)
{
    string keyword = clippingType.ToLower();
    ClippingTypeEnum recognised;

    if (keyword == "subrayado")
    {
        recognised = ClippingTypeEnum.Subrayado;
    }
    else if (keyword == "nota")
    {
        recognised = ClippingTypeEnum.Notas;
    }
    else if (keyword == "marcador")
    {
        recognised = ClippingTypeEnum.Marcador;
    }
    else
    {
        // Any other keyword is flagged as not recognised.
        recognised = ClippingTypeEnum.NoReconocido;
    }

    clipping.ClippingType = recognised;
}
/// <summary>
/// Dispatches one line of a clipping to the parser responsible for its position within the
/// clipping.
/// </summary>
/// <param name="lineNumber">1-based position of the line inside the current clipping.</param>
/// <param name="line">The line's text.</param>
/// <param name="clipping">Clipping being filled in.</param>
/// <param name="format">Format description passed on to the line-2 parser.</param>
/// <remarks>
/// Only lines 1, 2 and 4 are dispatched. Line 3 is irrelevant (just white space acting as a
/// separator) and every other line number is ignored. Exceptions raised by the line parsers
/// are logged and not rethrown, so one malformed line does not abort the whole parse.
/// </remarks>
public virtual void ParseClipping(int lineNumber, string line, Clipping clipping, FormatType format)
{
    try
    {
        switch (lineNumber)
        {
            case 1:
                ParseLine1(line, clipping);
                break;

            case 2:
                ParseLine2(line, clipping, format);
                break;

            case 4:
                ParseLine4(line, clipping);
                break;
        }
    }
    catch (Exception ex)
    {
        // Write the failure to the debug output so it leaves a trace; constructing an
        // Exception without throwing it records nothing.
        System.Diagnostics.Debug.WriteLine("Error encountered parsing line " + lineNumber + ": " + ex.Message);
    }
}
/// <summary>
/// Joins the date tokens of <paramref name="splitLine"/> starting at
/// <paramref name="dateIndex"/> and stores the parsed result in <c>clipping.DateAdded</c>.
/// When no date can be read, <c>Defaults.DateAdded</c> is stored instead.
/// </summary>
/// <param name="splitLine">The space-separated tokens of the line that carries the date.</param>
/// <param name="clipping">Clipping that receives the date.</param>
/// <param name="dateIndex">Index of the first date token.</param>
protected virtual void ParseDate(string[] splitLine, Clipping clipping, int dateIndex)
{
    try
    {
        //Hackish: when any token contains "GMT" only five tokens are used, otherwise six.
        //Indexes change and problems. Can be improved.
        bool hasGmtToken = splitLine.Any(item => item.Contains("GMT"));
        int tokenCount = hasGmtToken ? 5 : 6;

        // Bounds are checked explicitly so that a short line is an ordinary "no date"
        // outcome rather than an exception.
        if (dateIndex >= 0 && dateIndex + tokenCount <= splitLine.Length)
        {
            string dateAddedString = String.Join(" ", splitLine, dateIndex, tokenCount);

            // Current culture first (the behaviour callers already rely on), then the
            // invariant culture so English date text still parses on non-English systems.
            DateTime dateAdded;
            if (DateTime.TryParse(dateAddedString, out dateAdded)
                || DateTime.TryParse(dateAddedString,
                                     System.Globalization.CultureInfo.InvariantCulture,
                                     System.Globalization.DateTimeStyles.None,
                                     out dateAdded))
            {
                clipping.DateAdded = dateAdded;
                return;
            }
        }

        clipping.DateAdded = Defaults.DateAdded;
        System.Diagnostics.Debug.WriteLine("Error encountered adding date: no parsable date at token index " + dateIndex + ".");
    }
    catch (Exception ex)
    {
        clipping.DateAdded = Defaults.DateAdded;
        // Best effort: record the failure (debug builds) rather than aborting the parse.
        System.Diagnostics.Debug.WriteLine("Error encountered adding date: " + ex.Message);
    }
}
/// <summary>
/// Parses the metadata line of an English clipping: clipping type, page, location and the
/// date added.
/// </summary>
/// <param name="line">The metadata line; it is split on single spaces.</param>
/// <param name="clipping">Clipping that receives the parsed values.</param>
/// <param name="format">Supplies the token positions and the page/location keywords.</param>
/// <remarks>
/// A page or location token that lies outside the line is replaced by the corresponding
/// value from <c>Defaults</c>. The date is delegated to <c>ParseDate</c>.
/// </remarks>
protected override void ParseLine2(string line, Clipping clipping, FormatType format)
{
    var split = line.Split(' ');

    var clippingType = split[format.clippingTypePosition];

    // Invariant lower-casing: keyword matching must not depend on the machine's culture
    // (under Turkish casing rules an upper-case "I" does not lower to "i").
    switch (clippingType.ToLowerInvariant())
    {
        case "highlight":
            clipping.ClippingType = ClippingTypeEnum.Highlight;
            break;

        case "note":
            clipping.ClippingType = ClippingTypeEnum.Note;
            break;

        case "bookmark":
            clipping.ClippingType = ClippingTypeEnum.Bookmark;
            break;

        default:
            clipping.ClippingType = ClippingTypeEnum.NotRecognized;
            break;
    }

    //Check if line contains any of the critical strings stored in keywords arrays.
    bool hasPageNumber = format.pageWording.Any(line.Contains);
    bool hasLocation = format.locationWording.Any(line.Contains);

    /* Indexes are different in Spanish and English version (answers "where to cut" for the
     * different variables). It also depends on particular formats for each language. */
    var dateIndex = format.dateIndex;
    var locationIndex = format.locationIndex;
    var pageIndex = format.pageIndex;

    if (hasPageNumber)
    {
        if (pageIndex >= 0 && pageIndex < split.Length)
        {
            clipping.Page = split[pageIndex];

            // A page token shifts where location and date are found.
            locationIndex = format.hasPageLocationIndex;
            dateIndex = hasLocation ? format.hasPageHasLocationDateIndex : format.hasPageDateIndex;
        }
        else
        {
            // Page token missing: fall back and leave the other indexes as they were.
            clipping.Page = Defaults.Page;
        }
    }

    if (hasLocation)
    {
        bool locationInRange = locationIndex >= 0 && locationIndex < split.Length;
        clipping.Location = locationInRange ? split[locationIndex] : Defaults.Location;
    }

    /* If any format exceptions occur or it is better to manually look for something for any
     * reason, add logic below. */
    // "typeRick" lines whose second token is "Clip" (flagged hasInstapaper in the original
    // code) use fixed positions for location and date.
    bool hasInstapaper = format.ID == "typeRick" && split.Length > 1 && split[1] == "Clip";
    if (hasInstapaper)
    {
        locationIndex = 6;
        dateIndex = 10;

        if (hasLocation)
        {
            clipping.Location = locationIndex < split.Length ? split[locationIndex] : Defaults.Location;
        }
    }

    ParseDate(split, clipping, dateIndex);
}
/// <summary>
/// Parses the metadata line of a Spanish clipping: clipping type, page, location and the
/// date added.
/// </summary>
/// <param name="line">The metadata line; it is split on single spaces.</param>
/// <param name="clipping">Clipping that receives the parsed values.</param>
/// <param name="format">Supplies the token positions and the page/location keywords.</param>
/// <remarks>
/// A page or location token that lies outside the line is replaced by the corresponding
/// value from <c>Defaults</c>. The date is delegated to <c>ParseDateExact</c> with the
/// class's <c>dateFormats</c> and <c>spaCulture</c>.
/// </remarks>
protected override void ParseLine2(string line, Clipping clipping, FormatType format)
{
    var split = line.Split(' ');

    string clippingType = split[format.clippingTypePosition];
    SetClippingType(clippingType, clipping);

    bool hasPageNumber = format.pageWording.Any(line.Contains);
    bool hasLocation = format.locationWording.Any(line.Contains);

    var dateIndex = format.dateIndex;
    var locationIndex = format.locationIndex;
    var pageIndex = format.pageIndex;
    var hasPageDateIndex = format.hasPageDateIndex;
    var hasPageHasLocationDateIndex = format.hasPageHasLocationDateIndex;
    var hasPageLocationIndex = format.hasPageLocationIndex;

    // Lines whose second token is "Tu" (the "Kyuni" subtype in the original code) use fixed
    // positions. The length check keeps a one-token line from throwing here.
    bool isSubtypeKyuni = split.Length > 1 && split[1] == "Tu";
    if (isSubtypeKyuni)
    {
        dateIndex = 10;
        locationIndex = 6;
        hasPageDateIndex = 10;
    }

    if (hasPageNumber)
    {
        if (pageIndex >= 0 && pageIndex < split.Length)
        {
            clipping.Page = split[pageIndex];

            // A page token shifts where location and date are found.
            locationIndex = hasPageLocationIndex;
            dateIndex = hasLocation ? hasPageHasLocationDateIndex : hasPageDateIndex;
        }
        else
        {
            // Page token missing: fall back and leave the other indexes as they were.
            clipping.Page = Defaults.Page;
        }
    }

    if (hasLocation)
    {
        bool locationInRange = locationIndex >= 0 && locationIndex < split.Length;
        clipping.Location = locationInRange ? split[locationIndex] : Defaults.Location;
    }

    ParseDateExact(split, clipping, dateIndex, dateFormats, spaCulture);
}
/// <summary>
/// Tells whether a clipping carries no text.
/// </summary>
/// <param name="item">Clipping to inspect; may be null.</param>
/// <returns>True when <paramref name="item"/> is null or its <c>Text</c> is null or empty.</returns>
public static bool IsNullOrEmpty(Clipping item)
{
    if (item == null)
    {
        return true;
    }

    return string.IsNullOrEmpty(item.Text);
}
/// <summary>
/// Parses the second line of a clipping. ParseClipping calls this for line number 2; each
/// language-specific parser supplies the implementation.
/// </summary>
/// <param name="line">The raw second line of the clipping.</param>
/// <param name="clipping">Clipping to fill in from the line.</param>
/// <param name="format">Format description for the file being parsed.</param>
protected abstract void ParseLine2(string line, Clipping clipping, FormatType format);