/// <summary>
/// Tests whether <paramref name="line"/> (ignoring trailing spaces) terminates this boundary.
/// </summary>
/// <param name="line">The line just read from the message.</param>
/// <param name="reader">Reader that receives the line back when it belongs to another boundary.</param>
/// <returns>
/// true when the line is this boundary's close delimiter, or when it is the open or close
/// delimiter of any boundary found in <c>boundaries</c>; false otherwise.
/// </returns>
public bool IsClose(String line, MessageReader reader)
{
    String trimmed = line.TrimEnd(' ');

    // Our own close delimiter: drop this boundary and report completion.
    if (trimmed == closeBoundary)
    {
        RemoveBoundary(this);
        return true;
    }

    // Any open or close delimiter of a boundary recorded in the list also closes
    // everything stacked above it.
    Boundary owner = boundaries.FindLast(
        known => known.closeBoundary == trimmed || known.openBoundary == trimmed);
    if (owner == null)
    {
        return false;
    }

    RemoveBoundary(owner);
    // Hand the line back so that the boundary's owner can process it itself.
    reader.PushCacheLine(trimmed);
    return true;
}
/// <summary>
/// Reads the archive at <paramref name="file"/>, parses it message by message and writes
/// per-message statistics to a new <c>stats{filetime}.out</c> file under <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory in which the statistics file is created.</param>
/// <param name="file">Path of the archive to process.</param>
/// <param name="status">Callback that receives status text.</param>
/// <param name="pcb">Callback that receives the percentage of the archive processed so far.</param>
/// <returns>A task that completes when processing has finished or failed.</returns>
/// <remarks>
/// Exceptions are not propagated to the caller: a failure of the whole run is reported through
/// <paramref name="status"/>, and a failure while handling a single message is written to the
/// statistics file. Messages whose message-id was already seen are skipped.
/// </remarks>
public async Task Start(String dir, String file, StatusCb status, ProgressCb pcb)
{
    MessageReader reader = null;
    StreamWriter filew = null;
    try
    {
        status(false, "", "Started statistics processing.");
        reader = new MessageReader(file);
        long size = reader.BaseStream.Length;
        double progress = .0;

        // Same path text as before, built with one formatting call instead of two StringBuilders.
        String statsFile = String.Format(@"{0}\stats{1}.out", dir, DateTime.Now.ToFileTime());
        filew = new StreamWriter(statsFile);
        DateTime start_time = DateTime.Now;
        await WriteStatsLine(filew, "archive size: {0}\n", size);

        int count = 1;
        await EmailParser.ParseMessages(token, reader, async delegate (Message message, Exception reason)
        {
            status(true, "^message:", "message: {0}", count++);
            try
            {
                if (null == message)
                {
                    String failure = (null != reason) ? reason.Message : "";
                    // Pass the text as an argument: concatenating it into the format string
                    // lets braces in an exception message be read as placeholders.
                    status(false, "", "message parsing failed: {0}", failure);
                    await WriteStatsLine(filew, "--> start");
                    await WriteStatsLine(filew, "<-- end failed to process: {0}", failure);
                    return;
                }

                // display progress
                progress += message.size;
                double pct = (100.0 * progress / (double)size);
                pcb(pct);

                // get required headers
                Dictionary<String, String> headers = message.email.headers.GetDictionary(new Dictionary<string, string>()
                {
                    { "from", "" },
                    { "cc", "" },
                    { "subject", "" },
                    { "date", "" },
                    { "to", "" },
                    { "bcc", "" },
                    { "in-reply-to", "" },
                    { "reply-to", "" },
                    { "content-type", "" },
                    { "message-id", "" },
                    { "x-gmail-labels", "" }
                });

                // get unique messages: a message-id that was seen before is not counted again
                String msgid = headers["message-id"];
                if (!String.IsNullOrEmpty(msgid) && MessageidUnique(msgid) == false)
                {
                    return;
                }

                await WriteStatsLine(filew, "--> start");
                int csize = await CompressedSize(message.GetBytes());
                await WriteStatsLine(filew, "Full Message: {0} {1}", message.size, csize);
                await WriteStatsLine(filew, "Hdrs");
                await WriteStatsLine(filew, "from: {0}", Sha1(EmailAddr(headers["from"])));
                await WriteStatsLine(filew, "to: {0}", GetAddrList(headers["to"]));
                await WriteStatsLine(filew, "cc: {0}", GetAddrList(headers["cc"]));
                await WriteStatsLine(filew, "bcc: {0}", GetAddrList(headers["bcc"]));
                await WriteStatsLine(filew, "date: {0}", headers["date"]);
                await WriteStatsLine(filew, "subject: {0}", GetSubject(headers["subject"]));
                await WriteStatsLine(filew, "mailbox: {0}", GetMailbox(headers["x-gmail-labels"]));
                await WriteStatsLine(filew, "messageid: {0}", GetMessageId(headers["message-id"]));
                await WriteStatsLine(filew, "inreplyto: {0}", GetInReplyTo(headers["in-reply-to"]));
                await WriteStatsLine(filew, "replyto: {0}", GetAddrList(headers["reply-to"]));
                await WriteStatsLine(filew, "Parts:");
                await TraverseEmail(filew, 0, 0, message.email);
                await WriteStatsLine(filew, "<-- end");
            }
            catch (Exception ex)
            {
                await WriteStatsLine(filew, "<-- end failed to process: {0}, {1}", ex.Message, ex.StackTrace);
            }
        });

        status(false, "", "Statistics is generated in file {0}", statsFile);
        TimeSpan span = DateTime.Now - start_time;
        status(false, "", "Processing time: {0} seconds", span.TotalSeconds);
    }
    catch (Exception ex)
    {
        status(false, "", "Statistics failed: {0}", ex.Message);
    }
    finally
    {
        try
        {
            if (filew != null)
            {
                filew.Close();
            }
        }
        finally
        {
            // The archive reader was previously never released. Its declaration is not
            // visible from here, so it is disposed only when it turns out to be disposable.
            // NOTE(review): if MessageReader derives from StreamReader, prefer a using statement.
            (reader as IDisposable)?.Dispose();
        }
    }
}
/// <summary>
/// Parses one message from <paramref name="reader"/>.
/// </summary>
/// <remarks>
/// Assumes (for now) that the message starts with the postmark and has the layout
/// postmark CRLF, headers CRLF, empty line, body.
/// </remarks>
/// <param name="reader">Source of the message lines.</param>
/// <returns>The result reported by the e-mail parser.</returns>
/// <exception cref="ParsingFailedException">The postmark parser reported a failure.</exception>
public async Task<ParseResult> Parse(MessageReader reader)
{
    postmark = new Postmark(entity);
    ParseResult postmarkOutcome = await postmark.Parse(reader);
    if (postmarkOutcome == ParseResult.Failed)
    {
        throw new ParsingFailedException("postmark is not found");
    }

    email = new Email(entity);
    ParseResult outcome = await email.Parse(reader);

    // Unless the e-mail parser already stopped at end of input or at the next postmark,
    // swallow whatever is left of this message.
    bool stoppedAtMessageEdge = outcome == ParseResult.Eof || outcome == ParseResult.Postmark;
    if (!stoppedAtMessageEdge)
    {
        await ConsumeToEnd(reader);
    }

    SetSize();
    return outcome;
}
/// <summary>
/// Parses messages from <paramref name="reader"/> one after another and hands each result
/// to <paramref name="cb"/>.
/// </summary>
/// <param name="token">Stops the loop before the next message once cancellation is requested.</param>
/// <param name="reader">Source of the messages.</param>
/// <param name="cb">
/// Invoked with the parsed message, or with null and the exception when anything inside the
/// iteration (parsing or the callback itself) throws.
/// </param>
public async static Task ParseMessages(CancellationToken token, MessageReader reader, MessageCb cb)
{
    // End of stream is tested first, cancellation second, as before.
    while (!reader.EndOfStream && !token.IsCancellationRequested)
    {
        try
        {
            Message parsed = new Message();
            await parsed.Parse(reader);
            await cb(parsed);
        }
        catch (Exception failure)
        {
            await cb(null, failure);
        }
    }
}
/// <summary>
/// Reads lines until one is recognised by <c>EmailParser.IsPostmark</c>, writes that line out
/// and records the resulting entity position as the size.
/// </summary>
/// <param name="reader">Source of the lines.</param>
/// <returns>
/// <c>ParseResult.Ok</c> when a postmark line was found; <c>ParseResult.Failed</c> when the
/// input ended first.
/// </returns>
public async Task<ParseResult> Parse(MessageReader reader)
{
    while (true)
    {
        String current = await reader.ReadLineAsync();
        if (current == null)
        {
            return ParseResult.Failed;
        }

        if (!EmailParser.IsPostmark(current))
        {
            // Lines in front of the postmark are dropped.
            continue;
        }

        WriteWithCrlf(current);
        // assume the Postmark always starts with 0 position
        size = (int)entity.Position;
        return ParseResult.Ok;
    }
}
/// <summary>
/// Parses the headers and then the content of one e-mail entity.
/// </summary>
/// <param name="reader">Source of the lines.</param>
/// <param name="type">Content type handed to the header parser.</param>
/// <param name="subtype">Content subtype handed to the header parser.</param>
/// <param name="boundary">Boundary used when the headers do not provide one of their own.</param>
/// <returns>
/// <c>ParseResult.Failed</c> when the header parser fails; otherwise the result of the
/// content parser.
/// </returns>
public async Task<ParseResult> Parse(MessageReader reader, ContentType type = ContentType.Text, ContentSubtype subtype = ContentSubtype.Plain, Boundary boundary = null)
{
    headers = new Headers(entity, type, subtype);
    ParseResult headerOutcome = await headers.Parse(reader);
    if (headerOutcome == ParseResult.Failed)
    {
        return ParseResult.Failed;
    }

    content = new Content(entity);

    // A boundary found in the headers takes precedence over the one passed in.
    Boundary effectiveBoundary = boundary;
    if (headers.boundary != null)
    {
        effectiveBoundary = headers.boundary;
    }

    return await content.Parse(reader, headers.contentType, headers.contentSubtype, effectiveBoundary);
}
/// <summary>
/// Parses the body of an entity according to <paramref name="type"/>: a multipart body is split
/// into nested e-mails at <paramref name="boundary"/>, a message body is parsed as one nested
/// e-mail, and anything else is copied line by line.
/// </summary>
/// <param name="reader">Source of the lines.</param>
/// <param name="type">Content type that selects the parsing mode.</param>
/// <param name="subtype">Content subtype, passed on to nested e-mails.</param>
/// <param name="boundary">Boundary delimiting the parts; required for multipart content.</param>
/// <returns>
/// <c>Eof</c> when the input ended, <c>Postmark</c> when the next message's postmark was reached
/// (the line is pushed back), <c>OkMultipart</c> when plain data ended at an open boundary,
/// <c>Ok</c> when it ended at a close boundary, <c>Failed</c> when multipart content has no
/// boundary, or the result of a nested e-mail.
/// </returns>
public async Task<ParseResult> Parse(MessageReader reader, ContentType type = ContentType.Text, ContentSubtype subtype = ContentSubtype.Plain, Boundary boundary = null)
{
    if (type == ContentType.Multipart)
    {
        dataType = DataType.Multipart;

        // Multipart content cannot be split without its delimiter. The parameter defaults to
        // null, so fail explicitly instead of dereferencing it below.
        if (boundary == null)
        {
            return ParseResult.Failed;
        }

        while (true)
        {
            String line = await reader.ReadLineAsync();
            if (line == null)
            {
                SetSize();
                return ParseResult.Eof;
            }
            else if (EmailParser.IsPostmark(line))
            {
                // consumed too much, probably missing boundary?
                reader.PushCacheLine(line);
                SetSize();
                return ParseResult.Postmark;
            }

            WriteWithCrlf(line);

            // find open boundary
            if (boundary.IsOpen(line))
            {
                Email email = null;
                ParseResult res;
                do
                {
                    // Consume all parts, each consisting of (optional) headers and content.
                    // The boundary token delimits a part; the close boundary completes
                    // multipart parsing. Content inside the multipart is responsible for
                    // consuming its own end delimiter; the exception is a last part that is
                    // itself multipart.
                    email = new Email(entity);
                    Add(email);
                } while ((res = await email.Parse(reader, type, subtype, boundary)) == ParseResult.OkMultipart);

                // If the last part is itself a multipart (or message?) it does not consume
                // this level's close boundary or further parts, so keep reading until all
                // parts and the close boundary are consumed.
                if (res == ParseResult.Ok && boundary.NotClosed())
                {
                    continue;
                }

                if (res != ParseResult.Failed)
                {
                    SetSize();
                }
                return res;
            }
            else if (boundary.IsClose(line, reader))
            {
                SetSize();
                return ParseResult.Ok;
            }
        }
    }
    else if (type == ContentType.Message)
    {
        dataType = DataType.Message;
        Email email = new Email(entity);
        Add(email);
        ParseResult res = await email.Parse(reader, type, subtype, boundary);
        if (res != ParseResult.Failed)
        {
            SetSize();
        }
        return res;
    }
    else
    {
        dataType = DataType.Data;
        while (true)
        {
            String line = await reader.ReadLineAsync();
            if (line == null)
            {
                SetSize();
                return ParseResult.Eof;
            }
            else if (EmailParser.IsPostmark(line))
            {
                // consumed too much, probably closing boundary is missing?
                reader.PushCacheLine(line);
                SetSize();
                return ParseResult.Postmark;
            }
            else if (boundary != null && boundary.IsOpen(line))
            {
                // The size is fixed before the delimiter line is written out.
                SetSize();
                RewindLastCrlfSize();
                WriteWithCrlf(line);
                return ParseResult.OkMultipart;
            }
            else if (boundary != null && boundary.IsClose(line, reader))
            {
                SetSize();
                RewindLastCrlfSize();
                WriteWithCrlf(line);
                return ParseResult.Ok;
            }
            else
            {
                WriteWithCrlf(line);
            }
        }
    }
}
/// <summary>
/// Reads header lines up to the empty line that starts the body, writes them out, and extracts
/// the content type, subtype and (for multipart content) the boundary.
/// </summary>
/// <param name="reader">Source of the lines.</param>
/// <returns>
/// <c>ParseResult.Failed</c> (via <c>ParseStatus</c>) when a line does not pass the header check
/// or multipart content has no boundary; <c>ParseResult.Eof</c> when the input ended while
/// reading; otherwise <c>ParseResult.Ok</c>.
/// </returns>
public async Task<ParseResult> Parse(MessageReader reader)
{
    // parse until empty line, which starts the body
    String line = "";
    bool foundContentType = false;
    bool boundaryRequired = false;

    // Could there be an empty line in FWS? I think just crlf is not allowed in FWS.
    // An empty first line is skipped instead of ending the headers; any later empty line
    // ends them.
    bool first = true;
    while ((line = await reader.ReadLineAsync()) != null && (first || line != ""))
    {
        first = false;
        if (line == "")
        {
            // Only reachable for an empty first line.
            continue;
        }
        WriteWithCrlf(line);

        // hack, could there be a header line not matching
        // NOTE(review): the alternation binds looser than '^', so the second alternative is
        // unanchored and accepts any line containing a space or tab. Left unchanged because
        // anchoring it would reject input that is accepted today.
        if (!Regex.IsMatch(line, "^([^ :]+[ ]*:)|([ \t]+)"))
        {
            return ParseStatus(ParseResult.Failed, "invalid headers");
        }
        else if (foundContentType && boundaryRequired && boundary == null)
        {
            // Multipart content type seen but no boundary yet: look for it on later lines.
            boundary = Boundary.Parse(line);
        }
        else if (foundContentType == false)
        {
            Match m = re_content.Match(line);
            if (m.Success)
            {
                ContentType type = ContentType.Text;
                ContentSubtype subtype = ContentSubtype.Plain;

                // Media type tokens are protocol text, not natural language. Culture-sensitive
                // lowercasing would turn "MULTIPART" into "multıpart" under a Turkic culture
                // and miss the lookup.
                String tp = m.Groups[1].Value.ToLowerInvariant();
                String sbtp = m.Groups[2].Value.ToLowerInvariant();

                contentType = types.TryGetValue(tp, out type) ? type : ContentType.Other;
                contentSubtype = subtypes.TryGetValue(sbtp, out subtype) ? subtype : ContentSubtype.Other;
                contentTypeFullStr = (tp == "" ? "text" : tp) + "/" + (sbtp == "" ? "plain" : sbtp);
                foundContentType = true;

                if (contentType == ContentType.Multipart)
                {
                    boundaryRequired = true;
                    boundary = Boundary.Parse(m.Groups[3].Value);
                }
            }
        }
    }

    if (boundaryRequired)
    {
        if (boundary == null)
        {
            return ParseStatus(ParseResult.Failed, "multipart media part with no boundary");
        }
        Boundary.Add(boundary);
    }

    SetSize();
    // delimiter between headers and body, not part of the headers, so not included in size
    WriteCrlf();

    return (line == null) ? ParseResult.Eof : ParseResult.Ok;
}
/// <summary>
/// Writes out every remaining line until the input ends or a postmark line is read; the
/// postmark line is pushed back to <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Source of the lines.</param>
protected async Task ConsumeToEnd(MessageReader reader)
{
    String current;
    while ((current = await reader.ReadLineAsync()) != null)
    {
        if (EmailParser.IsPostmark(current))
        {
            // Not part of what is being consumed: return it to the reader and stop.
            reader.PushCacheLine(current);
            return;
        }

        WriteWithCrlf(current);
    }
}