コード例 #1
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Checks whether <paramref name="line"/> (ignoring trailing spaces) ends this boundary,
 /// either directly or through a delimiter of another boundary held in <c>boundaries</c>.
 /// </summary>
 /// <param name="line">Candidate delimiter line.</param>
 /// <param name="reader">Reader that gets the trimmed line pushed back when another boundary's delimiter matched.</param>
 /// <returns>true when a boundary was matched and removed; false otherwise.</returns>
 public bool IsClose(String line, MessageReader reader)
 {
     String trimmed = line.TrimEnd(' ');

     if (trimmed != closeBoundary)
     {
         // any open or close boundary down in the stack closes all boundaries above in the stack
         Boundary matched = boundaries.FindLast(
             candidate => (candidate.closeBoundary == trimmed || candidate.openBoundary == trimmed));
         if (matched == null)
         {
             return false;
         }

         RemoveBoundary(matched);
         // hand the delimiter back so the next read sees it again
         reader.PushCacheLine(trimmed);
         return true;
     }

     // our own close delimiter
     RemoveBoundary(this);
     return true;
 }
コード例 #2
0
ファイル: EmailStats.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Parses every message of the archive <paramref name="file"/> and writes per-message statistics
 /// into a newly created "stats&lt;filetime&gt;.out" file under <paramref name="dir"/>.
 /// </summary>
 /// <param name="dir">Directory in which the statistics file is created.</param>
 /// <param name="file">Path of the archive to read; handed to the MessageReader constructor.</param>
 /// <param name="status">Callback used to report status text.</param>
 /// <param name="pcb">Callback receiving the progress percentage (sum of processed message sizes over archive size).</param>
 /// <remarks>
 /// Exceptions raised during processing are caught and reported through <paramref name="status"/>.
 /// Both the statistics writer and the archive reader are released in the finally block.
 /// </remarks>
 public async Task Start(String dir, String file, StatusCb status, ProgressCb pcb)
 {
     MessageReader reader = null;
     StreamWriter filew = null;
     try {
         status(false, "", "Started statistics processing.");
         reader = new MessageReader(file);
         long size = reader.BaseStream.Length;
         double progress = .0;
         // from here on 'file' names the statistics output file, not the archive
         file = String.Format(@"{0}\stats{1}.out", dir, DateTime.Now.ToFileTime());
         filew = new StreamWriter(file);
         DateTime start_time = DateTime.Now;
         await WriteStatsLine(filew, "archive size: {0}\n", size);
         int count = 1;
         await EmailParser.ParseMessages(token, reader, async delegate (Message message, Exception reason)
         {
             status(true, "^message:", "message: {0}", count++);
             try
             {
                 if (null == message)
                 {
                     // the parser reported a failure for this message
                     String why = (null != reason) ? reason.Message : "";
                     status(false, "", "message parsing failed: " + why);
                     await WriteStatsLine(filew, "--> start");
                     await WriteStatsLine(filew, "<-- end failed to process: {0}", why);
                     return;
                 }
                 // display progress
                 progress += message.size;
                 double pct = (100.0 * progress / (double)size);
                 pcb(pct);
                 // get required headers
                 Dictionary<String, String> headers = message.email.headers.GetDictionary(new Dictionary<string, string>()
                 { {"from","" }, { "cc", "" }, {"subject","" }, {"date","" },
                 { "to",""}, {"bcc", "" }, { "in-reply-to","" }, {"reply-to","" }, {"content-type","" }, {"message-id","" }, { "x-gmail-labels",""}});
                 String msgid = headers["message-id"];
                 // get unique messages: skip a message whose id was already seen
                 if (!String.IsNullOrEmpty(msgid) && !MessageidUnique(msgid))
                 {
                     return;
                 }
                 await WriteStatsLine(filew, "--> start");
                 int csize = await CompressedSize(message.GetBytes());
                 await WriteStatsLine(filew, "Full Message: {0} {1}", message.size, csize);
                 await WriteStatsLine(filew, "Hdrs");
                 await WriteStatsLine(filew, "from: {0}", Sha1(EmailAddr(headers["from"])));
                 await WriteStatsLine(filew, "to: {0}", GetAddrList(headers["to"]));
                 await WriteStatsLine(filew, "cc: {0}", GetAddrList(headers["cc"]));
                 await WriteStatsLine(filew, "bcc: {0}", GetAddrList(headers["bcc"]));
                 await WriteStatsLine(filew, "date: {0}", headers["date"]);
                 await WriteStatsLine(filew, "subject: {0}", GetSubject(headers["subject"]));
                 await WriteStatsLine(filew, "mailbox: {0}", GetMailbox(headers["x-gmail-labels"]));
                 await WriteStatsLine(filew, "messageid: {0}", GetMessageId(headers["message-id"]));
                 await WriteStatsLine(filew, "inreplyto: {0}", GetInReplyTo(headers["in-reply-to"]));
                 await WriteStatsLine(filew, "replyto: {0}", GetAddrList(headers["reply-to"]));
                 await WriteStatsLine(filew, "Parts:");
                 await TraverseEmail(filew, 0, 0, message.email);
                 await WriteStatsLine(filew, "<-- end");
             }
             catch (Exception ex)
             {
                 // a failure on one message is recorded in the output and does not stop the run
                 await WriteStatsLine(filew, "<-- end failed to process: {0}, {1}", ex.Message, ex.StackTrace);
             }
         });
         status(false, "", "Statistics is generated in file {0}", file);
         TimeSpan span = DateTime.Now - start_time;
         status(false, "", "Processing time: {0} seconds", span.TotalSeconds);
     }
     catch (Exception ex)
     {
         status(false, "", "Statistics failed: {0}", ex.Message);
     }
     finally
     {
         try
         {
             if (filew != null)
                 filew.Close();
         }
         finally
         {
             // NOTE(review): MessageReader is used through BaseStream/EndOfStream/ReadLineAsync,
             // so it is presumed to be a StreamReader-style disposable reader - confirm.
             if (reader != null)
                 reader.Dispose();
         }
     }
 }
コード例 #3
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
        /// <summary>
        /// Parses one message from <paramref name="reader"/>.
        /// For now the message is assumed to start with the postmark and to be laid out as:
        ///   postmark\r\n
        ///   headers\r\n
        ///   \r\n
        ///   body
        /// </summary>
        /// <param name="reader">Line source positioned at (or before) the message postmark.</param>
        /// <returns>The result reported by the email (headers plus body) parser.</returns>
        /// <exception cref="ParsingFailedException">The postmark parser reported a failure.</exception>
        public async Task<ParseResult> Parse(MessageReader reader)
        {
            postmark = new Postmark(entity);
            ParseResult postmarkResult = await postmark.Parse(reader);
            if (postmarkResult == ParseResult.Failed)
            {
                throw new ParsingFailedException("postmark is not found");
            }

            email = new Email(entity);
            ParseResult res = await email.Parse(reader);

            bool stoppedAtMessageEdge = (res == ParseResult.Eof || res == ParseResult.Postmark);
            if (!stoppedAtMessageEdge)
            {
                // the email parser returned before the end of input or the next postmark;
                // drain the lines that are left of this message
                await ConsumeToEnd(reader);
            }

            SetSize();
            return res;
        }
コード例 #4
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Parses messages from <paramref name="reader"/> one after another until the stream ends
 /// or cancellation is requested, reporting each outcome through <paramref name="cb"/>.
 /// </summary>
 /// <param name="token">Checked before each message; a pending cancellation stops the loop.</param>
 /// <param name="reader">Source of the messages.</param>
 /// <param name="cb">
 /// Invoked with the parsed message on success. Any exception thrown while parsing, or by the
 /// success callback itself, is reported by invoking it again with a null message and the exception.
 /// </param>
 public async static Task ParseMessages(CancellationToken token, MessageReader reader, MessageCb cb)
 {
     while (!reader.EndOfStream && !token.IsCancellationRequested)
     {
         try
         {
             Message parsed = new Message();
             await parsed.Parse(reader);
             await cb(parsed);
         }
         catch (Exception failure)
         {
             await cb(null, failure);
         }
     }
 }
コード例 #5
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Reads lines from <paramref name="reader"/> until one is recognised as a postmark.
 /// Lines read before the postmark are not written to the entity.
 /// </summary>
 /// <param name="reader">Line source.</param>
 /// <returns>Ok once a postmark line has been written; Failed if the input ends first.</returns>
 public async Task<ParseResult> Parse(MessageReader reader)
 {
     while (true)
     {
         String candidate = await reader.ReadLineAsync();
         if (candidate == null)
         {
             // ran out of input without seeing a postmark
             return ParseResult.Failed;
         }

         if (!EmailParser.IsPostmark(candidate))
         {
             continue;
         }

         WriteWithCrlf(candidate);
         // assume the Postmark always starts with 0 position
         size = (int)entity.Position;
         return ParseResult.Ok;
     }
 }
コード例 #6
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Parses one email part: its headers first, then its content.
 /// </summary>
 /// <param name="reader">Line source.</param>
 /// <param name="type">Content type passed to the Headers constructor.</param>
 /// <param name="subtype">Content subtype passed to the Headers constructor.</param>
 /// <param name="boundary">Boundary used for the content when the headers did not set one.</param>
 /// <returns>Failed when header parsing fails; otherwise the result of parsing the content.</returns>
 public async Task<ParseResult> Parse(MessageReader reader, ContentType type = ContentType.Text,
     ContentSubtype subtype = ContentSubtype.Plain, Boundary boundary = null)
 {
     headers = new Headers(entity, type, subtype);
     ParseResult headerResult = await headers.Parse(reader);
     if (headerResult == ParseResult.Failed)
     {
         return ParseResult.Failed;
     }

     content = new Content(entity);

     // a boundary set by this part's own headers takes precedence over the one passed in
     Boundary effectiveBoundary = boundary;
     if (headers.boundary != null)
     {
         effectiveBoundary = headers.boundary;
     }

     return await content.Parse(reader, headers.contentType, headers.contentSubtype, effectiveBoundary);
 }
コード例 #7
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Parses the content that follows a part's headers, dispatching on <paramref name="type"/>:
 /// multipart content is split into nested Email parts, message content is parsed as a single
 /// nested Email, and anything else is copied line by line as plain data.
 /// </summary>
 /// <param name="reader">Line source; a postmark line that was read too far is handed back via PushCacheLine.</param>
 /// <param name="type">Content type selecting the branch taken.</param>
 /// <param name="subtype">Content subtype; only forwarded to the nested part parsers.</param>
 /// <param name="boundary">Boundary in effect. NOTE(review): the multipart branch dereferences it
 /// without a null check, so it is presumably always supplied for multipart content - confirm.</param>
 /// <returns>
 /// Eof when the reader runs out of lines, Postmark when a postmark line is seen,
 /// Ok / OkMultipart at the boundary checks below, or the result of a nested part parse.
 /// </returns>
 public async Task<ParseResult> Parse(MessageReader reader, ContentType type = ContentType.Text,
     ContentSubtype subtype = ContentSubtype.Plain, Boundary boundary = null)
 {
     if (type == ContentType.Multipart)
     {
         dataType = DataType.Multipart;
         while (true)
         {
             String line = await reader.ReadLineAsync();
             if (line == null)
             {
                 // input ended inside the multipart content
                 SetSize();
                 return ParseResult.Eof;
             }
             else if (EmailParser.IsPostmark(line))
             {
                 // consumed too much, probably missing boundary?
                 reader.PushCacheLine(line);
                 SetSize();
                 return ParseResult.Postmark;
             }
             // every other line is written out before it is tested against the boundary
             WriteWithCrlf(line);
             // find open boundary
             if (boundary.IsOpen(line))
             {
                 Email email = null;
                 ParseResult res;
                 do
                 {
                     // consume all parts, consisting of header (optional) and content
                     // the boundary token delimits the part
                     // the close boundary completes multipart parsing
                     // content in the multipart is responsible for consuming its delimiter (end)
                     // exception is the last part which is also multipart
                     email = new Email(entity);
                     Add(email);
                 } while ((res = await email.Parse(reader, type, subtype, boundary)) == ParseResult.OkMultipart); // Ok
                 // if the last part is a multipart or message? itself then it doesn't consume the close boundary
                 // or more parts, continue parsing until all parts and close boundary are consumed
                 /*if (Ok(res) && (data.Last<Email>().content.dataType == DataType.Multipart ||
                         data.Last<Email>().content.dataType == DataType.Message))*/
                 if (res == ParseResult.Ok && boundary.NotClosed())
                     continue;
                 // the size is recorded for every outcome except a failed part
                 if (res != ParseResult.Failed)
                     SetSize();
                 return res;
             }
             else if (boundary.IsClose(line, reader))
             {
                 SetSize();
                 return ParseResult.Ok; // OkMultipart
             }
         }
     }
     else if (type == ContentType.Message)
     {
         // message content: the whole content is parsed as one nested Email
         dataType = DataType.Message;
         Email email = new Email(entity);
         Add(email);
         ParseResult res = await email.Parse(reader, type, subtype, boundary);
         if (res != ParseResult.Failed)
             SetSize();
         return res;
     }
     else
     {
         // plain data: copy lines until end of input, a postmark, or a boundary delimiter
         dataType = DataType.Data;
         while (true)
         {
             String line = await reader.ReadLineAsync();
             if (line == null)
             {
                 SetSize();
                 return ParseResult.Eof;
             }
             else if (EmailParser.IsPostmark(line))
             {
                 // consumed too much, probably closing boundary is missing ?
                 reader.PushCacheLine(line);
                 SetSize();
                 return ParseResult.Postmark;
             }
             else if (boundary != null && boundary.IsOpen(line))
             {
                 // SetSize runs before the delimiter line is written, so the size is taken
                 // at the position preceding the delimiter
                 // NOTE(review): RewindLastCrlfSize presumably drops the CRLF preceding the delimiter from the size - confirm
                 SetSize();
                 RewindLastCrlfSize();
                 WriteWithCrlf(line);
                 return ParseResult.OkMultipart; //Ok
             }
             else if (boundary != null && boundary.IsClose(line, reader))
             {
                 // same bookkeeping as for the open delimiter, but reports Ok instead of OkMultipart
                 SetSize();
                 RewindLastCrlfSize();
                 WriteWithCrlf(line);
                 return ParseResult.Ok; //OkMultipart
             }
             else
                 WriteWithCrlf(line);
         }
     }
 }
コード例 #8
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Reads header lines from <paramref name="reader"/> up to the blank line that starts the body,
 /// recording the content type, the content subtype and, for multipart content, the boundary.
 /// </summary>
 /// <param name="reader">Line source positioned at the first header line.</param>
 /// <returns>
 /// The result of ParseStatus(ParseResult.Failed, ...) when a line does not pass the header-shape
 /// check or a multipart content type has no boundary; Eof when the input ended while reading
 /// headers; otherwise Ok.
 /// </returns>
 public async Task<ParseResult> Parse(MessageReader reader)
 {
     // parse until empty line, which start the body
     String line = "";
     bool foundContentType = false;
     bool boundaryRequired = false;
     // could there be an empty line in FWS? I think just crlf is not allowed in FWS
     // if starts with the blank line then there is no header
     // ends with blank line
     // there are empty lines before the headers start. how many?
     bool first = true;
     // an empty line ends the headers, except when it is the very first line read
     while ((line = await reader.ReadLineAsync()) != null && (first || line != ""))
     {
         first = false;
         if (line == "")
             continue; // only reachable for the first line: a single leading blank line is skipped
         WriteWithCrlf(line);
         // hack, could there be a header line not matching
         // NOTE(review): the second alternative "([ \t]+)" is not anchored by '^', so any line
         // containing a space or tab passes this check - confirm whether that is intended.
         if (!Regex.IsMatch(line, "^([^ :]+[ ]*:)|([ \t]+)"))
         {
             return ParseStatus(ParseResult.Failed, "invalid headers");
         }
         else if (foundContentType && boundaryRequired && boundary == null)
         {
             // multipart content type seen but no boundary yet: try the following header lines
             boundary = Boundary.Parse(line);
         }
         else if (foundContentType == false)
         {
             Match m = re_content.Match(line);
             if (m.Success)
             {
                 ContentType type = ContentType.Text;
                 ContentSubtype subtype = ContentSubtype.Plain;
                 // media type tokens are protocol text, so lower-case them culture-independently;
                 // a culture-sensitive ToLower() maps 'I' to a dotless i under Turkish cultures
                 // and would make "MULTIPART" miss the lookup below
                 String tp = m.Groups[1].Value.ToLowerInvariant();
                 String sbtp = m.Groups[2].Value.ToLowerInvariant();
                 contentType = types.TryGetValue(tp, out type) ? type : ContentType.Other;
                 contentSubtype = subtypes.TryGetValue(sbtp, out subtype) ? subtype : ContentSubtype.Other;
                 contentTypeFullStr = (tp==""?"text":tp) + "/" + (sbtp==""?"plain":sbtp);
                 foundContentType = true;
                 if (contentType == ContentType.Multipart)
                 {
                     boundaryRequired = true;
                     boundary = Boundary.Parse(m.Groups[3].Value);
                 }
             }
         }
     }
     if (boundaryRequired)
     {
         if (boundary == null)
             return ParseStatus(ParseResult.Failed, "multipart media part with no boundary");
         Boundary.Add(boundary);
     }
     SetSize();
     WriteCrlf(); // delimiter between headers and body, not part of the headers, so not included in size
     // line is null only when the reader ran out of input before the blank separator line
     return (line == null) ? ParseResult.Eof : ParseResult.Ok;
 }
コード例 #9
0
ファイル: EmailParser.cs プロジェクト: gregtatcam/email_proc
 /// <summary>
 /// Copies the remaining lines of the current message to the entity, stopping at the end of
 /// input or at the next postmark line (which is pushed back to <paramref name="reader"/>).
 /// </summary>
 /// <param name="reader">Line source.</param>
 protected async Task ConsumeToEnd(MessageReader reader)
 {
     while (true)
     {
         String pending = await reader.ReadLineAsync();
         if (pending == null)
         {
             return;
         }

         if (EmailParser.IsPostmark(pending))
         {
             // the postmark is not part of this message; leave it for the next read
             reader.PushCacheLine(pending);
             return;
         }

         WriteWithCrlf(pending);
     }
 }