public void NormalGroupUpdateTest()
        {
            // Arrange: load the sample document group and mark it as an update.
            XmlDocumentGroup groupMeta = XmlHelper.GetXmlDocumentGroup(@".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.xml");
            groupMeta.Operation = Operation.Upd;
            UploadDocumentGroup payload = XmlHelper.GetUploadDocumentGroup(groupMeta, @".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.zip");

            using (ShimsContext.Create())
            {
                // Freeze the clock so the asserted LogDate is deterministic.
                ShimDateTime.NowGet = () => new DateTime(2000, 1, 1);

                string deletedIdentifier = string.Empty;
                CrawlerLog capturedLog = new CrawlerLog();
                DocumentGroup capturedGroup = new DocumentGroup();
                IRepository fakeRepository = new StubIRepository
                {
                    // Existing identifier: the lookup finds the group, so Upd is allowed.
                    GetIdentifierString = id => groupMeta.Identifier,
                    GetOrCreateCrawlerIdString = name => 1,
                    AddDocumentGroupDocumentGroup = added => { capturedGroup = added; },
                    DeleteDocumentGroupString = id => { deletedIdentifier = id; return 1; },
                    AddNewLogCrawlerLog = entry => { capturedLog = entry; }
                };

                // Act
                WcfHost host = new WcfHost(fakeRepository);
                var result = host.UploadFile(payload);

                // Assert: service reply
                Assert.AreEqual("Ok", result, "Service return message.");

                // Assert: the previous group was removed by identifier
                Assert.AreEqual(groupMeta.Identifier, deletedIdentifier, "Identifier");

                // Assert: the replacement group was stored with the expected fields
                Assert.AreEqual(groupMeta.Identifier, capturedGroup.Identifier, "Identifier");
                Assert.AreEqual(1, capturedGroup.CrawlerId, "CrawlerId");
                Assert.AreEqual(1, capturedGroup.Operation, "Operation");
                Assert.AreEqual(groupMeta.Document.Count, capturedGroup.Documents.Count, "DocumentsCount");

                // Assert: the crawler log entry reflects a successful Upd
                Assert.AreEqual(groupMeta.Identifier, capturedLog.Identifier, "IdentifierLog");
                Assert.AreEqual(true, capturedLog.IsSuccess, "IsSuccess");
                Assert.AreEqual(new DateTime(2000, 1, 1), capturedLog.LogDate, "LogDate");
                Assert.AreEqual(groupMeta.Crawler, capturedLog.CrawlerName, "CrawlerNameLog");
                Assert.AreEqual(1, capturedLog.Operation, "OperationLog");
            }
        }
 /// <summary>
 /// Appends one <c>CrawlerLog</c> record as a single line to the local fallback log file.
 /// Used when the database is unreachable; <c>GetLog</c> reverses this encoding.
 /// Fields are joined with the " _^sep^_ " token and embedded CR/LF characters are
 /// escaped (" _^r^_ " / " _^n^_ ") so each record stays on one physical line.
 /// </summary>
 /// <param name="crawlerLog">The log record to serialize and append.</param>
 public static void WriteToFileLog(CrawlerLog crawlerLog)
 {
     // locker / checkForLog / logFile are statics declared elsewhere in this class.
     lock (locker)
     {
         checkForLog = true;
         // Declare the writer inside the using so it is always disposed, even if
         // construction of the log line throws (the original created it first and
         // only then entered the using block).
         using (StreamWriter sw = new StreamWriter(logFile, true))
         {
             string logLine = (crawlerLog.IpAddress + " _^sep^_ " +
                              crawlerLog.LogDate + " _^sep^_ " +
                              crawlerLog.MetaXml + " _^sep^_ " +
                              crawlerLog.ZipLength + " _^sep^_ " +
                              crawlerLog.Identifier + " _^sep^_ " +
                              crawlerLog.CrawlerName + " _^sep^_ " +
                              crawlerLog.IsSuccess + " _^sep^_ " +
                              crawlerLog.Operation + " _^sep^_ " +
                              crawlerLog.Error).Replace("\r", " _^r^_ ").Replace("\n", " _^n^_ ");
             sw.WriteLine(logLine);
         }
     }
 }
        /// <summary>
        /// Uploads a document group to the web service: validates the XML meta info,
        /// unzips the document payload, applies the requested Add/Upd/Del operation
        /// against the repository, and always records a <c>CrawlerLog</c> entry
        /// (to the database, or to a local file when the database is unreachable).
        /// </summary>
        /// <param name="uploadDocumentGroup">Upload payload: <c>MetaInfo</c> holds the
        /// serialized XmlDocumentGroup XML, <c>Data</c> holds the zipped documents.</param>
        /// <returns>"Ok" when the operation succeeded; otherwise an error string in the
        /// format "Error: &lt;error description&gt;".</returns>
        public string UploadFile(UploadDocumentGroup uploadDocumentGroup)
        {
            CrawlerLog crawlerLog = new CrawlerLog();
            try
            {
                // Capture the caller's address; OperationContext is null when the method
                // is invoked outside a WCF host (e.g. from unit tests).
                RemoteEndpointMessageProperty msg = null;
                if (OperationContext.Current != null)
                {
                    // NOTE(review): the 'as' cast can yield null if the message property is
                    // absent, making msg.Address throw — confirm the binding always supplies it.
                    msg = OperationContext.Current.IncomingMessageProperties[RemoteEndpointMessageProperty.Name] as RemoteEndpointMessageProperty;
                    crawlerLog.IpAddress = msg.Address;
                }
                else
                {
                    crawlerLog.IpAddress = "localhost";
                }

                // Assume failure until the whole operation completes.
                crawlerLog.IsSuccess = false;
                crawlerLog.LogDate = DateTime.Now;
                crawlerLog.MetaXml = uploadDocumentGroup.MetaInfo;

                // ValidateDeserializeXml: schema-validate the meta XML first; XmlValidate
                // reports problems via the shared isValidXml flag and validationErrorLog.
                this.isValidXml = true;
                this.validationErrorLog.Clear();
                this.XmlValidate(uploadDocumentGroup.MetaInfo);
                if (!this.isValidXml)
                {
                    return this.validationErrorLog.ToString();
                }

                XmlDocumentGroup xmlDocumentGroup = null;
                XmlSerializer deserializer = new XmlSerializer(typeof(XmlDocumentGroup));
                using (TextReader textReader = new StringReader(uploadDocumentGroup.MetaInfo))
                {
                    xmlDocumentGroup = (XmlDocumentGroup)deserializer.Deserialize(textReader);
                }

                // From here on the log entry carries the parsed group's identity.
                crawlerLog.Identifier = xmlDocumentGroup.Identifier;
                crawlerLog.Operation = (int)xmlDocumentGroup.Operation;
                crawlerLog.CrawlerName = xmlDocumentGroup.Crawler;

                // ValidateReadZip: every operation except Del must carry zipped document data.
                Dictionary<string, MemoryStream> documentsData = new Dictionary<string, MemoryStream>();
                if (xmlDocumentGroup.Operation != Operation.Del)
                {
                    if (uploadDocumentGroup.Data == null || uploadDocumentGroup.Data.Length == 0)
                    {
                        this.validationErrorLog.Append("Error: Validation Exception - ZipData length is zero or ZipData is NULL, document idenfier: " + xmlDocumentGroup.Identifier + " and Operation: " + xmlDocumentGroup.Operation);
                        return this.validationErrorLog.ToString();
                    }
                    else
                    {
                        crawlerLog.ZipLength = uploadDocumentGroup.Data.Length;
                        documentsData = this.UnZipToMemory(uploadDocumentGroup.Data);
                    }
                }

                // AddUpdDel: dispatch on the requested operation. Add requires the identifier
                // to be new; Upd/Del require it to already exist in the repository.
                var documentGroupIdentifier = this.db.GetIdentifier(xmlDocumentGroup.Identifier);

                if (xmlDocumentGroup.Operation == Operation.Add)
                {
                    if (string.IsNullOrEmpty(documentGroupIdentifier))
                    {
                        // Add new XmlDocumentGroup
                        this.AddNewDocumentGroup(xmlDocumentGroup, uploadDocumentGroup, documentsData);
                    }
                    else
                    {
                        this.validationErrorLog.Append("Error: Validation Exception - There is a document identifier: " + xmlDocumentGroup.Identifier + " You can't use Оperation: " + xmlDocumentGroup.Operation);
                        return this.validationErrorLog.ToString();
                    }
                }
                else if (xmlDocumentGroup.Operation == Operation.Upd || xmlDocumentGroup.Operation == Operation.Del)
                {
                    if (!string.IsNullOrEmpty(documentGroupIdentifier))
                    {
                        // Update/Delete
                        this.UpdateDeleteDocumentGroup(xmlDocumentGroup, uploadDocumentGroup, documentsData, documentGroupIdentifier);
                    }
                    else
                    {
                        this.validationErrorLog.Append("Error: Validation Exception - No document idenfier: " + xmlDocumentGroup.Identifier + " .You can't use Operation: " + xmlDocumentGroup.Operation);
                        return this.validationErrorLog.ToString();
                    }
                }
                else
                {
                    this.validationErrorLog.Append("Error: Validation Exception - Document idenfier: " + xmlDocumentGroup.Identifier + " .Invalid Operation: " + xmlDocumentGroup.Operation);
                    return this.validationErrorLog.ToString();
                }

                crawlerLog.IsSuccess = true;
            }
            catch (Exception ex)
            {
                crawlerLog.IsSuccess = false;
                crawlerLog.Error += ex.ToString();
                return "Error: Service Exception - " + ex.ToString();
            }
            finally
            {
                // The finally block runs on every exit path above (including the early
                // validation returns), so a log entry is always persisted.
                crawlerLog.Error += this.validationErrorLog.ToString();

                try
                {
                    this.db.AddNewLog(crawlerLog);
                    LockHelper.PushLocalLogToDb();
                }
                catch (Exception ex)
                {
                    // There is a problem with the connection to the database, so we recorded a log in a text file
                    crawlerLog.Error += ex.ToString();
                    LockHelper.WriteToFileLog(crawlerLog);
                }
            }

            return "Ok";
        }
        /// <summary>
        /// Parses one line previously written by <c>WriteToFileLog</c> back into a
        /// <c>CrawlerLog</c>: restores escaped newlines, splits on the field separator
        /// token, and converts each field; empty string fields become null and
        /// unparsable numeric/bool/date fields are left at their defaults.
        /// </summary>
        /// <param name="log">A single encoded log line.</param>
        /// <returns>The reconstructed log record.</returns>
        private static CrawlerLog GetLog(string log)
        {
            // Undo the CR/LF escaping, then split into the nine fields.
            string restored = log.Replace(" _^r^_ ", "\r").Replace(" _^n^_ ", "\n");
            string[] fields = restored.Split(new string[] { " _^sep^_ " }, StringSplitOptions.None);

            // Trims a field and maps empty to null (shared by the string-typed fields).
            Func<string, string> textOrNull = raw =>
            {
                string trimmed = raw.Trim();
                return trimmed == string.Empty ? null : trimmed;
            };

            CrawlerLog crawlerLog = new CrawlerLog();
            crawlerLog.IpAddress = textOrNull(fields[0]);

            DateTime logDate = new DateTime();
            if (DateTime.TryParse(fields[1].Trim(), out logDate))
            {
                crawlerLog.LogDate = logDate;
            }

            crawlerLog.MetaXml = textOrNull(fields[2]);

            int zipLength = 0;
            if (int.TryParse(fields[3].Trim(), out zipLength))
            {
                crawlerLog.ZipLength = zipLength;
            }

            crawlerLog.Identifier = textOrNull(fields[4]);
            crawlerLog.CrawlerName = textOrNull(fields[5]);

            bool isSuccess = false;
            if (bool.TryParse(fields[6].Trim(), out isSuccess))
            {
                crawlerLog.IsSuccess = isSuccess;
            }

            int operation = 0;
            if (int.TryParse(fields[7].Trim(), out operation))
            {
                crawlerLog.Operation = operation;
            }

            // The error field keeps its trimmed text even when empty.
            crawlerLog.Error = fields[8].Trim();

            return crawlerLog;
        }
// Beispiel #5 (scraper artifact: German "Example #5" snippet marker)
// 0
 /// <summary>
 /// Inserts a crawler log record, saves immediately, and detaches the entity
 /// from the Entity Framework change tracker.
 /// </summary>
 /// <param name="log">The log record to persist.</param>
 public void AddLog(CrawlerLog log)
 {
     this.context.CrawlerLogs.Add(log);
     this.SaveChanges();
     // Detach so later mutations of 'log' are not tracked (or flushed) by this context.
     this.context.Entry(log).State = EntityState.Detached;
 }
        public void IdentifierGroupDeleteTestNull()
        {
            // Arrange: a Del request for an identifier the repository does not know,
            // with the zip payload deliberately removed.
            XmlDocumentGroup groupMeta = XmlHelper.GetXmlDocumentGroup(@".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.xml");
            groupMeta.Operation = Operation.Del;
            UploadDocumentGroup payload = XmlHelper.GetUploadDocumentGroup(groupMeta, @".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.zip");
            payload.Data = null;

            using (ShimsContext.Create())
            {
                // Freeze the clock so the asserted LogDate is deterministic.
                ShimDateTime.NowGet = () => new DateTime(2000, 1, 1);

                string deletedIdentifier = string.Empty;
                CrawlerLog capturedLog = new CrawlerLog();
                DocumentGroup capturedGroup = new DocumentGroup();
                IRepository fakeRepository = new StubIRepository
                {
                    // Unknown identifier: the lookup returns an empty string, so Del must fail.
                    GetIdentifierString = id => string.Empty,
                    GetOrCreateCrawlerIdString = name => 1,
                    AddDocumentGroupDocumentGroup = added => { capturedGroup = added; },
                    DeleteDocumentGroupString = id => { deletedIdentifier = id; return 1; },
                    AddNewLogCrawlerLog = entry => { capturedLog = entry; }
                };

                // Act
                WcfHost host = new WcfHost(fakeRepository);
                var result = host.UploadFile(payload);
                string expected = "Error: Validation Exception - No document idenfier: " + groupMeta.Identifier + " .You can't use Operation: Del";

                // Assert: service reply
                Assert.AreEqual(expected, result, "Service return message.");

                // Assert: the failure was logged with the group's identity
                Assert.AreEqual(groupMeta.Identifier, capturedLog.Identifier, "IdentifierLog");
                Assert.AreEqual(false, capturedLog.IsSuccess, "IsSuccess");
                Assert.AreEqual(new DateTime(2000, 1, 1), capturedLog.LogDate, "LogDate");
                Assert.AreEqual(groupMeta.Crawler, capturedLog.CrawlerName, "CrawlerNameLog");
                Assert.AreEqual(2, capturedLog.Operation, "OperationLog");
            }
        }
        public void ZipXmlValidationTestNull()
        {
            // Arrange: a default upload whose zip payload is missing entirely.
            XmlDocumentGroup groupMeta = XmlHelper.GetXmlDocumentGroup(@".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.xml");
            UploadDocumentGroup payload = XmlHelper.GetUploadDocumentGroup(groupMeta, @".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.zip");
            payload.Data = null;

            using (ShimsContext.Create())
            {
                // Freeze the clock so the asserted LogDate is deterministic.
                ShimDateTime.NowGet = () => new DateTime(2000, 1, 1);

                CrawlerLog capturedLog = new CrawlerLog();
                IRepository fakeRepository = new StubIRepository
                {
                    GetIdentifierString = id => string.Empty,
                    AddNewLogCrawlerLog = entry => { capturedLog = entry; }
                };

                // Act
                WcfHost host = new WcfHost(fakeRepository);
                var result = host.UploadFile(payload);
                string expected = "Error: Validation Exception - ZipData length is zero or ZipData is NULL, document idenfier: " + groupMeta.Identifier + " and Operation: Add";

                // Assert: validation error is returned and the failure was logged.
                Assert.AreEqual(expected, result, "Service return message.");
                Assert.AreEqual(false, capturedLog.IsSuccess, "IsSuccess");
                Assert.AreEqual(new DateTime(2000, 1, 1), capturedLog.LogDate, "LogDate");
            }
        }
        public void FormatXmlValidationTestInvalid()
        {
            // Arrange: corrupt the 'format' attribute so schema validation must reject it.
            XmlDocumentGroup groupMeta = XmlHelper.GetXmlDocumentGroup(@".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.xml");
            groupMeta.Format = "application/badapplication";
            UploadDocumentGroup payload = XmlHelper.GetUploadDocumentGroup(groupMeta, @".\DataContent\NormalContent\cc54a49bf188f994899cb1c954bb795f.zip");

            using (ShimsContext.Create())
            {
                // Freeze the clock so the asserted LogDate is deterministic.
                ShimDateTime.NowGet = () => new DateTime(2000, 1, 1);

                CrawlerLog capturedLog = new CrawlerLog();
                IRepository fakeRepository = new StubIRepository
                {
                    GetIdentifierString = id => string.Empty,
                    AddNewLogCrawlerLog = entry => { capturedLog = entry; }
                };

                // Act
                WcfHost host = new WcfHost(fakeRepository);
                var result = host.UploadFile(payload);
                string expected = "Error: Validation Exception - The 'format' attribute has an invalid value according to its data type.\r\n";

                // Assert: schema validation error is returned and the failure was logged.
                Assert.AreEqual(expected, result, "Service return message.");
                Assert.AreEqual(false, capturedLog.IsSuccess, "IsSuccess");
                Assert.AreEqual(new DateTime(2000, 1, 1), capturedLog.LogDate, "LogDate");
            }
        }
// Beispiel #9 (scraper artifact: German "Example #9" snippet marker)
// 0
        // One call updates a single feed; the surrounding scheduler handles feeds with many durations.
        /// <summary>
        /// Downloads the latest content for the feed with <paramref name="feedId"/>,
        /// inserts any new items, classifies failures (404 / 403 / timeout) onto the
        /// feed record, logs the outcome, and persists all changes.
        /// </summary>
        /// <param name="feedId">Primary key of the feed to refresh.</param>
        /// <returns>The (possibly modified) feed entity.</returns>
        private Feed UpdatingFeed(long feedId)
        {
            // NOTE(review): this counter is never incremented below, so the "new items"
            // count passed to CheckForChangeDuration/SuccessLog is always 0 — confirm intent.
            int numberOfNewItems = 0;
            var context = new TazehaContext();
            Feed dbfeed = context.Feeds.Single<Feed>(x => x.Id == feedId);

            // Track how often this feed has been refreshed and when.
            dbfeed.UpdatingCount = (dbfeed.UpdatingCount ?? 0) + 1;
            dbfeed.LastUpdaterVisit = DateTime.Now;
            try
            {
                if (dbfeed.FeedType == 0 || !dbfeed.FeedType.HasValue)
                {
                    #region Feed
                    RssFeed feed = RssFeed.Read(dbfeed.Link);
                    if (feed == null)
                    {
                        // Source did not return a readable RSS feed.
                        dbfeed.Deleted = Common.Share.DeleteStatus.NotWork;
                    }
                    else if (feed.Channels.Count > 0)
                    {
                        // TODO(review): the original "last item already exists" short-circuit was a
                        // hard-coded false flag (dead branch, removed here) — every poll inserts items.
                        RssChannel channel = (RssChannel)feed.Channels[0];
                        List<FeedItemSP> listReturnBack;
                        if (channel.Items.LatestPubDate() != channel.Items[0].PubDate)
                        {
                            // Items are not already newest-first: insert from the sorted view.
                            listReturnBack = FeedItemsOperation.InsertFeedItems(channel.ItemsSorted, dbfeed);
                        }
                        else
                        {
                            listReturnBack = FeedItemsOperation.InsertFeedItems(channel.Items, dbfeed);
                        }

                        GeneralLogs.WriteLog("Updating feed " + dbfeed.Id + " Num:" + listReturnBack.Count + " " + dbfeed.Link, TypeOfLog.OK);
                    }
                    #endregion
                }
                else if (dbfeed.FeedType.HasValue && dbfeed.FeedType.Value == Common.Share.FeedType.Atom)
                {
                    #region Atom
                    // Atom feeds go through the BCL syndication reader. Dispose the XmlReader
                    // deterministically (the original leaked it).
                    using (XmlReader reader = XmlReader.Create(dbfeed.Link))
                    {
                        SyndicationFeed atomfeed = SyndicationFeed.Load(reader);
                        if (atomfeed == null)
                        {
                            dbfeed.Deleted = Common.Share.DeleteStatus.NotWork;
                        }
                        else if (atomfeed.Items.Any())
                        {
                            // (The original re-counted items here; Any() already guarantees at least one.)
                            List<FeedItem> listReturnBack = FeedItemsOperation.InsertFeedItems(atomfeed.Items, dbfeed);

                            GeneralLogs.WriteLog("OK updating atom " + dbfeed.Id + " Num:" + listReturnBack.Count + " " + dbfeed.Link);
                        }
                    }
                    #endregion
                }
            }
            catch (Exception ex)
            {
                #region Exception
                // Classify well-known HTTP failures by message text. IndexOf returns the match
                // position (0-based) or -1, so ">= 0" is required to also catch a match at the
                // very start of the message — the original "> 0" silently missed that case.
                if (ex.Message.IndexOf("404") >= 0)
                {
                    dbfeed.Deleted = Common.Share.DeleteStatus.NotFound;
                }
                else if (ex.Message.IndexOf("403") >= 0)
                {
                    dbfeed.Deleted = Common.Share.DeleteStatus.Forbidden;
                }
                else if (ex.Message.IndexOfX("timed out") > 0)
                {
                    // Request timed out. NOTE(review): IndexOfX is a project extension — confirm
                    // whether it shares String.IndexOf's 0-based semantics (then ">= 0" applies too).
                    dbfeed.Deleted = Common.Share.DeleteStatus.RequestTimeOut;
                }

                // Log the failure; guard against a null InnerException so the logging path
                // itself cannot throw out of this catch block (the original could NRE here).
                if (ex.Message.IndexOfX("Inner Exception") > 0 && ex.InnerException != null)
                {
                    CrawlerLog.FailLog(dbfeed, ex.InnerException.Message.SubstringX(0, 1020));
                }
                else
                {
                    GeneralLogs.WriteLog("Info " + ex.Message);
                }
                #endregion
            }
            #region LASTFLOW
            // Always record the visit and persist, even after a handled failure above.
            try
            {
                CheckForChangeDuration(dbfeed, numberOfNewItems > 0);
                CrawlerLog.SuccessLog(dbfeed, numberOfNewItems);
                context.SaveChanges();
            }
            catch (Exception ex)
            {
                GeneralLogs.WriteLog(ex.Message);
            }
            #endregion

            return dbfeed;
        }