Ejemplo n.º 1
0
        /// <summary>
        /// Builds an <see cref="AnalyzeModel"/> that lists, for every reader exposed by the
        /// repository, the reader's name and the number of entries in <paramref name="borrows"/>
        /// whose <c>ReaderId</c> equals that reader's <c>Id</c>.
        /// </summary>
        /// <param name="borrows">Borrow records to count against each reader.</param>
        /// <param name="repository">Repository providing the readers to report on.</param>
        /// <returns>
        /// A model holding parallel name/value lists plus one <see cref="AnalyzeItem"/> per reader,
        /// all in the order the readers were enumerated.
        /// </returns>
        public static AnalyzeModel GetProp(IQueryable <Borrow> borrows, IBooksReopository repository)
        {
            var readerNames  = new List <string>();
            var borrowCounts = new List <int>();
            var items        = new List <AnalyzeItem>();

            IQueryable <Reader> allReaders = repository.Readers;

            // The readers are materialised once; the count is evaluated separately for each reader.
            foreach (Reader reader in allReaders.ToList())
            {
                int borrowCount = borrows.Where(br => br.ReaderId == reader.Id).Count();

                items.Add(new AnalyzeItem { name = reader.Name, value = borrowCount });
                readerNames.Add(reader.Name);
                borrowCounts.Add(borrowCount);
            }

            return new AnalyzeModel
            {
                Values       = borrowCounts,
                Names        = readerNames,
                AnalyzeItems = items
            };
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Applies a column transformer to a single document, writes the result back into the
        /// document and updates the optional statistics item.
        /// </summary>
        /// <param name="ge">Transformer to apply.</param>
        /// <param name="item">Document to transform; when null a new empty <see cref="FreeDocument"/> is returned.</param>
        /// <param name="analyzeItem">Optional statistics collector; nothing is counted when it is null.</param>
        /// <returns>The same <paramref name="item"/> instance, possibly with one column updated.</returns>
        public static IFreeDocument Transform(this IColumnDataTransformer ge,
                                              IFreeDocument item, AnalyzeItem analyzeItem)
        {
            if (item == null)
            {
                return new FreeDocument();
            }

            object output = null;

            try
            {
                bool inputMissing = ge.OneOutput && item[ge.Column] == null;
                if (!inputMissing)
                {
                    output = ge.TransformData(item);
                }
                else if (analyzeItem != null)
                {
                    // Single-output transformer with no input value: count it and skip the transformation.
                    analyzeItem.EmptyInput += 1;
                }
            }
            catch (Exception ex)
            {
                // The exception message becomes the output, so it is written into the target column below.
                output = ex.Message;
                if (analyzeItem != null)
                {
                    analyzeItem.Error++;
                    analyzeItem.Analyzer.AddErrorLog(item, ex, ge);
                }

                XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_208"), ge.Column, ge.TypeName, output));
            }

            // Only single-output transformers write back, and only when there is something to write.
            if (ge.OneOutput && output != null)
            {
                var targetColumn = string.IsNullOrWhiteSpace(ge.NewColumn) ? ge.Column : ge.NewColumn;
                item.SetValue(targetColumn, output);
            }

            return item;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Applies a column transformer to a single document, writes the result back into the
        /// document and updates the optional statistics item.
        /// </summary>
        /// <param name="ge">Transformer to apply.</param>
        /// <param name="item">Document to transform; when null a new empty <see cref="FreeDocument"/> is returned.</param>
        /// <param name="analyzeItem">Statistics collector; may be null, in which case nothing is counted.</param>
        /// <returns>The same <paramref name="item"/> instance after the transformation.</returns>
        public static IFreeDocument Transform(this IColumnDataTransformer ge,
                                              IFreeDocument item, AnalyzeItem analyzeItem)
        {
            if (item == null)
            {
                return(new FreeDocument());
            }

            var dict = item;

            object res = null;

            try
            {
                if (ge.OneOutput && dict[ge.Column] == null)
                {
                    // Guarded: analyzeItem may be null, and an unguarded increment here would
                    // throw a NullReferenceException for every empty input.
                    if (analyzeItem != null)
                    {
                        analyzeItem.EmptyInput++;
                    }
                }
                else
                {
                    res = ge.TransformData(dict);
                }
            }
            catch (Exception ex)
            {
                res = ex.Message;

                // Guarded: dereferencing a null analyzeItem inside the handler would throw a new
                // exception out of the catch block and abort the whole pipeline.
                if (analyzeItem != null)
                {
                    analyzeItem.Error++;
                }
                XLogSys.Print.Error($"位于{ge.ETLIndex}, 作用在{ge.Column}的模块 {ge.TypeName} 转换出错, 信息{res}");
            }

            if (ge.OneOutput)
            {
                if (!string.IsNullOrWhiteSpace(ge.NewColumn))
                {
                    if (res != null)
                    {
                        dict.SetValue(ge.NewColumn, res);
                    }
                }
                else
                {
                    // Without a NewColumn the source column is always overwritten, even when res is null.
                    dict.SetValue(ge.Column, res);
                }
            }


            return(dict);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Lazily passes every element of <paramref name="documents"/> through unchanged while
        /// incrementing <c>analyzer.Input</c> once per element that is pulled.
        /// </summary>
        /// <typeparam name="T">Element type of the sequence.</typeparam>
        /// <param name="documents">Source sequence; a null source yields an empty sequence.</param>
        /// <param name="analyzer">Optional statistics item; when null nothing is counted.</param>
        /// <returns>The same elements in the same order.</returns>
        public static IEnumerable <T> CountInput <T>(this IEnumerable <T> documents, AnalyzeItem analyzer = null)
        {
            if (documents != null)
            {
                foreach (T current in documents)
                {
                    // The counter is bumped just before the element is handed downstream.
                    if (analyzer != null)
                    {
                        analyzer.Input++;
                    }
                    yield return current;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Wraps <paramref name="func"/> with the processing step represented by
        /// <paramref name="tool"/> and returns the resulting pipeline function.
        /// </summary>
        /// <param name="tool">The column module to append to the pipeline.</param>
        /// <param name="func">
        /// The pipeline built so far. It is returned unchanged when initialisation throws, when the
        /// tool is disabled, or when the tool matches none of the interfaces handled below.
        /// </param>
        /// <param name="isexecute">Passed to <c>tool.SetExecute</c>; executors are only attached when it is true.</param>
        /// <param name="analyzer">Optional analyzer; when null no statistics item is obtained for the tool.</param>
        /// <returns>The pipeline function extended with the tool's step, or <paramref name="func"/> itself.</returns>
        public static EnumerableFunc FuncAdd(this IColumnProcess tool, EnumerableFunc func, bool isexecute,
                                             Analyzer analyzer)
        {
            AnalyzeItem analyzeItem = null;

            // Stays null when no analyzer is supplied; every use below is null-checked or goes
            // through CountInput/CountOutput/Transform, which receive it as an argument.
            analyzeItem = analyzer?.Set(tool);
            try
            {
                tool.SetExecute(isexecute);
                // NOTE(review): tool.Init is only invoked when an analyze item exists, so without an
                // analyzer the tool is never initialised here — confirm this is intended.
                if (analyzeItem != null)
                {
                    analyzeItem.HasInit = tool.Init(new List <IFreeDocument>());
                }
            }
            catch (Exception ex)
            {
                // Initialisation failed: record it, log it and leave the pipeline without this tool.
                if (analyzeItem != null)
                {
                    analyzeItem.HasInit = false;
                }
                XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_209"), tool.Column, tool.TypeName, ex));
                return(func);
            }
            if (!tool.Enabled)
            {
                return(func);
            }
            if (tool is IColumnDataTransformer)
            {
                var ge    = tool as IColumnDataTransformer;
                // func1 captures the pipeline as it was before this step, so the new lambda can call it.
                var func1 = func;
                func = source =>
                {
                    var source2 = func1(source).CountInput(analyzeItem);
                    if (ge.IsMultiYield)
                    {
                        return(ge.TransformManyData(source2, analyzeItem).CountOutput(analyzeItem));
                    }
                    ;
                    return(source2.Select(input =>
                    {
                        var now = DateTime.Now;

                        var result = Transform(ge, input, analyzeItem);
                        // RunningTime is overwritten per item, so it holds the duration of the most
                        // recent Transform call rather than a running total.
                        if (analyzeItem != null)
                        {
                            analyzeItem.RunningTime = DateTime.Now - now;
                        }
                        return result;
                    }).CountOutput(analyzeItem));
                };
            }

            // Not an else-if: a tool that is both a transformer and a generator gets both wrappers,
            // the generator wrapping the transformer step built above.
            if (tool is IColumnGenerator)
            {
                var ge = tool as IColumnGenerator;

                var func1 = func;
                // A MergeType outside the four cases below leaves func untouched.
                switch (ge.MergeType)
                {
                case MergeType.Append:

                    // Here the previous pipeline is handed to ConcatPlus instead of being applied first.
                    func = source => source.CountInput(analyzeItem).ConcatPlus(func1, ge).CountOutput(analyzeItem);
                    break;

                case MergeType.Cross:
                    func = source =>
                           func1(source.CountInput(analyzeItem)).Cross(ge.Generate).CountOutput(analyzeItem);
                    break;

                case MergeType.Merge:
                    func = source =>
                           func1(source.CountInput(analyzeItem)).MergeAll(ge.Generate()).CountOutput(analyzeItem);
                    break;

                case MergeType.Mix:
                    func = source =>
                           func1(source.CountInput(analyzeItem)).Mix(ge.Generate()).CountOutput(analyzeItem);
                    break;
                }
            }


            // Executors are attached only when isexecute is true; otherwise the filter branch is tried.
            if (tool is IDataExecutor && isexecute)
            {
                var ge    = tool as IDataExecutor;
                var func1 = func;
                func = source => ge.Execute(func1(source.CountInput(analyzeItem))).CountOutput(analyzeItem);
            }
            else if (tool is IColumnDataFilter)
            {
                var t = tool as IColumnDataFilter;

                // The filter whose TypeName equals the "key_210" resource is handled through its
                // Skip/Take members (accessed dynamically) instead of FilteData.
                if (t.TypeName == GlobalHelper.Get("key_210"))
                {
                    dynamic range = t;
                    var     func1 = func;
                    func = source => func1(source.CountInput(analyzeItem)).Skip((int)range.Skip).Take((int)range.Take)
                           .CountOutput(analyzeItem);
                }
                else

                {
                    var func1 = func;
                    // A FilterWorkMode outside the three cases below leaves func untouched.
                    switch (t.FilterWorkMode)
                    {
                    case FilterWorkMode.PassWhenSuccess:
                        // SkipWhile: drops leading items for as long as FilteData returns true.
                        func =
                            source =>
                            func1(source.CountInput(analyzeItem))
                            .SkipWhile(t.FilteData)
                            .CountOutput(analyzeItem);
                        break;

                    case FilterWorkMode.ByItem:
                        // Where: every item is tested independently.
                        func =
                            source =>
                            func1(source.CountInput(analyzeItem)).Where(t.FilteData).CountOutput(analyzeItem);
                        break;

                    case FilterWorkMode.StopWhenFail:
                        // TakeWhile: the sequence ends at the first item for which FilteData returns false.
                        func =
                            source =>
                            func1(source.CountInput(analyzeItem))
                            .TakeWhile(t.FilteData)
                            .CountOutput(analyzeItem);
                        break;
                    }
                }
            }
            return(func);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Pops the highest-scored serialized item from the Redis sorted set <c>itemsKey</c>,
        /// runs it through the analysis stages and indexes the result into ES.
        /// </summary>
        /// <remarks>
        /// Indexing is attempted at most three times with a one-minute pause between attempts.
        /// Failures are written to standard error instead of being silently discarded; the method
        /// itself still never throws.
        /// </remarks>
        private static void ItemCrawl()
        {
            const int maxIndexAttempts = 3;

            using (var client = new RedisAccess(redisPool))
            {
                try
                {
                    string serializedItem = client.PopItemWithHighestScoreFromSortedSet(itemsKey);
                    if (string.IsNullOrEmpty(serializedItem))
                    {
                        // Nothing to process: sleep for a little while to release the CPU.
                        Thread.Sleep(100);
                        return;
                    }

                    Palas.Protocol.PFItemToAnalyze itemToAnalyze = JsonConvert.DeserializeObject <Palas.Protocol.PFItemToAnalyze>(serializedItem);

                    Palas.Protocol.PFItemToAnalyze analyzeResult = AnalyzeItem.Analyzer(itemToAnalyze, Palas.Protocol.PFAnalyzeFlag.Splite);

                    // 6. First analysis pass.
                    MultriAnalyzeFlag analyzeFlags = AnalyzeItem.BuildAnalyzeFlag((Enums.AnalyzeFlag)analyzeResult.AnalyzeFlag);
                    Palas.Protocol.PFItemToAnalyze analyzeFirstResult = AnalyzeItem.AnalyzerFirst(analyzeResult, analyzeFlags);

                    // 7. Second analysis pass.
                    Palas.Protocol.PFItemToAnalyze analyzeSecondResult = AnalyzeItem.AnalyzeSecond(analyzeFirstResult, analyzeFlags);

                    // 8. Per-issue analysis.
                    Palas.Protocol.PFItemToAnalyze analyzeIssueResult = AnalyzeItem.IssueAnalyzer(analyzeSecondResult, analyzeFlags);

                    // 9. Index the item into ES.
                    if (analyzeIssueResult == null)
                    {
                        return;
                    }

                    // Convert the protocol item into the internal Item type.
                    Palas.Protocol.PFItem pfItem = analyzeIssueResult.Item;
                    Item _item = TypeExchangeUtility.ExchangeItem(pfItem);

                    _item = FilterIssue.FilterExcludeExpression(_item);

                    Enums.ProcessStatus result = Enums.ProcessStatus.Failed;
                    int attempt = 0;
                    do
                    {
                        // The counter must advance on every pass; without it the loop condition
                        // never bounds the retries and a persistent failure loops forever.
                        attempt++;
                        try
                        {
                            ESAccess.IndexOnly(_item);
                            result = Enums.ProcessStatus.Succeeded;
                        }
                        catch (Exception indexError)
                        {
                            result = Enums.ProcessStatus.Failed;
                            Console.Error.WriteLine(string.Format("Index attempt {0}/{1} failed for paper {2}: {3}", attempt, maxIndexAttempts, _item.ItemID, indexError.Message));

                            // Only wait when another attempt will actually follow.
                            if (attempt < maxIndexAttempts)
                            {
                                Thread.Sleep(new TimeSpan(0, 1, 00));
                            }
                        }
                    }while (result == Enums.ProcessStatus.Failed && attempt < maxIndexAttempts);

                    // 10. Report the final status.
                    Console.WriteLine(string.Format("Index Dup: {2} paper: {0}, Status: {1}", _item.ItemID, result.ToString(), _item.DuplicationID));
                }
                catch (Exception ex)
                {
                    // Still best-effort (no rethrow), but the failure is no longer invisible.
                    Console.Error.WriteLine(string.Format("ItemCrawl failed: {0}", ex));
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Pops the highest-scored serialized WeChat article from the Redis sorted set
        /// <c>wechatSubsKey</c>, downloads and cleans its HTML, runs it through the analysis
        /// stages and indexes the result into ES.
        /// </summary>
        /// <remarks>
        /// Items without an id or without downloadable content are dropped. Indexing is attempted
        /// at most three times with a one-minute pause between attempts. Failures are written to
        /// standard error instead of being silently discarded; the method itself still never throws.
        /// </remarks>
        private static void WechatCrawler()
        {
            const int maxIndexAttempts = 3;
            // Analyze flag value used both on the item and in its crawl record.
            const int analyzeFlag = 312287;

            using (var client = new RedisAccess(redisPool))
            {
                try
                {
                    string wechatItem = client.PopItemWithHighestScoreFromSortedSet(wechatSubsKey);
                    if (string.IsNullOrEmpty(wechatItem))
                    {
                        // Nothing to process: sleep for a little while to release the CPU.
                        Thread.Sleep(50);
                        return;
                    }

                    WeChatItemModel wechatItemModel = JsonConvert.DeserializeObject <WeChatItemModel>(wechatItem);
                    if (wechatItemModel == null || string.IsNullOrEmpty(wechatItemModel.Id))
                    {
                        return;
                    }

                    Palas.Protocol.PFItemToAnalyze pfItemToAnalyze = new Palas.Protocol.PFItemToAnalyze();
                    Palas.Protocol.PFItem          pfItem          = new Palas.Protocol.PFItem();

                    pfItem.AuthorID           = wechatItemModel.WechatSubId;
                    pfItem.AuthorName         = wechatItemModel.WechatSubName;
                    pfItem.ContentDetailLevel = Palas.Protocol.PFContentDetailLevel.FullContent;
                    pfItem.Crawler            = "WechatSubs";
                    pfItem.CrawlID            = wechatItemModel.WechatSubId;
                    pfItem.FetchTime          = DateTimeUtility.GetUnixTimeStamp(DateTime.Now);
                    pfItem.ItemID             = wechatItemModel.Id;
                    pfItem.MediaID            = "WeChat";
                    pfItem.MediaName          = "微信";
                    pfItem.PubDate            = DateTimeUtility.GetUnixTimeStamp(wechatItemModel.PubDate);
                    // The stored URL carries HTML-escaped ampersands; undo that before fetching.
                    pfItem.Url        = wechatItemModel.Url.Replace("&amp;", "&");
                    pfItem.CleanTitle = wechatItemModel.Title;

                    if (wechatItemModel.Tags != null && wechatItemModel.Tags.Count > 0)
                    {
                        pfItem.Tag = string.Join(",", wechatItemModel.Tags);
                    }

                    string content = GetHtmlContent(pfItem.Url);
                    if (string.IsNullOrEmpty(content))
                    {
                        return;
                    }

                    pfItem.HTMLText  = HtmlFormattor.FormatHtml(content, pfItem.Url);
                    pfItem.CleanText = HTMLCleaner.CleanHTML(pfItem.HTMLText, true);

                    pfItemToAnalyze.AnalyzeFlag = analyzeFlag;
                    pfItemToAnalyze.Item        = pfItem;

                    pfItemToAnalyze.CrawlRecode      = new Palas.Protocol.PFCrawlRecode(pfItem.Crawler, pfItem.CrawlID, "Common", "", "N", analyzeFlag, 1, "WeChatSync", 0, 0, 0, "", 0, 0, true, pfItem.MediaID, pfItem.MediaName, pfItem.MediaChannel);
                    pfItemToAnalyze.CrawlRecode.Tags = pfItem.Tag;

                    pfItemToAnalyze.CrawlRecode.JoinIssueIDs = string.Join(",", wechatItemModel.Customers);
                    pfItemToAnalyze.CrawlRecode.MediaType    = 11;
                    pfItemToAnalyze.CrawlRecode.MediaWeight  = 8;

                    Palas.Protocol.PFItemToAnalyze analyzeResult = AnalyzeItem.Analyzer(pfItemToAnalyze, Palas.Protocol.PFAnalyzeFlag.Splite);

                    // 6. First analysis pass.
                    MultriAnalyzeFlag analyzeFlags = AnalyzeItem.BuildAnalyzeFlag((Enums.AnalyzeFlag)analyzeResult.AnalyzeFlag);
                    Palas.Protocol.PFItemToAnalyze analyzeFirstResult = AnalyzeItem.AnalyzerFirst(analyzeResult, analyzeFlags);

                    // 7. Second analysis pass.
                    Palas.Protocol.PFItemToAnalyze analyzeSecondResult = AnalyzeItem.AnalyzeSecond(analyzeFirstResult, analyzeFlags);

                    // 8. Per-issue analysis.
                    Palas.Protocol.PFItemToAnalyze analyzeIssueResult = AnalyzeItem.IssueAnalyzer(analyzeSecondResult, analyzeFlags);

                    // 9. Index the item into ES.
                    if (analyzeIssueResult == null)
                    {
                        return;
                    }

                    // Convert the protocol item into the internal Item type.
                    Palas.Protocol.PFItem pfItem2 = analyzeIssueResult.Item;
                    Item _item = TypeExchangeUtility.ExchangeItem(pfItem2);

                    _item = FilterIssue.FilterExcludeExpression(_item);

                    Enums.ProcessStatus result = Enums.ProcessStatus.Failed;
                    int attempt = 0;
                    do
                    {
                        // The counter must advance on every pass; without it the loop condition
                        // never bounds the retries and a persistent failure loops forever.
                        attempt++;
                        try
                        {
                            ESAccess.IndexOnly(_item);
                            result = Enums.ProcessStatus.Succeeded;
                        }
                        catch (Exception indexError)
                        {
                            result = Enums.ProcessStatus.Failed;
                            Console.Error.WriteLine(string.Format("Index attempt {0}/{1} failed for paper {2}: {3}", attempt, maxIndexAttempts, _item.ItemID, indexError.Message));

                            // Only wait when another attempt will actually follow.
                            if (attempt < maxIndexAttempts)
                            {
                                Thread.Sleep(new TimeSpan(0, 1, 00));
                            }
                        }
                    }while (result == Enums.ProcessStatus.Failed && attempt < maxIndexAttempts);

                    // 10. Report the final status.
                    Console.WriteLine(string.Format("Index paper: {0}, Status: {1}", _item.ItemID, result.ToString()));
                }
                catch (Exception ex)
                {
                    // Still best-effort (no rethrow), but the failure is no longer invisible.
                    Console.Error.WriteLine(string.Format("WechatCrawler failed: {0}", ex));
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Fetches one crawl task, resolves it into crawl requests, downloads and parses each page,
        /// and pushes every extracted paper through pre-processing, the analysis stages and ES indexing.
        /// </summary>
        /// <remarks>
        /// Indexing of each paper is attempted at most three times with a one-minute pause between
        /// attempts; a paper that still fails is reported with status Failed and processing moves on.
        /// Exceptions outside the indexing step are not caught here.
        /// </remarks>
        private static void DoSimpleCrawler()
        {
            const int maxIndexAttempts = 3;

            // 1. Fetch a Simple Crawler task through TaskProvider.
            // NOTE(review): the crawler name below is hard-coded and encrypted with DataAccess.PalasKey —
            // confirm it is not meant to be configuration.
            TaskProvider taskProvider = new TaskProvider();
            string       lockCrawler  = DES.Encrypt("NC31ASTORM", DataAccess.PalasKey);

            Palas.Protocol.PFTaskInfo taskInfo = taskProvider.GetTask(lockCrawler, ConfigurationManager.AppSettings["CrawlConductUrl"] + "Task/GetCrawlTask");
            Console.WriteLine(string.Format("GetTask: {0}", taskInfo.TaskId));

            // 2. Break the task down into individual crawl requests.
            TaskResolve taskResolve = new TaskResolve();
            List <Palas.Protocol.PFCrawlInfo> crawlInfos = taskResolve.ResolveTask(taskInfo);

            foreach (var crawlInfo in crawlInfos)
            {
                // 3. Download the page.
                HttpRequest httpRequest = new HttpRequest();
                Palas.Protocol.PFCrawlResponse crawlResponse = httpRequest.DoWebRequest(crawlInfo);

                // 4. Parse the HTML into papers.
                Palas.Framework.Module.ParserHtml parserHtml = new Palas.Framework.Module.ParserHtml();
                List <string> papers = parserHtml.Parser(crawlResponse, taskInfo);

                Console.WriteLine(string.Format("Crawl paper count: {0}", papers.Count));

                foreach (string paper in papers)
                {
                    // 5. Pre-process the paper; an empty result means there is nothing to analyze.
                    ItemProcess itemProcess = new ItemProcess();
                    string      item        = itemProcess.Process(paper, taskInfo.BaseInfo);

                    if (string.IsNullOrEmpty(item))
                    {
                        continue;
                    }

                    Palas.Protocol.PFItemToAnalyze pfItemToAnalyze = new Palas.Protocol.PFItemToAnalyze();
                    pfItemToAnalyze.Item        = JsonConvert.DeserializeObject <Palas.Protocol.PFItem>(item);
                    pfItemToAnalyze.CrawlRecode = taskInfo.BaseInfo;
                    pfItemToAnalyze.AnalyzeFlag = taskInfo.BaseInfo.AnalyzeFlag;

                    Palas.Protocol.PFItemToAnalyze analyzeResult = AnalyzeItem.Analyzer(pfItemToAnalyze, Palas.Protocol.PFAnalyzeFlag.Splite);

                    // 6. First analysis pass.
                    MultriAnalyzeFlag analyzeFlags = AnalyzeItem.BuildAnalyzeFlag((Enums.AnalyzeFlag)analyzeResult.AnalyzeFlag);
                    Palas.Protocol.PFItemToAnalyze analyzeFirstResult = AnalyzeItem.AnalyzerFirst(analyzeResult, analyzeFlags);

                    // 7. Second analysis pass.
                    Palas.Protocol.PFItemToAnalyze analyzeSecondResult = AnalyzeItem.AnalyzeSecond(analyzeFirstResult, analyzeFlags);

                    // 8. Per-issue analysis.
                    Palas.Protocol.PFItemToAnalyze analyzeIssueResult = AnalyzeItem.IssueAnalyzer(analyzeSecondResult, analyzeFlags);

                    // 9. Index the item into ES.
                    if (analyzeIssueResult == null)
                    {
                        continue;
                    }

                    // Convert the protocol item into the internal Item type.
                    Palas.Protocol.PFItem pfItem = analyzeIssueResult.Item;
                    Item _item = TypeExchangeUtility.ExchangeItem(pfItem);

                    _item = FilterIssue.FilterExcludeExpression(_item);

                    Enums.ProcessStatus result = Enums.ProcessStatus.Failed;
                    int attempt = 0;
                    do
                    {
                        // The counter must advance on every pass; without it the loop condition
                        // never bounds the retries and a persistent failure loops forever.
                        attempt++;
                        try
                        {
                            ESAccess.IndexOnly(_item);
                            result = Enums.ProcessStatus.Succeeded;
                        }
                        catch (Exception indexError)
                        {
                            result = Enums.ProcessStatus.Failed;
                            Console.Error.WriteLine(string.Format("Index attempt {0}/{1} failed for paper {2}: {3}", attempt, maxIndexAttempts, _item.ItemID, indexError.Message));

                            // Only wait when another attempt will actually follow.
                            if (attempt < maxIndexAttempts)
                            {
                                Thread.Sleep(new TimeSpan(0, 1, 00));
                            }
                        }
                    }while (result == Enums.ProcessStatus.Failed && attempt < maxIndexAttempts);

                    // 10. Report the final status.
                    Console.WriteLine(string.Format("Index paper: {0}, Status: {1}", _item.ItemID, result.ToString()));
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Wraps <paramref name="func"/> with the processing step represented by
        /// <paramref name="tool"/> and returns the resulting pipeline function.
        /// </summary>
        /// <param name="tool">The column module to append to the pipeline.</param>
        /// <param name="func">
        /// The pipeline built so far. It is returned unchanged when initialisation throws, when the
        /// tool is disabled, or when the tool matches none of the interfaces handled below.
        /// </param>
        /// <param name="isexecute">Passed to <c>tool.SetExecute</c>; executors are only attached when it is true.</param>
        /// <param name="analyzer">Optional analyzer; when null no statistics item is obtained for the tool.</param>
        /// <returns>The pipeline function extended with the tool's step, or <paramref name="func"/> itself.</returns>
        public static EnumerableFunc FuncAdd(this IColumnProcess tool, EnumerableFunc func, bool isexecute, Analyzer analyzer)
        {
            AnalyzeItem stats = analyzer?.Set(tool);

            try
            {
                tool.SetExecute(isexecute);
                // Init is only invoked when a statistics item exists.
                if (stats != null)
                {
                    stats.HasInit = tool.Init(new List <IFreeDocument>());
                }
            }
            catch (Exception ex)
            {
                if (stats != null)
                {
                    stats.HasInit = false;
                }
                XLogSys.Print.Error($"位于{tool.Column}列的{tool.TypeName}模块在初始化时出现异常:{ex},请检查任务参数");
                return func;
            }

            if (!tool.Enabled)
            {
                return func;
            }

            // Transformer step: either one-to-many or item-by-item.
            var transformer = tool as IColumnDataTransformer;
            if (transformer != null)
            {
                var upstream = func;
                func = source =>
                {
                    var counted = upstream(source).CountInput(stats);
                    if (transformer.IsMultiYield)
                    {
                        return transformer.TransformManyData(counted).CountOutput(stats);
                    }
                    return counted.Select(input => Transform(transformer, input, stats)).CountOutput(stats);
                };
            }

            // Generator step: applied on top of the transformer step when the tool is both.
            var generator = tool as IColumnGenerator;
            if (generator != null)
            {
                var upstream = func;
                switch (generator.MergeType)
                {
                case MergeType.Append:
                    // The previous pipeline is handed to ConcatPlus rather than applied first.
                    func = source => source.CountInput(stats).ConcatPlus(upstream, generator).CountOutput(stats);
                    break;

                case MergeType.Cross:
                    func = source => upstream(source.CountInput(stats)).Cross(generator.Generate).CountOutput(stats);
                    break;

                case MergeType.Merge:
                    func = source => upstream(source.CountInput(stats)).MergeAll(generator.Generate()).CountOutput(stats);
                    break;

                case MergeType.Mix:
                    func = source => upstream(source.CountInput(stats)).Mix(generator.Generate()).CountOutput(stats);
                    break;
                }
            }

            // Executor step (only in execute mode); otherwise a filter step when the tool is a filter.
            var executor = tool as IDataExecutor;
            var filter   = tool as IColumnDataFilter;
            if (executor != null && isexecute)
            {
                var upstream = func;
                func = source => executor.Execute(upstream(source.CountInput(stats))).CountOutput(stats);
            }
            else if (filter != null)
            {
                var upstream = func;
                if (filter.TypeName == "数量范围选择")
                {
                    // This filter type is applied through its Skip/Take members, accessed dynamically.
                    dynamic range = filter;
                    func = source => upstream(source.CountInput(stats)).Skip((int)range.Skip).Take((int)range.Take).CountOutput(stats);
                }
                else
                {
                    func = source => upstream(source.CountInput(stats)).Where(filter.FilteData).CountOutput(stats);
                }
            }

            return func;
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Evaluates the script against every input document and yields each document produced
 /// from the result, merged with the input document.
 /// </summary>
 /// <param name="datas">Input documents.</param>
 /// <param name="analyzer">Optional statistics item; when present, evaluation errors go to its error log instead of the system log.</param>
 /// <returns>The produced documents; inputs whose evaluation throws contribute nothing.</returns>
 public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer = null)
 {
     foreach (var source in datas)
     {
         object evaluated = null;
         bool   failed    = false;
         try
         {
             evaluated = eval(source);
         }
         catch (Exception ex)
         {
             failed = true;
             if (analyzer == null)
             {
                 XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_208"), this.Column, this.TypeName, ex));
             }
             else
             {
                 analyzer.Analyzer.AddErrorLog(source, ex, this);
             }
         }

         // A failed evaluation has already been reported; move on to the next input.
         if (failed)
         {
             continue;
         }

         foreach (var produced in ScriptHelper.ToDocuments(evaluated))
         {
             yield return produced.MergeQuery(source, NewColumn);
         }
     }
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Yields one new document per regex match found in the <c>Column</c> value of each input document.
        /// </summary>
        /// <param name="datas">Input documents.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>
        /// The generated documents. Enumeration stops as soon as <c>Script</c> is found to be null
        /// or empty; inputs whose column value is null are skipped.
        /// </returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            foreach (var source in datas)
            {
                object cell = source[Column];

                // Checked after the column read, once per input, exactly like the loop exit it replaces.
                if (string.IsNullOrEmpty(Script))
                {
                    yield break;
                }

                if (cell != null)
                {
                    MatchCollection matches = regex.Matches(cell.ToString());
                    foreach (var match in matches)
                    {
                        var doc = new FreeDocument();
                        doc.MergeQuery(source, NewColumn);
                        // The match object itself is stored, not its string value.
                        doc.SetValue(Column, match);
                        yield return doc.MergeQuery(source, NewColumn);
                    }
                }
            }
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Pass-through implementation: lazily yields every input document unchanged, in order.
 /// </summary>
 /// <param name="datas">Input documents.</param>
 /// <param name="analyzer">Not used by this implementation.</param>
 /// <returns>The input documents themselves.</returns>
 public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
 {
     foreach (IFreeDocument document in datas)
     {
         yield return document;
     }
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Splits the <c>Column</c> value of each input document with <c>Split</c> and yields one
        /// new document per resulting part.
        /// </summary>
        /// <param name="datas">Input documents.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>The generated documents; inputs whose column value is null are skipped.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)

        {
            foreach (var data in datas)

            {
                object item = data[Column];

                // A missing column value has nothing to split; calling ToString() on it would
                // throw a NullReferenceException and abort the whole enumeration.
                if (item == null)
                {
                    continue;
                }

                var r = Split(item.ToString());
                foreach (var p in r)
                {
                    var doc = new FreeDocument();
                    doc.MergeQuery(data, NewColumn);
                    doc.SetValue(Column, p);
                    yield return(doc.MergeQuery(data, NewColumn));
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Splits the <c>Column</c> value of each input document with the regex and yields one
        /// new document per resulting piece.
        /// </summary>
        /// <param name="datas">Input documents.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>The generated documents; inputs whose column value is null are skipped.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            foreach (var source in datas)
            {
                var cell = source[Column];
                if (cell != null)
                {
                    foreach (var piece in regex.Split(cell.ToString()))
                    {
                        var doc = new FreeDocument();

                        // The merge is performed on the new document, which is then yielded directly.
                        doc.MergeQuery(source, NewColumn);
                        doc.SetValue(Column, piece);
                        yield return doc;
                    }
                }
            }
        }
Ejemplo n.º 15
0
        //TODO: lazy (enumerator-style) iteration cannot be used here unless no other operation follows this module

        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            var collColum = CollectionColumns.Split(' ').Select(d => d.Trim()).ToList();
            var sumColum  = SumColumns.Split(' ').Select(d => d.Trim()).ToList();

            foreach (var data in datas)
            {
                var item = data[Column];
                if (item == null)
                {
                    continue;
                }
                var key = item.ToString();

                IFreeDocument v;
                if (dictionary.TryGetValue(key, out v))
                {
                    foreach (var r in data)
                    {
                        if (collColum.Contains(r.Key))
                        {
                            var list = v[r.Key] as IList;
                            if (data[r.Key] != null)
                            {
                                if (list != null)
                                {
                                    list.Add(data[r.Key]);
                                }
                                else
                                {
                                    v[r.Key] = new List <object>
                                    {
                                        data[r.Key]
                                    };
                                }
                            }
                        }
                        else if (sumColum.Contains(r.Key))
                        {
                            var vnum = v[r.Key];
                            if (vnum == null)
                            {
                                vnum = 0;
                            }
                            var v4 = double.Parse(vnum.ToString());
                            var v3 = data[r.Key];
                            if (v3 == null)
                            {
                                v3 = 0;
                            }
                            var v5 = double.Parse(v3.ToString());
                            v4      += v5;
                            v[r.Key] = v4;
                        }

                        else
                        {
                            if (v[r.Key] == null)
                            {
                                v[r.Key] = r.Value;
                            }
                        }
                    }
                    //yield return v;
                }
                else
                {
                    //显然应当先生成一个新的字典,否则会修改原有集合
                    var newfree = new FreeDocument();
                    data.DictCopyTo(newfree);
                    foreach (var col in collColum)
                    {
                        if (newfree[col] != null)
                        {
                            newfree[col] = new List <object> {
                                newfree[col]
                            }
                        }
                        ;
                        else
                        {
                            newfree[col] = new List <object>();
                        }
                    }

                    dictionary.Add(key, newfree);
                    if (IsLazyLinq == false)
                    {
                        yield return(newfree);
                    }
                }
            }
            if (IsLazyLinq)
            {
                foreach (var item in dictionary)
                {
                    yield return(item.Value);
                }
            }
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Wraps a sequence so that <c>analyzer.Output</c> is incremented once for every element
 /// as it is enumerated.
 /// </summary>
 /// <typeparam name="T">Element type of the sequence.</typeparam>
 /// <param name="documents">Sequence to wrap.</param>
 /// <param name="analyzer">Counter target; when null the elements pass through uncounted.</param>
 /// <returns>A deferred projection yielding the same elements in the same order.</returns>
 public static IEnumerable <T> CountOutput <T>(this IEnumerable <T> documents, AnalyzeItem analyzer = null)
 {
     return documents.Select(document =>
     {
         if (analyzer == null)
         {
             return document;
         }

         analyzer.Output++;
         return document;
     });
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Runs <c>InternalTransformManyData</c> on each input document and yields every document it
        /// produces. An exception thrown by that call is logged and the offending input is skipped.
        /// </summary>
        /// <param name="datas">Documents to transform, one at a time.</param>
        /// <param name="analyzer">
        /// Optional statistics sink. When non-null, the time spent in the
        /// <c>InternalTransformManyData</c> call is added to <c>RunningTime</c> and errors go to
        /// <c>analyzer.Analyzer.AddErrorLog</c>; when null, errors go to <c>XLogSys.Print.Error</c>.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of transformed documents. In UI mode the sequence ends early
        /// when the failure prompt's <c>UIInvoke</c> call evaluates to false.
        /// </returns>
        public virtual IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas,
                                                                     AnalyzeItem analyzer = null)

        {
            var olddatas     = datas;
            // Length of the current streak of empty results (only maintained in UI mode).
            var errorCounter = 0;

            foreach (var data in datas)
            {
                IEnumerable <IFreeDocument> newdatas = null;
                try
                {
                    // Only the call itself is timed; enumerating a deferred result is not included.
                    DateTime now = DateTime.Now;
                    newdatas = InternalTransformManyData(data);
                    if (analyzer != null)
                    {
                        analyzer.RunningTime += DateTime.Now - now;
                    }
                }
                catch (Exception ex)
                {
                    // newdatas stays null, so this input is skipped further down.
                    if (analyzer != null)
                    {
                        analyzer.Analyzer.AddErrorLog(data, ex, this);
                    }
                    else
                    {
                        XLogSys.Print.Error(string.Format(GlobalHelper.Get("key_208"), this.Column, this.TypeName, ex));
                    }
                }

                if (MainDescription.IsUIForm)
                {
                    // Counts as an empty result when the input is not an IList (or is empty) and the
                    // output is an IList without elements.
                    if (((olddatas is IList) == false || !olddatas.Any()) && newdatas is IList &&
                        (!newdatas.Any()))
                    {
                        errorCounter++;
                        // The prompt fires only when the streak reaches exactly five.
                        if (errorCounter == 5 && isErrorRemind)
                        {
                            // Consecutive empty outputs are treated as an anomaly. (The original
                            // comment said three times; the code triggers at five.)
                            if (ControlExtended.UIInvoke(() =>
                            {
                                var result =
                                    MessageBox.Show(
                                        string.Format(GlobalHelper.Get("fail_remind"), Column, TypeName),
                                        GlobalHelper.Get("key_570"),
                                        MessageBoxButton.YesNoCancel);
                                if (result == MessageBoxResult.Yes)
                                {
                                    // Yes: remove Father's non-paused tasks, point ETLMount at this
                                    // tool and open its property window.
                                    var window = PropertyGridFactory.GetPropertyWindow(this);

                                    var list = processManager.CurrentProcessTasks.Where(
                                        task => task.Publisher == Father && task.IsPause == false).ToList();
                                    list.Execute(task => task.Remove());

// window.Closed += (s, e) => Father.ETLMount++;
                                    Father.ETLMount = Math.Max(0, Father.CurrentETLTools.IndexOf(this));
                                    window.ShowDialog();
                                    window.Topmost = true;
                                    return(true);
                                }
                                if (result == MessageBoxResult.Cancel)
                                {
                                    // Cancel: keep running and suppress further reminders.
                                    isErrorRemind = false;
                                    return(true);
                                }
                                // Any other answer: report false.
                                return(false);
                            }) == false)
                            {
                                // A false result from UIInvoke ends the whole sequence.
                                yield break;
                            }
                        }
                    }
                    else
                    {
                        // Any other outcome (including an exception above) resets the streak.
                        errorCounter = 0;
                    }
                }
                if (newdatas == null)
                {
                    continue;
                }
                foreach (var newdata in newdatas)
                {
                    yield return(newdata);
                }
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Transposes the input: the string value of <c>Column</c> in each input document becomes a
        /// column name in the output, and each key returned by <c>GetKeys(count: 100)</c> becomes one
        /// output document.
        /// </summary>
        /// <param name="datas">
        /// Documents to transpose. They are fully materialised before any output is produced, unless
        /// <c>Column</c> is null or empty, in which case they are passed through unchanged.
        /// </param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>One new <c>FreeDocument</c> per key, or the untouched input when <c>Column</c> is unset.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            if (string.IsNullOrEmpty(Column) == false)
            {
                var rows    = datas.ToList();
                var headers = rows.Select(row => row[Column].ToString()).ToList();
                var keys    = rows.GetKeys(count: 100).ToList();

                // One output document per key.
                var transposed = Enumerable.Range(0, keys.Count)
                                 .Select(index => new FreeDocument())
                                 .ToList();

                for (var rowIndex = 0; rowIndex < headers.Count; rowIndex++)
                {
                    var header = headers[rowIndex];
                    for (var keyIndex = 0; keyIndex < transposed.Count; keyIndex++)
                    {
                        transposed[keyIndex][header] = rows[rowIndex][keys[keyIndex]];
                    }
                }

                foreach (var document in transposed)
                {
                    yield return document;
                }
                // Known issue noted by the original author: numeric columns may fail to display.
                yield break;
            }

            // No pivot column configured: forward the input as-is.
            foreach (var data in datas)
            {
                yield return data;
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Maps each input document with <c>MappingDocument</c> and feeds it to
        /// <c>process.Generate</c>.
        /// </summary>
        /// <remarks>
        /// When <c>IsCycle</c> is set, the first generated document is fed back in as the next input
        /// until its <c>Column</c> value is null or empty, or until nothing is generated. Cycle results
        /// are yielded as clones and are not merged with the source document. Otherwise every generated
        /// document is merged with the source through <c>MergeQuery(data, NewColumn)</c>.
        /// </remarks>
        /// <param name="datas">Documents to process.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>A lazily evaluated sequence of generated documents.</returns>
        public IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            foreach (var data in datas)
            {
                var doc = MappingDocument(data);
                if (IsCycle)
                {
                    var newdata = doc;
                    while (true)
                    {
                        // A missing (null) value ends the cycle just like an empty one, instead of
                        // throwing a NullReferenceException on ToString().
                        var link = newdata[Column];
                        if (link == null || string.IsNullOrEmpty(link.ToString()))
                        {
                            break;
                        }

                        var result =
                            process.Generate(IsExecute, new List <IFreeDocument> {
                            newdata.Clone()
                        }).FirstOrDefault();
                        if (result == null)
                        {
                            break;
                        }
                        yield return(result.Clone());

                        newdata = result;
                    }
                }
                else
                {
                    var result = process.Generate(IsExecute, new List <IFreeDocument> {
                        doc
                    });
                    foreach (var item in result)
                    {
                        yield return(item.MergeQuery(data, NewColumn));
                    }
                }
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Expands the JSON text stored under <c>Column</c> of each input document into documents,
        /// each merged with the source through <c>MergeQuery(data, NewColumn)</c>.
        /// </summary>
        /// <remarks>
        /// With <c>crawlerEnabled</c> set, the text is converted by <c>JavaScriptAnalyzer.Json2XML</c>
        /// and, when it is reported as real JSON, extracted with <c>selector.CrawlHtmlData</c>.
        /// Otherwise it is deserialised with <c>serialier</c> and converted by
        /// <c>ScriptHelper.ToDocuments</c>. Inputs that are null, empty, or fail to deserialise are
        /// skipped. The last non-empty text processed is stored in <c>lastData</c>.
        /// </remarks>
        /// <param name="datas">Documents carrying JSON text in <c>Column</c>.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>A lazily evaluated sequence of expanded documents.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            foreach (var data in datas)
            {
                // Read null-safely so a missing column is skipped by the emptiness check below
                // instead of throwing on ToString().
                var raw     = data[Column];
                var itemstr = raw == null ? null : raw.ToString();
                if (string.IsNullOrEmpty(itemstr))
                {
                    continue;
                }
                lastData = itemstr;
                if (crawlerEnabled)
                {
                    bool isrealjson;
                    var  html = JavaScriptAnalyzer.Json2XML(itemstr, out isrealjson, true);
                    if (isrealjson)
                    {
                        HtmlDocument htmldoc = null;
                        var          doc     = selector.CrawlHtmlData(html, out htmldoc);
                        foreach (var item3 in doc)
                        {
                            yield return(item3.MergeQuery(data, NewColumn));
                        }
                    }
                    continue;
                }
                dynamic d = null;
                try
                {
                    d = serialier.DeserializeObject(itemstr);
                }
                catch (Exception)
                {
                    // Best effort: text that cannot be deserialised is skipped without logging.
                    continue;
                }

                foreach (var item2 in ScriptHelper.ToDocuments(d))
                {
                    var item3 = item2 as FreeDocument;
                    yield return(item3.MergeQuery(data, NewColumn));
                }
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Repeats the input documents according to <c>RepeatType</c>.
        /// </summary>
        /// <remarks>
        /// <c>ListRepeat</c> parses <c>RepeatCount</c> as an integer and enumerates the whole input that
        /// many times, yielding a clone of each document. <c>OneRepeat</c> evaluates
        /// <c>data.Query(RepeatCount)</c> per document, parses it as an integer and yields that same
        /// document instance that many times. Any other <c>RepeatType</c> yields nothing.
        /// </remarks>
        /// <param name="datas">Documents to repeat; enumerated once per round in <c>ListRepeat</c> mode.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>A lazily evaluated sequence of repeated documents.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            switch (RepeatType)
            {
            case RepeatType.ListRepeat:
                for (var round = int.Parse(RepeatCount); round > 0; round--)
                {
                    foreach (var data in datas)
                    {
                        yield return data.Clone();
                    }
                }
                break;

            case RepeatType.OneRepeat:
                foreach (var data in datas)
                {
                    for (var remaining = int.Parse(data.Query(RepeatCount)); remaining > 0; remaining--)
                    {
                        yield return data;
                    }
                }
                break;
            }
        }