Esempio n. 1
0
 /// <summary>
 /// Adds a new <c>Tags</c> node, configured with a tag name and a class name,
 /// to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="tag">Value assigned to the new node's <c>Tag</c>.</param>
 /// <param name="className">Value assigned to the new node's <c>ClassName</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase Tags(this RawlerBase rawler, string tag, string className)
 {
     var tagsNode = new Tags();
     tagsNode.Tag       = tag;
     tagsNode.ClassName = className;
     return rawler.Add(tagsNode);
 }
Esempio n. 2
0
        /// <summary>
        /// Builds Rawler trees from <c>Nodes</c> and attaches them to <paramref name="rawler"/>.
        /// </summary>
        /// <remarks>
        /// Steps, in order: register every node's link URLs and image URLs with the two TF-IDF
        /// collections; build one tree per distinct <c>GetHTMLWithoutValue()</c> result; keep one
        /// tree per distinct XAML; prepend a <c>GetPageUrl</c>/<c>DataWrite("SourceUrl")</c> chain and
        /// append a <c>NextDataRow</c> to each kept tree; add the trees to the root; finally call
        /// <c>MargeChildren</c> on the root. Progress is logged through the stopwatch writer.
        /// </remarks>
        /// <param name="rawler">Root to extend; a new <c>RawlerBase</c> is created when this is null.</param>
        /// <returns>The root that received the generated trees.</returns>
        public RawlerBase CreateRawlerTree(RawlerBase rawler)
        {
            RawlerLib.Timer.StopWatch.Write("CreateRawlerTree Start");
            var pageBaseUrl = BaseUrl;

            // Fill both TF-IDF collections first: CreateRawler (called below) queries them.
            foreach (var htmlNode in Nodes)
            {
                var linkUrls = htmlNode.OuterHtml.ToHtml(pageBaseUrl).GetLink().Select(link => link.Url);
                urlTfidf.AddDocument(linkUrls);

                var imageUrls = htmlNode.OuterHtml.ToHtml(pageBaseUrl).GetImageLink().Select(image => image.Url);
                imageTfidf.AddDocument(imageUrls);
            }

            if (rawler == null)
            {
                rawler = new RawlerBase();
            }

            // Assemble the Rawler trees and collapse identical ones.
            var candidateTrees = Nodes
                .GroupBy(n => n.GetHTMLWithoutValue())
                .Select(g => CreateRawler(g.First()))
                .ToArray();
            var uniqueTrees = candidateTrees
                .GroupBy(t => t.ToXAML())
                .Select(g => g.First())
                .ToArray();

            RawlerLib.Timer.StopWatch.Write("CreateRawlerTree AddStart");
            foreach (var tree in uniqueTrees)
            {
                var sourceUrlWriter = new GetPageUrl().DataWrite("SourceUrl", DataAttributeType.SourceUrl).GetRoot();
                tree.AddFirst(sourceUrlWriter);
                tree.Add(new NextDataRow());
                rawler.Add(tree);
            }

            RawlerLib.Timer.StopWatch.Write("CreateRawlerTree MargeStart");
            rawler.MargeChildren();
            RawlerLib.Timer.StopWatch.Write("CreateRawlerTree End");

            return rawler;
        }
Esempio n. 3
0
 /// <summary>
 /// Adds a new <c>Page</c> node with the given URL to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="url">Value assigned to the new node's <c>Url</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase Page(this RawlerBase rawler, string url)
 {
     var pageNode = new Page();
     pageNode.Url = url;
     return rawler.Add(pageNode);
 }
Esempio n. 4
0
 /// <summary>
 /// Adds a new <c>GetTsvValue</c> node for the given column to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="column">Value assigned to the new node's <c>ColumnName</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase GetTesvValue(this RawlerBase rawler, string column)
 {
     var tsvNode = new GetTsvValue();
     tsvNode.ColumnName = column;
     return rawler.Add(tsvNode);
 }
Esempio n. 5
0
        /// <summary>
        /// Fills this list with task parameters parsed from LTSV text.
        /// </summary>
        /// <remarks>
        /// When the converted <c>FileName</c> names an existing file, its content replaces
        /// <c>LTSV</c>. Each line of the converted <c>LTSV</c> that yields at least one key/value
        /// pair is added as one <c>TaskParameter</c>. Afterwards the value of every entry in the
        /// list is passed through <c>Convert</c>.
        /// </remarks>
        /// <param name="rawler">Context handed to every <c>Convert</c> call.</param>
        public void Load(RawlerBase rawler)
        {
            var path = FileName.Convert(rawler);
            if (!string.IsNullOrEmpty(path) && System.IO.File.Exists(path))
            {
                LTSV = System.IO.File.ReadAllText(path);
            }

            var source = LTSV.Convert(rawler);
            if (!string.IsNullOrEmpty(source))
            {
                foreach (var line in source.ReadLines())
                {
                    var parameter = new TaskParameter();
                    foreach (var pair in line.ParseLtsvLine())
                    {
                        parameter.Add(new KeyValue(pair.Key, pair.Value));
                    }

                    // Lines without any key/value pair are dropped.
                    if (parameter.Count > 0)
                    {
                        this.Add(parameter);
                    }
                }
            }

            // Apply Convert to the stored values as well (every entry in the list is visited).
            foreach (var entry in this.SelectMany(p => p))
            {
                entry.Value = entry.Value.Convert(rawler);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Command-line entry point: loads a Rawler tree from a XAML file and runs it.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the XAML file to load. Every following argument of the form
        /// <c>key=value</c> is registered through <c>TempVar.SetVar</c> before the tree runs;
        /// arguments without '=' are ignored. Nothing is loaded when no argument is given.
        /// </param>
        static void Main(string[] args)
        {
            Rawler.Tool.ReportManage.ErrReportEvent += ReportManage_ErrReportEvent;
            Rawler.Tool.ReportManage.ReportEvnet    += ReportManage_ReportEvnet;

            if (args.Length == 0)
            {
                return;
            }

            try
            {
                foreach (var argument in args.Skip(1))
                {
                    // Split on the first '=' only, so a value may itself contain '='
                    // (for example a URL with a query string).
                    var pair = argument.Split(new[] { '=' }, 2);
                    if (pair.Length > 1)
                    {
                        Rawler.Tool.TempVar.SetVar(pair[0], pair[1]);
                    }
                }

                RawlerBase rawler = (RawlerBase)System.Xaml.XamlServices.Load(args[0]);
                rawler.SetParent();
                rawler.Run();
            }
            catch (Exception e)
            {
                // Last-chance handler: print the failure instead of crashing the process.
                System.Console.WriteLine(e.ToString());
            }
        }
Esempio n. 7
0
 /// <summary>
 /// Adds a new <c>Tags</c> node for the given tag name to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="tag">Value assigned to the new node's <c>Tag</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase Tags(this RawlerBase rawler, string tag)
 {
     var tagsNode = new Tags();
     tagsNode.Tag = tag;
     return rawler.Add(tagsNode);
 }
Esempio n. 8
0
        //public new void Run()
        //{
        //    Run(true);
        //}

        /// <summary>
        /// Runs the children when "target text contains search text" equals <c>Result</c>.
        /// </summary>
        /// <remarks>
        /// The search text starts as the converted <c>ContainsText</c> and the target text as
        /// <c>GetText()</c>. When <c>ContainsTextTree</c> or <c>TextTree</c> is set, the respective
        /// text is replaced by evaluating that tree against the parent's text.
        /// </remarks>
        /// <param name="runChildren">Forwarded to <c>RunChildren</c>.</param>
        public override void Run(bool runChildren)
        {
            string needle   = this.ContainsText.Convert(this);
            string haystack = this.GetText();

            if (ContainsTextTree != null)
            {
                needle = RawlerBase.GetText(this.Parent.Text, ContainsTextTree, this);
            }

            if (TextTree != null)
            {
                haystack = RawlerBase.GetText(this.Parent.Text, TextTree, this);
            }

            bool found = haystack.Contains(needle);
            if (found != this.Result)
            {
                return;
            }

            this.RunChildren(runChildren);
        }
Esempio n. 9
0
 /// <summary>
 /// Adds a new <c>DataWrite</c> node with the given attribute name and attribute type
 /// to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="attribute">Value assigned to the new node's <c>Attribute</c>.</param>
 /// <param name="attributeType">Value assigned to the new node's <c>AttributeType</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase DataWrite(this RawlerBase rawler, string attribute, DataAttributeType attributeType)
 {
     var writer = new DataWrite();
     writer.Attribute     = attribute;
     writer.AttributeType = attributeType;
     return rawler.Add(writer);
 }
Esempio n. 10
0
 /// <summary>
 /// Adds a new <c>TagClear</c> node with the given replace type to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <param name="replece">Value assigned to the new node's <c>ReplaceType</c>.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase TagClear(this RawlerBase rawler, ReplaceType replece)
 {
     var tagClear = new TagClear();
     tagClear.ReplaceType = replece;
     return rawler.Add(tagClear);
 }
Esempio n. 11
0
 /// <summary>
 /// Runs the base clone handling, then copies <c>CreateText</c> onto the clone.
 /// </summary>
 /// <param name="rawler">The clone being prepared; only updated when it is a <c>CustomText</c>.</param>
 protected override void CloneEvent(RawlerBase rawler)
 {
     base.CloneEvent(rawler);

     var target = rawler as CustomText;
     if (target == null)
     {
         return;
     }

     target.CreateText = this.CreateText;
 }
Esempio n. 12
0
 /// <summary>
 /// Runs the base clone handling, then copies <c>Check</c> onto the clone.
 /// </summary>
 /// <param name="rawler">The clone being prepared; only updated when it is a <c>CustomFilter</c>.</param>
 protected override void CloneEvent(RawlerBase rawler)
 {
     base.CloneEvent(rawler);

     var target = rawler as CustomFilter;
     if (target == null)
     {
         return;
     }

     target.Check = this.Check;
 }
Esempio n. 13
0
 /// <summary>
 /// Creates a <c>Document</c> holding <paramref name="text"/>, parents it to this node,
 /// gives it a clone of <paramref name="rawlerTree"/> as child and stores it in <c>documents</c>.
 /// </summary>
 /// <param name="text">Text set on the new document.</param>
 /// <param name="rawlerTree">Tree that is cloned and added to the document; must not be null.</param>
 public void addDocument(string text, RawlerBase rawlerTree)
 {
     var document = new Document();
     document.SetText(text);
     document.SetParent(this);

     // The document gets its own copy, not the tree that was passed in.
     document.AddChildren(rawlerTree.Clone());

     documents.Add(document);
 }
Esempio n. 14
0
 /// <summary>
 /// Returns the value to switch on.
 /// </summary>
 /// <returns>
 /// <c>GetText()</c> when <c>SwitchValueTree</c> is null; otherwise the text obtained by
 /// evaluating <c>SwitchValueTree</c> against the parent's text.
 /// </returns>
 protected string GetSwitchValue()
 {
     if (SwitchValueTree == null)
     {
         return GetText();
     }

     SwitchValueTree.SetParent();
     return RawlerBase.GetText(this.Parent.Text, SwitchValueTree, this.Parent);
 }
Esempio n. 15
0
 /// <summary>
 /// Creates a clone of this <c>ReLogin</c> node, including clones of its children.
 /// </summary>
 /// <param name="parent">Node set as the clone's parent.</param>
 /// <returns>The new <c>ReLogin</c> instance.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     var copy = new ReLogin();
     RawlerLib.ObjectLib.FildCopy(this, copy);
     copy.SetParent(parent);
     this.CloneEvent(copy);

     // Empty the clone's child list, then refill it with clones of this node's children.
     copy.children.Clear();
     foreach (var original in this.Children)
     {
         copy.AddChildren(original.Clone(copy));
     }

     return copy;
 }
Esempio n. 16
0
        /// <summary>
        /// Runs the children in parallel tasks and blocks until all tasks have finished.
        /// </summary>
        /// <remarks>
        /// The children are put into a <c>KeyValueStore</c> that is serialized to XAML once; the XAML
        /// is parsed again for every task, so each task runs its own instance. One task is started
        /// per entry of <c>TaskParameterList</c> (with that entry's key/values applied), or
        /// <c>TaskNum</c> tasks when the list is empty.
        /// </remarks>
        /// <param name="runChildren">When false the method does nothing.</param>
        public override void Run(bool runChildren)
        {
            if (!runChildren)
            {
                return;
            }

            SetText(GetText());

            if (TaskLTSV.NullIsEmpty().Length > 0)
            {
                TaskParameterList.LTSV = TaskLTSV.Convert(this);
            }
            if (TaskLTSVFile.NullIsEmpty().Length > 0)
            {
                TaskParameterList.FileName = TaskLTSVFile.Convert(this);
            }
            TaskParameterList.Load(this);

            // Serialize the children once; every task gets a fresh instance parsed from this XAML.
            var template = new KeyValueStore();
            foreach (var child in this.Children)
            {
                template.AddChildren(child);
            }
            var templateXaml = template.ToXAML();

            var tasks = new List <Task>();
            // NOTE(review): the error text returned by Parse is never inspected.
            string err;

            if (TaskParameterList.Count == 0)
            {
                for (int i = 0; i < TaskNum; i++)
                {
                    var worker = (KeyValueStore)RawlerBase.Parse(templateXaml, out err);
                    worker.SetParent(this.Parent);
                    tasks.Add(Task.Factory.StartNew(() => worker.Run()));
                }
            }
            else
            {
                foreach (var parameters in TaskParameterList)
                {
                    var worker = (KeyValueStore)RawlerBase.Parse(templateXaml, out err);
                    foreach (var pair in parameters)
                    {
                        worker.SetKeyValue(pair.Key, pair.Value);
                    }
                    worker.SetParent(this.Parent);
                    tasks.Add(Task.Factory.StartNew(() => worker.Run()));
                }
            }

            Task.WaitAll(tasks.ToArray());
        }
Esempio n. 17
0
 /// <summary>
 /// Releases the Rawler tree and the token source, clears the report list and
 /// shuts the application down.
 /// </summary>
 /// <remarks>
 /// May be called more than once: both fields are reset to null after disposal, so a repeated
 /// call does not touch an already-disposed object.
 /// </remarks>
 public void Dispose()
 {
     pause = false;

     if (rawler != null)
     {
         rawler.Dispose();
         rawler = null;
     }

     if (tokenSource != null)
     {
         tokenSource.Cancel();
         tokenSource.Dispose();
         // Assuming tokenSource is a CancellationTokenSource (as the Cancel/Dispose pair suggests),
         // Cancel() on a disposed instance throws ObjectDisposedException; drop the reference
         // so a second Dispose() skips this branch.
         tokenSource = null;
     }

     ReportManage.ListClear();

     // Presumably System.Windows.Application, whose Current is null when no Application object exists.
     var application = Application.Current;
     if (application != null)
     {
         application.Shutdown();
     }
 }
Esempio n. 18
0
        /// <summary>
        /// Creates a clone of this node by delegating to the generic clone of the base class.
        /// </summary>
        /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
        /// <returns>The <c>GetScriptVariable</c> clone produced by the base class.</returns>
        public override RawlerBase Clone(RawlerBase parent)
        {
            RawlerBase clone = base.Clone<GetScriptVariable>(parent);
            return clone;
        }
Esempio n. 19
0
 /// <summary>
 /// Releases the Rawler tree and the token source, clears the report list and
 /// shuts the application down.
 /// </summary>
 /// <remarks>
 /// May be called more than once: both fields are reset to null after disposal, so a repeated
 /// call does not touch an already-disposed object.
 /// </remarks>
 public void Dispose()
 {
     pause = false;

     if (rawler != null)
     {
         rawler.Dispose();
         rawler = null;
     }

     if (tokenSource != null)
     {
         tokenSource.Cancel();
         tokenSource.Dispose();
         // Assuming tokenSource is a CancellationTokenSource (as the Cancel/Dispose pair suggests),
         // Cancel() on a disposed instance throws ObjectDisposedException; drop the reference
         // so a second Dispose() skips this branch.
         tokenSource = null;
     }

     ReportManage.ListClear();

     // Presumably System.Windows.Application, whose Current is null when no Application object exists.
     var application = Application.Current;
     if (application != null)
     {
         application.Shutdown();
     }
 }
Esempio n. 20
0
        /// <summary>
        /// Computes the distance between this node's text and the text produced by
        /// <c>Text2Tree</c>, and continues only when it does not exceed <c>maxDistance</c>.
        /// </summary>
        /// <remarks>
        /// When <c>Text2Tree</c> is null an error is reported and nothing else happens. Otherwise
        /// the value returned by <c>Compute</c> is stored as this node's text and the base
        /// <c>Run</c> is called, but only if that value is less than or equal to <c>maxDistance</c>.
        /// </remarks>
        /// <param name="runChildren">Forwarded to <c>base.Run</c>.</param>
        public override void Run(bool runChildren)
        {
            if (Text2Tree != null)
            {
                Text2Tree.SetParent(this);
                string otherText = RawlerBase.GetText(this.parent.Text, Text2Tree, this);

                var distance = Compute(GetText(), otherText);
                if (distance <= maxDistance)
                {
                    SetText(distance.ToString());
                    base.Run(runChildren);
                }
            }
            else
            {
                ReportManage.ErrReport(this, "Text2Treeが空です。実行にはText2Treeが必要です。");
            }
        }
Esempio n. 21
0
        /// <summary>
        /// Collects every <c>Data</c> node in the tree rooted at <c>Rawler</c>.
        /// </summary>
        /// <remarks>
        /// The tree is walked breadth-first through each node's <c>Children</c>, so the result is in
        /// breadth-first order.
        /// </remarks>
        /// <returns>
        /// The <c>Data</c> nodes found; an empty collection when <c>Rawler</c> is null.
        /// </returns>
        public ICollection <Data> GetData()
        {
            var found = new List <Data>();

            RawlerBase root = Rawler;
            if (root == null)
            {
                // No tree available: nothing to traverse.
                return found;
            }

            var pending = new Queue <RawlerBase>();
            pending.Enqueue(root);
            while (pending.Count > 0)
            {
                RawlerBase current = pending.Dequeue();

                // A single "as" replaces the former "is" check followed by a second cast.
                var data = current as Data;
                if (data != null)
                {
                    found.Add(data);
                }

                foreach (var child in current.Children)
                {
                    pending.Enqueue(child);
                }
            }

            return found;
        }
Esempio n. 22
0
        /// <summary>
        /// Creates a clone of this <c>Parallel</c> node, including clones of its documents
        /// and of its children.
        /// </summary>
        /// <param name="parent">Node set as the clone's parent.</param>
        /// <returns>The new <c>Parallel</c> instance.</returns>
        public override RawlerBase Clone(RawlerBase parent)
        {
            var copy = new Parallel();
            RawlerLib.ObjectLib.FildCopy(this, copy);
            copy.SetParent(parent);
            this.CloneEvent(copy);
            copy.children.Clear();

            // Reset the clone's documents, then add a clone of each of this node's documents.
            copy.DocumentsClear();
            foreach (var document in documents)
            {
                copy.documents.Add((Document)document.Clone(copy));
            }

            // Same for the regular children.
            foreach (var original in this.Children)
            {
                copy.AddChildren(original.Clone(copy));
            }

            return copy;
        }
Esempio n. 23
0
        /// <summary>
        /// Serializes <paramref name="rawler"/> to XAML and strips attributes that carry no information.
        /// </summary>
        /// <remarks>
        /// Removed from the serializer output: <c> Enable="True"</c>, <c> Comment=""</c>, and every
        /// attribute whose value is <c>"{x:Null}"</c>. The latter are first rewritten to
        /// <c>name=Null</c> and then deleted together with their leading space.
        /// </remarks>
        /// <param name="rawler">Tree passed to <c>XamlServices.Save</c>.</param>
        /// <returns>The cleaned XAML text.</returns>
        public string Rawler2XAML(RawlerBase rawler)
        {
            var builder = new StringBuilder(System.Xaml.XamlServices.Save(rawler));

            // StringBuilder.Replace edits in place, so the calls need no reassignment.
            builder.Replace("\"{x:Null}\"", "Null");
            builder.Replace(" Enable=\"True\"", "");
            builder.Replace(" Comment=\"\"", "");

            var nullAttributePattern = new System.Text.RegularExpressions.Regex(@"\w*=Null");
            List <string> nullAttributes = nullAttributePattern
                .Matches(builder.ToString())
                .Cast <System.Text.RegularExpressions.Match>()
                .Select(match => match.Value)
                .Distinct()
                .ToList();

            foreach (var attribute in nullAttributes)
            {
                builder.Replace(" " + attribute, string.Empty);
            }

            return builder.ToString();
        }
Esempio n. 24
0
        /// <summary>
        /// Opens a filter stream for the track text and runs the children on every status received.
        /// </summary>
        /// <remarks>
        /// The login is taken from the first <c>TwitterLogin</c> ancestor. The track text is
        /// <c>Track</c>, or the result of evaluating <c>TrackTree</c> when that is set. Each status
        /// message is serialized to JSON into a new <c>Document</c>, which receives this node's
        /// children and is run. Warning messages are sent to <c>ReportManage.ErrReport</c>. The
        /// object returned by <c>Connect()</c> is kept in <c>disposable</c>.
        /// </remarks>
        /// <param name="runChildren">Not used by this implementation.</param>
        public override void Run(bool runChildren)
        {
            var login = this.GetAncestorRawler().OfType <TwitterLogin>().First();

            string track;
            if (TrackTree == null)
            {
                track = Track;
            }
            else
            {
                TrackTree.SetParent(this);
                track = RawlerBase.GetText(GetText(), TrackTree, this);
            }

            var parameters = new Dictionary <string, object>();
            parameters.Add("track", track);

            var stream = login.Token.Streaming
                         .StartObservableStream(StreamingType.Filter, new StreamingParameters(parameters))
                         .Publish();

            stream.OfType <StatusMessage>().Subscribe(message =>
            {
                var document = new Document();
                document.TextValue = Codeplex.Data.DynamicJson.Serialize(message.Status);
                document.SetParent(this);
                foreach (var child in this.Children)
                {
                    document.AddChildren(child);
                }
                document.Run();
            });

            stream.OfType <WarningMessage>().Subscribe(warning => ReportManage.ErrReport(this, warning.Message));

            // Nothing flows until the published stream is connected.
            disposable = stream.Connect();
        }
Esempio n. 25
0
        /// <summary>
        /// Reports the time elapsed since the previous call together with <paramref name="text"/>,
        /// then restarts the stopwatch.
        /// </summary>
        /// <remarks>
        /// Does nothing unless <c>DoRun</c> is true. The elapsed milliseconds are read once, so the
        /// value that is reported is exactly the value added to <c>total</c>.
        /// </remarks>
        /// <param name="rawler">Node passed to <c>ReportManage.Report</c>.</param>
        /// <param name="text">Label for the report line; prefixed with the parent's description when <c>viewParent</c> is set.</param>
        /// <param name="viewTotal">When true, a second line with the accumulated total is reported.</param>
        public void Write(RawlerBase rawler, string text, bool viewTotal)
        {
            if (!DoRun)
            {
                return;
            }

            if (viewParent)
            {
                text = this.Parent.ToObjectString() + " " + text;
            }

            // Single read: two separate reads can differ, making the total drift from the printed laps.
            var elapsed = sw.ElapsedMilliseconds;
            total += elapsed;

            ReportManage.Report(rawler, elapsed + "\t" + text, true, true);
            if (viewTotal)
            {
                ReportManage.Report(rawler, "Total:" + total, true, true);
            }

            sw.Restart();
        }
Esempio n. 26
0
 /// <summary>
 /// Fills in <c>ConsumerKey</c> and <c>ConsumerSecret</c> when either of them is empty.
 /// </summary>
 /// <remarks>
 /// Sources, in order of preference: the <c>KeyValueStore</c> reachable from
 /// <paramref name="rawler"/>, the static values on <c>SetTwitterApiKeys</c>, and finally the
 /// key pair embedded in this method.
 /// </remarks>
 /// <param name="rawler">Node used for the <c>KeyValueStore</c> lookup and for reporting.</param>
 public void SetUp(RawlerBase rawler)
 {
     bool alreadyConfigured = !string.IsNullOrEmpty(this.ConsumerKey) && !string.IsNullOrEmpty(this.ConsumerSecret);
     if (alreadyConfigured)
     {
         return;
     }

     if (KeyValueStore.ContainsKey(rawler, "ConsumerKey", "ConsumerSecret"))
     {
         this.ConsumerKey = KeyValueStore.GetValueByKey(rawler, "ConsumerKey");
         this.ConsumerSecret = KeyValueStore.GetValueByKey(rawler, "ConsumerSecret");
         ReportManage.Report(rawler, "KeyValueStoreからのAPI Keyを使います", true, true);
     }
     else if (!string.IsNullOrEmpty(SetTwitterApiKeys.consumerKey) && !string.IsNullOrEmpty(SetTwitterApiKeys.consumerSecret))
     {
         this.ConsumerKey = SetTwitterApiKeys.consumerKey;
         this.ConsumerSecret = SetTwitterApiKeys.consumerSecret;
     }
     else
     {
         // NOTE(review): API credentials are embedded in source here; consider moving them
         // to configuration. Left unchanged because callers rely on this fallback.
         this.ConsumerKey = "gHVupgapEXlTZdu7rf3oOg";
         this.ConsumerSecret = "YOicLtW8utx3NJyy88wtzq8QN3ilXeQoEGCPIJNzo";
         ReportManage.Report(rawler, "RawlerのAPI Keyを使います", true, true);
     }
 }
Esempio n. 27
0
 /// <summary>
 /// Formats the texts produced by the <c>Args</c> trees with <c>Format</c> and stores the
 /// result as this node's text.
 /// </summary>
 /// <remarks>
 /// An error is reported when <c>Format</c> is empty or <c>Args</c> is null. The base
 /// <c>Run</c> is called in every case, including after an error.
 /// </remarks>
 /// <param name="runChildren">Forwarded to <c>base.Run</c>.</param>
 public override void Run(bool runChildren)
 {
     bool hasFormat = !string.IsNullOrEmpty(Format);

     if (hasFormat && Args != null)
     {
         var values = new List <string>();
         foreach (var argument in Args)
         {
             argument.SetParent(this.Parent);
             argument.SetParent();
             values.Add(RawlerBase.GetText(GetText(), argument, this.Parent));
         }

         SetText(string.Format(Format, values.ToArray()));
     }
     else if (hasFormat)
     {
         ReportManage.ErrReport(this, "Argsが空です");
     }
     else
     {
         ReportManage.ErrReport(this, "Formatが空です");
     }

     base.Run(runChildren);
 }
Esempio n. 28
0
 /// <summary>
 /// Fills in <c>ConsumerKey</c> and <c>ConsumerSecret</c> when either of them is empty.
 /// </summary>
 /// <remarks>
 /// Sources, in order of preference: the <c>KeyValueStore</c> reachable from
 /// <paramref name="rawler"/>, the static values on <c>SetTwitterApiKeys</c>, and finally the
 /// key pair embedded in this method.
 /// </remarks>
 /// <param name="rawler">Node used for the <c>KeyValueStore</c> lookup and for reporting.</param>
 public void SetUp(RawlerBase rawler)
 {
     bool alreadyConfigured = !string.IsNullOrEmpty(this.ConsumerKey) && !string.IsNullOrEmpty(this.ConsumerSecret);
     if (alreadyConfigured)
     {
         return;
     }

     if (KeyValueStore.ContainsKey(rawler, "ConsumerKey", "ConsumerSecret"))
     {
         this.ConsumerKey    = KeyValueStore.GetValueByKey(rawler, "ConsumerKey");
         this.ConsumerSecret = KeyValueStore.GetValueByKey(rawler, "ConsumerSecret");
         ReportManage.Report(rawler, "KeyValueStoreからのAPI Keyを使います", true, true);
     }
     else if (!string.IsNullOrEmpty(SetTwitterApiKeys.consumerKey) && !string.IsNullOrEmpty(SetTwitterApiKeys.consumerSecret))
     {
         this.ConsumerKey    = SetTwitterApiKeys.consumerKey;
         this.ConsumerSecret = SetTwitterApiKeys.consumerSecret;
     }
     else
     {
         // NOTE(review): API credentials are embedded in source here; consider moving them
         // to configuration. Left unchanged because callers rely on this fallback.
         this.ConsumerKey    = "gHVupgapEXlTZdu7rf3oOg";
         this.ConsumerSecret = "YOicLtW8utx3NJyy88wtzq8QN3ilXeQoEGCPIJNzo";
         ReportManage.Report(rawler, "RawlerのAPI Keyを使います", true, true);
     }
 }
Esempio n. 29
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>GetWordsInSouce</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<GetWordsInSouce>(parent);
     return clone;
 }
Esempio n. 30
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>LoginClient</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<LoginClient>(parent);
     return clone;
 }
Esempio n. 31
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>DataGrouping</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<DataGrouping>(parent);
     return clone;
 }
Esempio n. 32
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>Keyphrase</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<Keyphrase>(parent);
     return clone;
 }
Esempio n. 33
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>ImageLinks</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<ImageLinks>(parent);
     return clone;
 }
Esempio n. 34
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>GetFileExtension</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<GetFileExtension>(parent);
     return clone;
 }
Esempio n. 35
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>LevenshteinDistance</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<LevenshteinDistance>(parent);
     return clone;
 }
Esempio n. 36
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>UrlEncode</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<UrlEncode>(parent);
     return clone;
 }
Esempio n. 37
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>TwitterStreamingFilter</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<TwitterStreamingFilter>(parent);
     return clone;
 }
Esempio n. 38
0
 /// <summary>
 /// Adds a new <c>GetPageUrl</c> node to <paramref name="rawler"/>.
 /// </summary>
 /// <param name="rawler">Node whose <c>Add</c> method receives the new node.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns.</returns>
 public static RawlerBase GetPageUrl(this RawlerBase rawler)
 {
     var pageUrlNode = new GetPageUrl();
     return rawler.Add(pageUrlNode);
 }
Esempio n. 39
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>StringFormat</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<StringFormat>(parent);
     return clone;
 }
Esempio n. 40
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>NextDataRow</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<NextDataRow>(parent);
     return clone;
 }
Esempio n. 41
0
        /// <summary>
        /// Builds the Rawler subtree that extracts the content of <paramref name="node"/>,
        /// recursing into the node's children unless a branch has marked the node as fully handled.
        /// </summary>
        /// <remarks>
        /// The extractor is chosen by node kind, tested in this order: tags listed in
        /// <c>targetTag</c>, <c>a</c>, <c>img</c>, any element whose style mentions "background",
        /// <c>span</c>, <c>#comment</c> (ignored), and finally a generic text extractor.
        /// Link and image URLs whose IDF value is zero or less are skipped.
        /// </remarks>
        /// <param name="node">HTML node to analyse.</param>
        /// <returns>The generated tree, or null when nothing is to be extracted from the node.</returns>
        public RawlerBase CreateRawler(HtmlNode node)
        {
            var        baseUrl = BaseUrl;
            RawlerBase rawler  = null;

            // Cleared by branches that have dealt with the node completely; children are then not visited.
            bool inspectChildren = true;

            if (targetTag.Contains(node.Name))
            {
                Tags tags = new Tags()
                {
                    Tag = node.Name
                };
                if (node.Attributes.Where(n => n.Name == "class").Any())
                {
                    tags.ClassName = node.Attributes.Where(n => n.Name == "class").First().Value;
                }
                if (node.Attributes.Where(n => n.Name == "id").Any())
                {
                    tags.IdName = node.Attributes.Where(n => n.Name == "id").First().Value;
                }
                if (node.ChildNodes.Count() == 1 && node.ChildNodes.Where(n => n.Name == "#text").Any())
                {
                    // Only child is a text node: add a DataWrite named after the class and stop descending.
                    tags.AddChildren(new DataWrite()
                    {
                        Attribute = tags.ClassName
                    });
                    inspectChildren = false;
                }
                if (node.Attributes.Where(n => n.Name == "style" && n.Value.Contains("background")).Any())
                {
                    tags.TagVisbleType = TagVisbleType.Outer;
                    // Called for its effect on tags; the chain's return value is not stored because
                    // tags itself becomes the result right below.
                    tags.Add(new ImageLinks()
                    {
                        ImageType = ImageType.BackgroundImage
                    }).DataWrite(node.GetClassName() + "_Image", DataAttributeType.Image).GetRoot();
                }

                rawler = tags;
            }
            else if (node.Name == "a")
            {
                var resultUrlTFIDF = urlTfidf.GetResult(node.OuterHtml.ToHtml(baseUrl).GetLink().Select(n => n.Url));
                var url            = node.OuterHtml.ToHtml(baseUrl).GetLink().FirstDefault <RawlerLib.Web.Link, string>(n => n.Url, null);
                if (url != null)
                {
                    // An IDF of zero or less means the URL occurs in every document.
                    var urlIdf = urlTfidf.IDFDic.GetValueOrDefault(url);
                    if (urlIdf != null && urlIdf.Value <= 0)
                    {
                        rawler          = null;
                        inspectChildren = false;
                    }
                    else
                    {
                        if (resultUrlTFIDF.GetTakeTopValue(n => n.TFIDF).Where(n => n.Word == url).Any())
                        {
                            // The URL is among the top TF-IDF entries: treat it as the main link.
                            rawler = new Links()
                            {
                                VisbleType = LinkVisbleType.Tag
                            }.AddRange(
                                new Links()
                            {
                                VisbleType = LinkVisbleType.Url
                            }.DataWrite(node.GetClassName() + "_MainLink", DataAttributeType.Url).GetRoot());
                            if (node.ChildNodes.Count == 1 && node.ChildNodes.First().Name == "#text")
                            {
                                rawler.Add(new Links()
                                {
                                    VisbleType = LinkVisbleType.Label
                                }.DataWrite(node.GetClassName() + "_MainLabel").GetRoot());
                            }
                        }
                        else
                        {
                            // Otherwise record it as a sub link (URL extraction switchable) plus its label.
                            rawler = new Links()
                            {
                                VisbleType = LinkVisbleType.Tag
                            }.AddRange(
                                new Links()
                            {
                                VisbleType = LinkVisbleType.Url, Enable = enableGetSubUrlLink
                            }.DataWrite(node.GetClassName() + "_SubLink").GetRoot(),
                                new Links()
                            {
                                VisbleType = LinkVisbleType.Label
                            }.DataWrite(node.GetClassName() + "_SubLabel").GetRoot()
                                );
                        }
                    }
                }
                else
                {
                    // An <a> tag without a URL is handled like a plain tag.
                    Tags tags = new Tags()
                    {
                        Tag = node.Name
                    };
                    if (node.Attributes.Where(n => n.Name == "class").Any())
                    {
                        tags.ClassName = node.Attributes.Where(n => n.Name == "class").First().Value;
                    }
                    if (node.Attributes.Where(n => n.Name == "id").Any())
                    {
                        tags.IdName = node.Attributes.Where(n => n.Name == "id").First().Value;
                    }
                    rawler = tags;
                }
                if (node.ChildNodes.Count == 1 && node.ChildNodes.Where(n => n.Name == "#text").Any())
                {
                    inspectChildren = false;
                }
            }
            else if (node.Name == "img")
            {
                var resultImgeTFIDF = imageTfidf.GetResult(node.OuterHtml.ToHtml(baseUrl).GetImageLink().Select(n => n.Url));

                var url = node.OuterHtml.ToHtml(baseUrl).GetImageLink().FirstDefault(n => n.Url, null);
                if (url != null)
                {
                    // Look the URL up once and tolerate a missing entry, exactly as the <a> branch does;
                    // the former code dereferenced .Value without a null check.
                    // Assumes imageTfidf.IDFDic has the same type as urlTfidf.IDFDic.
                    var imageIdf = imageTfidf.IDFDic.GetValueOrDefault(url);
                    if (imageIdf != null && imageIdf.Value <= 0)
                    {
                        rawler          = null;
                        inspectChildren = false;
                    }
                    else
                    {
                        if (resultImgeTFIDF.GetTakeTopValue(n => n.TFIDF).Where(n => n.Word == url).Any())
                        {
                            rawler = new ImageLinks().DataWrite(node.GetClassName() + "_Image", DataAttributeType.Image).GetRoot();
                        }
                        else
                        {
                            if (iconImageColumn)
                            {
                                // The attribute name comes from the image label; the written value is "1".
                                rawler = new DataWrite()
                                {
                                    AttributeTree = new ImageLinks()
                                    {
                                        VisbleType = LinkVisbleType.Label
                                    }, Value = "1"
                                };
                            }
                            else
                            {
                                rawler = new ImageLinks().DataWrite(node.GetClassName() + "_Icon", DataAttributeType.Image).GetRoot();
                            }
                        }
                    }
                }
            }
            // React to background images.
            else if (node.Attributes.Where(n => n.Name == "style" && n.Value.Contains("background")).Any())
            {
                rawler = new ImageLinks()
                {
                    ImageType = ImageType.BackgroundImage
                }.DataWrite(node.GetClassName() + "_Image", DataAttributeType.Image).GetRoot();
            }
            else if (node.Name == "span")
            {
                Tags tags = new Tags()
                {
                    Tag = node.Name
                };
                if (node.Attributes.Where(n => n.Name == "class").Any())
                {
                    tags.ClassName = node.Attributes.Where(n => n.Name == "class").First().Value;
                }
                if (node.ChildNodes.Count() == 1 && node.ChildNodes.Where(n => n.Name == "#text").Any())
                {
                    tags.AddChildren(new DataWrite()
                    {
                        Attribute = tags.ClassName
                    });
                    inspectChildren = false;
                }

                rawler = tags;
            }
            else if (node.Name == "#comment")
            {
                // Comments produce nothing and are not descended into.
                inspectChildren = false;
            }
            else
            {
                var t = node.OuterHtml.Replace("\n", "").Trim();
                if (t.Length > 0)
                {
                    rawler = new TagClear().Trim().Add(new DataWrite()
                    {
                        Attribute = node.GetClassName() + "_" + node.Name
                    }).GetRoot();
                    if (node.ChildNodes.Count == 1 && node.ChildNodes.Where(n => n.Name == "#text").Any())
                    {
                        inspectChildren = false;
                    }
                }
            }

            if (rawler != null && node.ChildNodes.Count == 1 && node.ChildNodes.Where(n => n.Name == "span").Any())
            {
                rawler.AddChildren(new DataWrite()
                {
                    Attribute = node.GetClassName()
                });
            }

            if (inspectChildren)
            {
                foreach (var item in node.ChildNodes)
                {
                    var r = CreateRawler(item);
                    if (r == null)
                    {
                        continue;
                    }

                    if (rawler != null)
                    {
                        rawler.AddChildren(r);
                    }
                    else
                    {
                        // No extractor for this node itself: the first child result becomes the result.
                        rawler = r;
                    }
                }
            }

            return rawler;
        }
Esempio n. 42
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>MecabPreprocessing</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<MecabPreprocessing>(parent);
     return clone;
 }
Esempio n. 43
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>GetPageHtml</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<GetPageHtml>(parent);
     return clone;
 }
Esempio n. 44
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>AddAllInputParameter</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<AddAllInputParameter>(parent);
     return clone;
 }
Esempio n. 45
0
 /// <summary>
 /// Creates a clone of this node by delegating to the generic clone of the base class.
 /// </summary>
 /// <param name="parent">Forwarded to the base implementation (presumably the clone's parent).</param>
 /// <returns>The <c>ChangeText</c> clone produced by the base class.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<ChangeText>(parent);
     return clone;
 }
Esempio n. 46
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>IteratorSourceClear</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<IteratorSourceClear>(parent);
     return clone;
 }
Esempio n. 47
0
 /// <summary>
 /// Creates a clone of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>Contains</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<Contains>(parent);
     return clone;
 }
Esempio n. 48
0
 /// <summary>
 /// Fluent helper: constructs a new <c>ImageLinks</c> node with default settings
 /// and passes it to <c>rawler.Add</c>.
 /// </summary>
 /// <param name="rawler">The node on which <c>Add</c> is invoked.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns for the new node.</returns>
 public static RawlerBase ImageLinks(this RawlerBase rawler)
 {
     var imageLinksNode = new ImageLinks();
     return rawler.Add(imageLinksNode);
 }
Esempio n. 49
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>GetCurrentIterator</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<GetCurrentIterator>(parent);
     return clone;
 }
Esempio n. 50
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>RawlerClass1</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<RawlerClass1>(parent);
     return clone;
 }
Esempio n. 51
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>XPathSelectNodes</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<XPathSelectNodes>(parent);
     return clone;
 }
Esempio n. 52
0
 /// <summary>
 /// Creates a clone of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>RemoveHtmlComment</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<RemoveHtmlComment>(parent);
     return clone;
 }
Esempio n. 53
0
 /// <summary>
 /// Fluent helper: constructs a new <c>Trim</c> node with default settings
 /// and passes it to <c>rawler.Add</c>.
 /// </summary>
 /// <param name="rawler">The node on which <c>Add</c> is invoked.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns for the new node.</returns>
 public static RawlerBase Trim(this RawlerBase rawler)
 {
     var trimNode = new Trim();
     return rawler.Add(trimNode);
 }
Esempio n. 54
0
        /// <summary>
        /// Serializes <paramref name="rawler"/> with <c>XamlServices.Save</c> and then
        /// strips noise from the markup: <c> Enable="True"</c>, <c> Comment=""</c>, and
        /// every attribute whose value was <c>"{x:Null}"</c>.
        /// </summary>
        /// <param name="rawler">The object to serialize.</param>
        /// <returns>The trimmed XAML text.</returns>
        public string Rawler2XAML(RawlerBase rawler)
        {
            var markup = new StringBuilder(System.Xaml.XamlServices.Save(rawler));

            // StringBuilder.Replace edits in place, so the calls need not be chained.
            markup.Replace("\"{x:Null}\"", "Null");
            markup.Replace(" Enable=\"True\"", "");
            markup.Replace(" Comment=\"\"", "");

            // After the first replacement, null attributes look like name=Null.
            // Snapshot the distinct occurrences before mutating the builder.
            var nullAttributePattern = new System.Text.RegularExpressions.Regex(@"\w*=Null");
            var nullAttributes = nullAttributePattern
                .Matches(markup.ToString())
                .Cast<System.Text.RegularExpressions.Match>()
                .Select(match => match.Value)
                .Distinct()
                .ToList();

            // Remove each one together with the single space that precedes it.
            foreach (var attribute in nullAttributes)
            {
                markup.Replace(" " + attribute, string.Empty);
            }

            return markup.ToString();
        }
Esempio n. 55
0
        /// <summary>
        /// Click handler that parses the editor's XAML into a <c>RawlerBase</c> tree and
        /// runs it on a background task. Does nothing but show a message if a run is
        /// already in progress.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, RoutedEventArgs e)
        {
            if (isBusy)
            {
                MessageBox.Show("実行中です");
                return;
            }
            tokenSource = new CancellationTokenSource();

            // The parameter window may rewrite the XAML; any result other than true aborts the run.
            var insertParameter = new Tool.InsertParameterWindow();
            if (insertParameter.Analyze(textEditor.Text) != true)
            {
                return;
            }
            string xaml = insertParameter.Xaml;

            object obj;
            try
            {
                obj = System.Xaml.XamlServices.Parse(xaml);
            }
            catch (Exception ex)
            {
                ReportManage.ErrReport(new RawlerBase(), "XAMLの形式がおかしいです" + ex.Message);
                return;
            }
            if (obj == null)
            {
                return;
            }
            if ((obj is Rawler.Tool.RawlerBase) == false)
            {
                ReportManage.ErrReport(new RawlerBase(), "キャストできませんでした。XAMLの形式がおかしいです");
                // Stop here: continuing would dereference a null rawler and log a
                // second, misleading NullReferenceException message.
                return;
            }

            try
            {
                rawler = (obj as Rawler.Tool.RawlerBase);
                ReportManage.RowCount = 0;
                rawler.SetParent();
                startDate = DateTime.Now;
                foreach (var item in rawler.GetConectAllRawler())
                {
                    // Every node checks for cancellation, then blocks while paused, before it runs.
                    item.BeginRunEvent += (o, arg) =>
                    {
                        tokenSource.Token.ThrowIfCancellationRequested();
                        while (pause)
                        {
                            System.Threading.Thread.Sleep(1000);
                        }
                    };
                }
                isBusy = true;
                Task.Factory.StartNew(() => rawler.Run(), tokenSource.Token).ContinueWith((t) =>
                {
                    // Cancellation and failures surface on the task, not in the
                    // try/catch around StartNew, so they must be observed here.
                    if (t.IsCanceled)
                    {
                        ReportManage.ErrReport(new RawlerBase(), "キャンセルされました");
                    }
                    else if (t.IsFaulted)
                    {
                        var cause = t.Exception.GetBaseException();
                        if (cause is OperationCanceledException)
                        {
                            ReportManage.ErrReport(new RawlerBase(), "キャンセルされました");
                        }
                        else
                        {
                            ReportManage.ErrReport(new RawlerBase(), cause.Message);
                        }
                    }
                    StopWatch();
                    isBusy = false;
                });
            }
            catch (OperationCanceledException)
            {
                // Only reachable if setup itself throws a cancellation synchronously.
                isBusy = false;
                ReportManage.ErrReport(new RawlerBase(), "キャンセルされました");
                MessageBox.Show("キャンセルされました");
            }
            catch (Exception ex)
            {
                // Make sure a failed start does not leave the UI permanently "busy".
                isBusy = false;
                ReportManage.ErrReport(new RawlerBase(), ex.Message);
            }
        }
Esempio n. 56
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>PageOnce</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<PageOnce>(parent);
     return clone;
 }
Esempio n. 57
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>CaseDateTime</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<CaseDateTime>(parent);
     return clone;
 }
Esempio n. 58
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>DataWriteTweet</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<DataWriteTweet>(parent);
     return clone;
 }
Esempio n. 59
0
 /// <summary>
 /// Fluent helper: constructs a new <c>NextPage</c> node with default settings
 /// and passes it to <c>rawler.Add</c>.
 /// </summary>
 /// <param name="rawler">The node on which <c>Add</c> is invoked.</param>
 /// <returns>Whatever <c>rawler.Add</c> returns for the new node.</returns>
 public static RawlerBase NextPage(this RawlerBase rawler)
 {
     var nextPageNode = new NextPage();
     return rawler.Add(nextPageNode);
 }
Esempio n. 60
0
 /// <summary>
 /// Produces a copy of this node by delegating to the base class's generic
 /// <c>Clone&lt;T&gt;</c> with <c>LevenshteinDistance</c> as the type argument.
 /// </summary>
 /// <param name="parent">Forwarded unchanged to the base clone call.</param>
 /// <returns>The object returned by the base clone call.</returns>
 public override RawlerBase Clone(RawlerBase parent)
 {
     RawlerBase clone = base.Clone<LevenshteinDistance>(parent);
     return clone;
 }