/// <summary>
/// Runs 20 localization experiments. Each run builds a fresh network, prepares
/// neighbour/hop data, then evaluates DV-Hop and Revised DV-Hop and exports both results.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    // Clears any estimate left behind by a previous algorithm so the next one starts clean.
    Action<List<Node>> resetEstimates = nodes =>
    {
        foreach (GeneralNode general in nodes)
        {
            general.EstimatedY = 0d;
            general.EstimatedX = 0d;
            general.IsAlreadyLocated = false;
            general.IsLocatable = false;
        }
    };

    int run = 0;
    while (run < 20)
    {
        NodeList network = new NodeList(20, 80, 15, 100, 100);
        AlgorithmFunction.AlgorithmPreparation(network, 15);

        // Center-of-mass algorithm (currently disabled):
        //   AlgorithmFunction.CenterOfMass_algorithm(network, 1);
        //   DataExport.DataExportToExcel(network, @"d:/COM.xls");

        List<Node> generalNodes = network.GetAllGeneralNode();

        // DV-Hop algorithm
        resetEstimates(generalNodes);
        AlgorithmFunction.DV_Hop_algorithm(network);
        DataExport.DataExportToExcel(network, @"d:/DV-Hop.xls");

        // Revised DV-Hop algorithm
        resetEstimates(generalNodes);
        AlgorithmFunction.Revised_DV_Hop_algorithm(network, 5);
        DataExport.DataExportToExcel(network, @"d:/Revised-DV-Hop.xls");

        run++;
    }

    Console.WriteLine("==========Done==========");
    Console.ReadKey();
}
/// <summary>
/// Creates an expression over the given nodes. A <see cref="NodeList"/> argument is
/// stored as-is; any other sequence is copied into a new <see cref="NodeList"/>.
/// </summary>
/// <param name="value">The nodes that make up the expression.</param>
public Expression(IEnumerable<Node> value)
{
    Value = value as NodeList ?? new NodeList(value);
}
/// <summary>
/// Parses a partial reference (an underscore followed by a quoted path) and returns a new
/// <see cref="PartialNode"/> whose target is the path converted to a ".haml" file name.
/// </summary>
/// <param name="prototypes">A list with node types; used to create the "_" node.</param>
/// <param name="parent">Node that this is a subnode to. Can be null.</param>
/// <param name="line">Line to parse.</param>
/// <param name="offset">Where to start the parsing. Will be set to the position just after the closing quote.</param>
/// <returns>The created partial node.</returns>
/// <exception cref="CodeGeneratorException">
/// The offset is past the end of the line, the character at the offset is not '_',
/// or no closing quote was found.
/// </exception>
public override Node Parse(NodeList prototypes, Node parent, LineInfo line, ref int offset)
{
    if (offset > line.Data.Length - 1)
        throw new CodeGeneratorException(line.LineNumber, line.Data, "Tried to parse after end of line");

    if (line.Data[offset] != '_')
        throw new CodeGeneratorException(line.LineNumber, line.Data, "Not a PartialNode");

    // The path starts right after the opening quote (offset + 2); locate the closing quote.
    int start = offset + 2;
    int pos = start < line.Data.Length ? line.Data.IndexOf('"', start) : -1;
    if (pos == -1)
        throw new CodeGeneratorException(line.LineNumber, line.Data, "PartialNode does not contain an end paranthesis.");

    // Cut out the text between the quotes and convert it from the
    // /example/example/ form to example\\example.haml
    string target = line.Data.Substring(start, pos - start);

    // Length guards: an empty path (or a lone "/") previously caused an
    // IndexOutOfRangeException when the first/last character was inspected.
    if (target.Length > 0 && target[target.Length - 1] == '/')
        target = target.Substring(0, target.Length - 1);
    if (target.Length > 0 && target[0] == '/')
        target = target.Substring(1);

    PartialNode node = (PartialNode)prototypes.CreateNode("_", parent);
    node._target = target.Replace("/", "\\\\") + ".haml";

    offset = pos + 1;
    return node;
}
/// <summary>
/// Consumes the rest of the line and returns a doctype node carrying the
/// XHTML 1.0 Strict declaration with every double quote doubled.
/// </summary>
/// <param name="prototypes">List containing all node types (not used here).</param>
/// <param name="parent">Node that this is a subnode to. Can be null.</param>
/// <param name="line">Line to parse.</param>
/// <param name="offset">Where to start the parsing. Is set to the end of the line.</param>
/// <returns>A new <see cref="DocTypeTag"/>.</returns>
/// <exception cref="Exceptions.CodeGeneratorException"></exception>
public override Node Parse(NodeList prototypes, Node parent, LineInfo line, ref int offset)
{
    const string declaration =
        @"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 Strict//EN"" ""http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"">";

    // The whole line belongs to the doctype, so nothing is left for other parsers.
    offset = line.Data.Length;

    string escaped = declaration.Replace("\"", "\"\"");
    return new DocTypeTag(escaped, parent);
}
/// <summary>
/// Creates a selector from a sequence of elements. A <c>NodeList&lt;Element&gt;</c> is
/// reused directly; any other sequence is wrapped in a new list.
/// </summary>
/// <param name="elements">The elements that make up the selector.</param>
public Selector(IEnumerable<Element> elements)
{
    Elements = elements as NodeList<Element> ?? new NodeList<Element>(elements);
}
//
// The `primary` rule is the *entry* and *exit* point of the parser.
// The rules here can appear at any level of the parse tree.
//
// The recursive nature of the grammar is an interplay between the `block`
// rule, which represents `{ ... }`, the `ruleset` rule, and this `primary` rule,
// as represented by this simplified grammar:
//
//     primary  →  (ruleset | rule)+
//     ruleset  →  selector+ block
//     block    →  '{' primary '}'
//
// Only at one point is the primary rule not called from the
// block rule: at the root level.
//
// Returns the list of nodes parsed at this level, in source order.
//
public NodeList Primary(Parser parser)
{
    Node node;
    var root = new NodeList();
    NodeList comments = null;

    GatherComments(parser);

    // Try each alternative in order; the first one that yields a node wins.
    // NOTE(review): this relies on `||` and the bool test being defined for Node — confirm against Node's operators.
    while (node = MixinDefinition(parser) || Rule(parser) || PullComments() || Ruleset(parser) ||
                  MixinCall(parser) || Directive(parser))
    {
        // Comments gathered so far are placed before the node that was just parsed.
        if (comments = PullComments())
        {
            root.AddRange(comments);
        }

        // A NodeList result is a run of comments (it comes from PullComments in the chain above).
        comments = node as NodeList;
        if (comments)
        {
            foreach (Comment c in comments)
            {
                c.IsPreSelectorComment = true;
            }
            root.AddRange(comments);
        }
        else
            root.Add(node);

        GatherComments(parser);
    }
    return root;
}
/// <summary>
/// Creates a selector and sorts its elements into filters, zoom constraints,
/// an attachment name and plain elements. Filters and zooms each count as one condition.
/// </summary>
/// <param name="elements">Elements of the selector.</param>
/// <param name="env">Environment handed to the filter set; a new one is created when null.</param>
public CartoSelector(IEnumerable<Element> elements, Env env)
    : base(elements)
{
    m_filters = new CartoFilterSet();
    m_zooms = new NodeList<CartoZoomElement>();
    m_elements = new NodeList<CartoElement>();
    m_conditions = 0;

    env = env ?? new Env(); // TODO

    foreach (Element element in elements)
    {
        var filter = element as CartoFilterElement;
        if (filter != null)
        {
            m_filters.Add(filter, env);
            m_conditions++;
            continue;
        }

        var zoom = element as CartoZoomElement;
        if (zoom != null)
        {
            m_zooms.Add(zoom);
            m_conditions++;
            continue;
        }

        var attachment = element as CartoAttachmentElement;
        if (attachment != null)
        {
            m_attachment = attachment.Value;
            continue;
        }

        // Anything else must be a plain carto element.
        m_elements.Add((CartoElement)element);
    }
}
/// <summary>
/// Builds one build-order tree per player per replay, then stores all trees and the
/// subset of trees whose first build action has an object type not seen before.
/// </summary>
public void Build()
{
    NodeList<BuildAction> roots = new NodeList<BuildAction>();
    NodeList<BuildAction> allgames = new NodeList<BuildAction>();

    foreach (var replay in m_replays)
    {
        foreach (var player in replay.Players)
        {
            // Materialize once: the query was previously re-run for the count,
            // for the first element and again inside buildTree.
            var actions = replay.Actions
                                .Where(x => x.Player == player && x.ActionType == Entities.ActionType.Build)
                                .OrderBy(y => y.Sequence)
                                .Cast<BuildAction>()
                                .ToList();

            if (actions.Count == 0)
            {
                continue;
            }

            BuildAction action = actions[0];
            Node<BuildAction> node = new Node<BuildAction>(1, action, buildTree(actions));
            allgames.Add(node);

            // Only the first tree for each opening object type becomes a root.
            if (!roots.Any(x => x.Value.ObjectType == action.ObjectType))
            {
                roots.Add(node);
            }
        }
    }

    countOccurances(roots, allgames);
    m_roots = roots;
    m_allGames = allgames;
}
/// <summary>
/// Resolves this mixin call. The first frame that contains anything matching the selector
/// decides the result: the rules of every matching ruleset or mixin definition whose
/// arguments match are collected and returned.
/// </summary>
/// <param name="env">Environment whose frames are searched in order.</param>
/// <returns>A node list with the collected rules (possibly empty).</returns>
/// <exception cref="ParsingException">No frame contains a match for the selector.</exception>
public override Node Evaluate(Env env)
{
    foreach (var frame in env.Frames)
    {
        NodeList candidates = frame.Find(Selector, null);
        if (candidates.Count == 0)
            continue;

        var collected = new NodeList();
        foreach (var candidate in candidates)
        {
            var ruleset = candidate as Ruleset;
            if (ruleset == null || !ruleset.MatchArguements(Arguments, env))
                continue;

            var definition = candidate as Mixin.Definition;
            if (definition != null)
                collected.AddRange(definition.Evaluate(Arguments, env).Rules);
            else if (ruleset.Rules != null)
                collected.AddRange(ruleset.Rules);
            // todo fix for other Ruleset types?
        }

        // Frames after the first one with candidates are never consulted.
        return collected;
    }

    throw new ParsingException(Selector.ToCSS().Trim() + " is undefined");
}
/// <summary>
/// Creates a loop node from its three header expressions and its body.
/// </summary>
/// <param name="initExpression">Expression stored as the loop initializer.</param>
/// <param name="iterExpression">Expression stored as the loop iteration step.</param>
/// <param name="testExpression">Expression stored as the loop test.</param>
/// <param name="nodes">Nodes appended to this loop's node collection.</param>
public LoopNode(string initExpression, string iterExpression, string testExpression, NodeList nodes)
{
    // Body first, then the header expressions; the assignments are independent.
    m_nodes.AddRange(nodes);

    m_initExpression = initExpression;
    m_testExpression = testExpression;
    m_iterExpression = iterExpression;
}
/// <summary>
/// Creates an order-by clause from its items, optional skip/limit expressions
/// and the method call count.
/// </summary>
/// <param name="orderByClauseItem">The ordering items.</param>
/// <param name="skipExpr">Skip expression, stored as given.</param>
/// <param name="limitExpr">Limit expression, stored as given.</param>
/// <param name="methodCallCount">Method call count, stored as given.</param>
internal OrderByClause(
    NodeList<OrderByClauseItem> orderByClauseItem,
    Node skipExpr,
    Node limitExpr,
    uint methodCallCount)
{
    this._methodCallCount = methodCallCount;
    this._orderByClauseItem = orderByClauseItem;
    this._skipExpr = skipExpr;
    this._limitExpr = limitExpr;
}
/// <summary>
/// Builds an event tree for every game and merges trees that start with the same unit:
/// the first tree for a unit becomes a root, and later trees contribute only those
/// first-level neighbours whose unit the root does not have yet.
/// </summary>
/// <param name="games">Games to process; each must have at least one event.</param>
/// <returns>The merged root nodes.</returns>
public NodeList<ScEvent> ProcessGames(List<ScGame> games)
{
    var roots = new NodeList<ScEvent>();
    var allgames = new NodeList<ScEvent>();

    foreach (ScGame game in games)
    {
        var gameNode = new Node<ScEvent>(1, game.Events[0], buildTree(0, game, games));
        allgames.Add(gameNode);

        bool mergedIntoRoot = false;
        foreach (Node<ScEvent> root in roots)
        {
            if (root.Value.Unit == gameNode.Value.Unit)
            {
                mergedIntoRoot = true;
                foreach (Node<ScEvent> child in gameNode.Neighbors)
                {
                    bool alreadyPresent = root.Neighbors.Any(e => e.Value.Unit == child.Value.Unit);
                    if (!alreadyPresent)
                        root.Neighbors.Add(child);
                }
            }
        }

        // No existing root shares this opening unit (always true for the first game).
        if (!mergedIntoRoot)
            roots.Add(gameNode);
    }

    CountOccurances(roots, allgames);
    return roots;
}
/// <summary>
/// Creates a node with an empty child list.
/// </summary>
/// <param name="name">Node name; null is stored as the empty string.</param>
/// <param name="type">Node type, <see cref="NodeType.Element"/> by default.</param>
/// <param name="flags">Node flags, <see cref="NodeFlags.None"/> by default.</param>
internal Node(String name, NodeType type = NodeType.Element, NodeFlags flags = NodeFlags.None)
{
    if (name == null)
    {
        _name = String.Empty;
    }
    else
    {
        _name = name;
    }

    _type = type;
    _children = new NodeList();
    _flags = flags;
}
/// <summary>
/// Looks for an implicit "source" directive among the children. When one is found it is
/// removed from <paramref name="children"/> and the target is bound to the streaming
/// source it describes.
/// </summary>
/// <param name="target">The meta object being populated.</param>
/// <param name="self">Navigator for the current node (not used here).</param>
/// <param name="children">Remaining child nodes; a matching directive node is removed.</param>
/// <returns>The bound target, or the original target when nothing was bound.</returns>
public override PropertyTreeMetaObject StartStep(PropertyTreeMetaObject target, PropertyTreeNavigator self, NodeList children)
{
    Predicate<PropertyTreeNavigator> isSourceDirective = ImplicitDirective(target, "source");
    var sourceNode = children.FindAndRemove(isSourceDirective).FirstOrDefault();
    if (sourceNode == null)
        return target;

    IServiceProvider services = Parent.GetBasicServices(sourceNode);
    TargetSourceDirective directive =
        this.DirectiveFactory.CreateTargetSource(sourceNode, sourceNode as IUriContext);
    if (directive == null)
        return target;

    try
    {
        target = target.BindStreamingSource(directive, services);
    }
    catch (Exception ex)
    {
        // Critical exceptions propagate; everything else is recorded as a load failure.
        if (ex.IsCriticalException())
            throw;

        Parent.errors.FailedToLoadFromSource(directive.Uri, ex, sourceNode.FileLocation);
    }

    return target;
}
/// <summary>
/// Clusters the observations around centroids chosen by <c>initialCentroidReasonable</c>,
/// refines them with three fixed iterations, prints the number of mismatching
/// observations per cluster and stores the result.
/// </summary>
/// <param name="k">Requested number of clusters. Currently ignored.</param>
/// <param name="observations">Observations to cluster; the chosen centroid values are removed from it.</param>
public void Cluster(int k, NodeList<BuildAction> observations)
{
    const int refinementPasses = 3;

    // Random initial centroids are disabled:
    //   List<Centroid> centroids = initialCentroidRandom(k, observations);
    List<Centroid> centroids = initialCentroidReasonable(observations); // NOTE: ignores k

    foreach (Centroid centroid in centroids)
    {
        observations.Remove(centroid.Value);
    }

    assignToCentroid(observations, centroids);

    // TODO: stop when the clusters are stable instead of after a fixed number of passes.
    int pass = 0;
    while (pass < refinementPasses)
    {
        centroids = iterate(centroids);
        assignToCentroid(observations, centroids);
        pass++;
    }

    foreach (var centroid in centroids)
    {
        // An observation is an error when its object type differs from its centroid's.
        int mismatches = centroid.Observations.Count(x => x.Value.ObjectType != centroid.Value.Value.ObjectType);
        System.Console.WriteLine("Error count: " + mismatches);
    }

    m_clusters = centroids;
}
/// <summary>
/// Creates a selector from an element list. If the first element's combinator
/// is the empty string, it is replaced with a single space.
/// </summary>
/// <param name="elements">Elements of the selector; the first element is accessed, so the list must not be empty.</param>
public Selector(NodeList<Element> elements)
{
    Elements = elements;

    var leadingCombinator = Elements[0].Combinator;
    if (leadingCombinator.Value == "")
    {
        leadingCombinator.Value = " ";
    }
}
/// <summary>
/// Algorithm preparation stage: determines every node's neighbours and
/// then the hop counts of the network nodes.
/// </summary>
/// <param name="nodeList">The network to prepare.</param>
/// <param name="count">Value passed through to the hop-count computation.</param>
public static void AlgorithmPreparation(NodeList nodeList, int count)
{
    // Step 1: find the neighbour nodes of each node.
    nodeList.GetNeighbourNode(nodeList);

    // Step 2: compute the hop counts between the network nodes.
    nodeList.GetNodeAllHop(nodeList, count);
}
/// <summary>
/// Centroid (center of mass) localization: each general node is placed at the average
/// position of the beacon nodes that are at most <paramref name="j"/> hops away.
/// Nodes with no such beacon are left untouched.
/// </summary>
/// <param name="nodeList">The network containing general and beacon nodes.</param>
/// <param name="j">Maximum hop distance of the beacons that help locate a node.</param>
public static void CenterOfMass_algorithm(NodeList nodeList, int j)
{
    foreach (GeneralNode target in nodeList.GetAllGeneralNode())
    {
        // Collect the beacons within j hops of this node.
        var helpers = new List<Node>();
        foreach (int knownId in target.HopCountTable.Keys)
        {
            var candidate = nodeList.GetNodeById(knownId);
            if (candidate.IsBeaconNode && target.HopCountTable[knownId] <= j)
            {
                helpers.Add(candidate);
            }
        }

        if (helpers.Count < 1)
        {
            continue;
        }

        double totalX = 0d;
        double totalY = 0d;
        foreach (BeaconNode beacon in helpers)
        {
            totalX += beacon.RealX;
            totalY += beacon.RealY;
        }

        target.IsLocatable = true;
        target.IsAlreadyLocated = true;
        target.EstimatedX = totalX / helpers.Count;
        target.EstimatedY = totalY / helpers.Count;
    }
}
/// <summary>
/// Creates a method expression node, delegating to the four-argument
/// constructor with <c>null</c> as its last argument.
/// </summary>
/// <param name="expr">Passed through as the first argument.</param>
/// <param name="distinctKind">Passed through as the distinct kind.</param>
/// <param name="args">Passed through as the argument list.</param>
internal MethodExpr(Node expr, DistinctKind distinctKind, NodeList<Node> args)
    : this(expr, distinctKind, args, null)
{
    // Nothing to do here; all initialization happens in the delegated constructor.
}
/// <summary>
/// Creates an A* solver for a grid of the given size: sets up the neighbour search
/// order, the working nodes, the open/closed lists, the grid and the random seed.
/// </summary>
/// <param name="width">Grid width.</param>
/// <param name="height">Grid height.</param>
public AStar(int width, int height)
{
    w = width;
    h = height;

    // Neighbour offsets, listed in the order in which they are searched.
    coords = new System.Collections.Generic.List<Vector2>
    {
        new Vector2(0, -1),  // up
        new Vector2(1, 0),   // right
        new Vector2(0, 1),   // down
        new Vector2(-1, 0),  // left
    };

    if (allowDiagonals)
    {
        coords.AddRange(new[]
        {
            new Vector2(-1, -1), // up-left
            new Vector2(1, -1),  // up-right
            new Vector2(1, 1),   // down-right
            new Vector2(-1, 1),  // down-left
        });
    }

    relCurrent = new Vector2();
    relLast = new Vector2();
    startNode = new GridNode();
    endNode = new GridNode();

    int cellCount = w * h;
    open = new NodeList(cellCount);
    closed = new NodeList(cellCount);

    createGrid(w, h);
    r = new RandomSeed(THE_SEED);
}
/// <summary>
/// Creates a ruleset with the given selectors and rules, after running the
/// parameterless constructor.
/// </summary>
/// <param name="selectors">Selectors of the ruleset.</param>
/// <param name="rules">Rules contained in the ruleset.</param>
/// <param name="originalRuleset">The ruleset this one originates from; when null, this instance is used.</param>
protected Ruleset(NodeList<Selector> selectors, NodeList rules, Ruleset originalRuleset)
    : this()
{
    Selectors = selectors;
    Rules = rules;

    if (originalRuleset == null)
    {
        OriginalRuleset = this;
    }
    else
    {
        OriginalRuleset = originalRuleset;
    }
}
/// <summary>
/// Creates a resource node.
/// </summary>
/// <param name="game">The owning game instance.</param>
/// <param name="subNode">Sub nodes of this resource node.</param>
/// <param name="name">Name of the resource node.</param>
/// <param name="type">Kind of resource, <see cref="ResourceNodeType.MESH"/> by default.</param>
public ResourceNode(
    Game game,
    NodeList subNode,
    String name,
    ResourceNodeType type = ResourceNodeType.MESH)
{
    this.m_name = name;
    this.m_type = type;
    this.m_game = game;
    this.m_subNodes = subNode;
}
private NodeList SiteList; // Sites declared in this module

#endregion Fields

#region Constructors

/// <summary>
/// Creates an empty module with fresh site, import and function definition lists.
/// </summary>
public Module()
{
    // Every container starts out empty.
    FunctionDefinitionList = new NodeList();
    ImportList = new NodeList();
    SiteList = new NodeList();
}
/// <summary>
/// Reports every remaining child as a missing property through the parent's callback.
/// </summary>
/// <param name="target">The meta object being populated; returned unchanged.</param>
/// <param name="self">Navigator for the current node (not used here).</param>
/// <param name="children">Child nodes; those returned by <c>Rest()</c> are reported.</param>
/// <returns><paramref name="target"/>.</returns>
public override PropertyTreeMetaObject StartStep(PropertyTreeMetaObject target, PropertyTreeNavigator self, NodeList children)
{
    foreach (var unmatched in children.Rest())
    {
        // Types hidden from the UX are not named in the message.
        string message = target.ComponentType.IsHiddenUX()
            ? SR.BinderMissingPropertyNoType(unmatched.QualifiedName)
            : SR.BinderMissingProperty(unmatched.QualifiedName, target.ComponentType);

        try
        {
            var usage = new InterfaceUsageInfo(InterfaceUsage.Missing, message, null, true);
            Parent.Callback.OnPropertyAnnotation(unmatched.QualifiedName.ToString(), usage);
        }
        catch (Exception ex)
        {
            if (ex.IsCriticalException())
                throw;

            throw PropertyTreesFailure.UnmatchedMembersGenericError(ex, unmatched.FileLocation);
        }
    }

    return target;
}
/// <summary>
/// For an untyped-to-typed target with children: converts the children to types, binds
/// them as generic parameters, binds the first child to the result and clears the children.
/// </summary>
/// <param name="target">The meta object being populated.</param>
/// <param name="self">Navigator for the current node; its location is used in error reports.</param>
/// <param name="children">Child nodes; cleared when this step applies.</param>
/// <returns>The target after binding, or the unchanged target when the step does not apply.</returns>
public override PropertyTreeMetaObject StartStep(
    PropertyTreeMetaObject target,
    PropertyTreeNavigator self,
    NodeList children)
{
    bool applies = target is UntypedToTypedMetaObject && children.Any();
    if (!applies)
    {
        return target;
    }

    try
    {
        // TODO Only supports one child (lame spec)
        var rootType = target.Root.ComponentType;
        var typeArguments = children.Select(t => ConvertToType(t, rootType)).ToArray();
        target = target.BindGenericParameters(typeArguments);
    }
    catch (Exception ex)
    {
        if (ex.IsCriticalException())
        {
            throw;
        }

        Parent.errors.CouldNotBindGenericParameters(target.ComponentType, ex, self.FileLocation);
    }

    var firstChild = children.First();
    Parent.Bind(target, firstChild, null);
    children.Clear();
    return target;
}
/// <summary>
/// Creates a function definition from its name, parameter definitions, body
/// and source positions.
/// </summary>
/// <param name="name">Function name.</param>
/// <param name="argDefList">Parameter definitions, stored as given.</param>
/// <param name="body">Body expression.</param>
/// <param name="startPosition">Start position, stored as given.</param>
/// <param name="endPosition">End position, stored as given.</param>
internal FunctionDefinition(
    Identifier name,
    NodeList<PropDefinition> argDefList,
    Node body,
    int startPosition,
    int endPosition)
{
    this._startPosition = startPosition;
    this._endPosition = endPosition;

    this._name = name;
    this._paramDefList = argDefList;
    this._body = body;
}
/// <summary>
/// Creates a cluster node for the given header, creates its node list and
/// prints the header followed by an empty line to the console.
/// </summary>
/// <param name="header">Header describing this node.</param>
public ClusterNode(NodeHeader header)
{
    Header = header;
    this.nodeList = new NodeList(this);

    // Diagnostic output: the header, then a blank separator line.
    Console.WriteLine(header);
    Console.WriteLine();
}
/// <summary>
/// Verifies that a node list built from "Nodes.xml" contains at least one node.
/// </summary>
public void Load_Nodes_From_Xml_File()
{
    const string nodesFile = "Nodes.xml";

    // Precondition: the fixture file must be present.
    Assert.IsTrue(File.Exists(nodesFile));

    var loaded = new NodeList(nodesFile);

    Assert.IsNotNull(loaded);
    Assert.Greater(loaded.Count, 0);
}
/// <summary>
/// Creates a lazily evaluated collection over <paramref name="nodes"/> and registers
/// it with the node list through a weak reference.
/// </summary>
/// <param name="nodes">The node list this collection is based on.</param>
/// <param name="matcher">Optional element filter, stored as given.</param>
public LazyHtmlCollection(NodeList nodes, Func<Element, bool> matcher = null)
    : base(new List<Element>())
{
    Nodes = nodes;
    Matcher = matcher;

    // A weak reference, so the registration alone does not keep this collection alive.
    var registration = new WeakReference<LazyHtmlCollection>(this);
    nodes.HtmlCollections.Add(registration);
}
public static List<Node> PossibleNode = new List<Node>(); // Every adjacent node examined during the last search (used for display)

#endregion Fields

#region Methods

/// <summary>
/// Searches a path from <paramref name="startTile"/> to <paramref name="endTile"/> with A*.
/// </summary>
/// <param name="map">Map handed to the nodes when they compute their neighbours.</param>
/// <param name="startTile">Tile the path starts from.</param>
/// <param name="endTile">Destination tile.</param>
/// <returns>
/// The tiles of the path in travel order, without the start tile, or null when the
/// destination cannot be reached.
/// </returns>
public static MyLinkedList<Tile> CalculatePathWithAStar(Map map, Tile startTile, Tile endTile)
{
    PossibleNode.Clear();

    NodeList<Node> open = new NodeList<Node>();   // candidates that still have to be examined
    NodeList<Node> closed = new NodeList<Node>(); // nodes that have already been examined

    open.Add(new Node(startTile, null, endTile));

    while (open.Count > 0)
    {
        // Take the node at the head of the open list.
        Node current = open[0];
        open.RemoveAt(0);
        closed.Add(current);

        if (current.Tile == endTile)
        {
            // Walk the parent chain back to the start, prepending so the result is in travel order.
            MyLinkedList<Tile> path = new MyLinkedList<Tile>();
            for (Node step = current; step.Parent != null; step = step.Parent)
            {
                path.AddFirst(step.Tile);
            }
            return path;
        }

        List<Node> neighbours = current.GetPossibleNode(map, endTile);

        // Remember every neighbour that was looked at (display support).
        PossibleNode.AddRange(neighbours);

        foreach (Node neighbour in neighbours)
        {
            // Already examined: skip to avoid redundant work.
            if (closed.Contains(neighbour))
            {
                continue;
            }

            // Not yet a candidate: insert it into the open list.
            if (!open.Contains(neighbour))
            {
                open.DichotomicInsertion(neighbour);
                continue;
            }

            // Already a candidate: re-parent it when the new estimate is lower than the stored one.
            if (neighbour.EstimatedMovement < open[neighbour].EstimatedMovement)
            {
                open[neighbour].Parent = current;
            }
        }
    }

    return null;
}
/// <summary>
/// Crawls the bid result list of www.hebggzy.cn page by page, opens each entry's detail
/// page and turns it into a <see cref="BidInfo"/>. Attachment links found in the detail
/// content are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> entries were collected.</param>
/// <returns>The collected bid entries; empty when the first page cannot be loaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    // The pager cell text contains the page count after the last-read "/" separator.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "huifont")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string pageText = pageNode.AsString();
            pageText = pageText.Substring(pageText.IndexOf("/") + 1, pageText.Length - pageText.IndexOf("/") - 1);
            pageInt = int.Parse(pageText);
        }
        catch
        {
            // Best effort: an unreadable pager means only the first page is crawled.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl("http://www.hebggzy.cn/022/022001/022001002/" + i + ".html", Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "right-text-li")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty,
                   code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty,
                   specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty,
                   prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;

            INode node = listNode[j];
            ATag aTag = node.GetATag();
            if (aTag == null)
            {
                // A list item without a link cannot be followed; previously this caused a NullReferenceException.
                continue;
            }

            prjName = aTag.GetAttribute("title");

            // The title starts with the project code; cut it off where the remaining text begins.
            string temp = prjName.GetNotChina();
            if (!string.IsNullOrWhiteSpace(temp) && temp.Length > 1)
            {
                code = prjName.Substring(0, prjName.IndexOf(temp.Substring(0, 2)));
            }
            if (!string.IsNullOrWhiteSpace(code))
            {
                prjName = prjName.GetReplace(code);
            }

            beginDate = node.ToPlainTextString().GetDateRegex();
            InfoUrl = "http://www.hebggzy.cn" + aTag.Link;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-main")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            HtmlTxt = dtlNode.AsHtml();
            bidCtx = HtmlTxt.GetReplace(new string[] { "<br/>", "<br />", "<br>" }, "\r\n").GetReplace("\r\n\r\n,\r\n\r\n,\r\n\r\n,\r\n\r\n,\r\n\r\n,\r\n\r\n,\r\n\r\n", "\r\n").ToCtxString();
            buildUnit = bidCtx.GetBuildRegex();
            bidUnit = bidCtx.GetBidRegex().GetReplace("A");
            bidMoney = bidCtx.GetMoneyRegex();
            prjMgr = bidCtx.GetMgrRegex();
            if (string.IsNullOrWhiteSpace(code))
            {
                code = bidCtx.GetCodeRegex().GetCodeDel();
            }

            // Trim trailing text after the recognised keywords.
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }
            if (buildUnit.Contains("地址"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("地址")) + "地址";
            }

            msgType = "河北省公共资源交易中心";
            specType = "政府采购";
            bidType = "建设工程";

            BidInfo info = ToolDb.GenBidInfo("河北省", "河北省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Collect attachment links from the detail content.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k] as ATag;
                    if (a == null)
                    {
                        // Not an anchor tag instance; nothing to attach.
                        continue;
                    }

                    if (a.IsAtagAttach())
                    {
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.hebggzy.cn/" + a.Link.GetReplace("../,./");
                        }

                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Creates an XML node with the given name, no attributes and no child nodes.
/// </summary>
/// <param name="name">The name of the node.</param>
public XmlNode(string name)
{
    // Start with empty containers, then record the name.
    this.childNodes = new NodeList();
    this.attributes = new AttributeDictionary();
    this.name = name;
}
/// <summary>
/// Crawls the notice list of www.szzszx.com.cn page by page, loads every notice's detail
/// page and saves it as a <see cref="NotifyInfo"/>. Entities are written directly through
/// <c>ToolDb.SaveEntity</c>, so the method itself always returns null.
/// </summary>
/// <param name="crawlAll">When false, crawling stops after <c>MaxCount</c> notices were saved.</param>
/// <returns>Always null.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    int pageInt = 1, sqlCount = 0;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return null;
    }

    // Read the page count from the pager text.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "divPage")));
    if (pageList != null && pageList.Count > 0)
    {
        try
        {
            string temp = pageList.AsString();
            pageInt = Convert.ToInt32(temp.GetRegexBegEnd("共", "页"));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&pageindex=" + i.ToString(), Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "list")), true), new TagNameFilter("li")));
        if (nodeList == null || nodeList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < nodeList.Count; j++)
        {
            string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty,
                   infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

            infoType = "通知公告";
            releaseTime = nodeList[j].ToPlainTextString().GetDateRegex();
            headName = nodeList[j].GetATag().LinkText;
            infoUrl = "http://www.szzszx.com.cn" + nodeList[j].GetATagHref();

            string htldtl = string.Empty;
            try
            {
                htldtl = ToolHtml.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
            }
            catch
            {
                // Best effort: an empty detail page yields no content node below, so the entry is skipped.
            }

            parser = new Parser(new Lexer(htldtl));
            NodeList noList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
            if (noList == null || noList.Count == 0)
            {
                continue;
            }

            ctxHtml = noList[0].ToHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");
            infoCtx = ctxHtml.ToCtxString().Replace(" ", "").Replace(" ", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
            infoCtx = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
            msgType = MsgTypeCosnt.ShenZhenZSWMsgType;

            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "深圳市工程", string.Empty, infoCtx, infoType);

            // Save first, then count: checking the limit before saving dropped the
            // MaxCount-th notice (and saved nothing at all for MaxCount == 1).
            // Attachment crawling for saved notices is currently disabled.
            ToolDb.SaveEntity(info, this.ExistCompareFields, ExistsUpdate);
            sqlCount++;
            if (!crawlAll && sqlCount >= this.MaxCount)
            {
                return null;
            }
        }
    }

    return null;
}
/// <summary>
/// Finds the first node whose position is exactly (<paramref name="x"/>, <paramref name="y"/>).
/// </summary>
/// <param name="x">X coordinate to match exactly.</param>
/// <param name="y">Y coordinate to match exactly.</param>
/// <returns>The first matching node, or the default value when there is none.</returns>
public NavGraphNode GetNode(double x, double y)
{
    foreach (var candidate in NodeList)
    {
        if (candidate.Position.X == x && candidate.Position.Y == y)
        {
            return candidate;
        }
    }

    return default(NavGraphNode);
}
/// <summary>
/// Counts the nodes whose index is zero or greater.
/// </summary>
/// <returns>The number of nodes with a non-negative index.</returns>
public int NumberOfActiveNodes()
{
    var active = NodeList.Where(node => node.Index >= 0);
    return active.Count();
}
/// <summary>
/// Crawls the tender invitation list (20 entries per page), opens each entry's detail page
/// and turns it into an <see cref="InviteInfo"/>.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> entries were collected.</param>
/// <returns>The collected invitations; empty when the first page cannot be loaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pagerNodes = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pages")));
    if (pagerNodes != null && pagerNodes.Count > 0)
    {
        // The pager text starts with the record count followed by "条"; the "kd" prefix
        // gives the extraction helper a start marker.
        string marked = "kd" + pagerNodes[0].ToPlainTextString().Trim();
        string recordText = marked.GetRegexBegEnd("kd", "条");
        try
        {
            pageInt = int.Parse(recordText);
        }
        catch
        {
        }

        // Convert the record count to a page count, rounding up.
        bool hasPartialPage = pageInt % 20 > 0;
        pageInt = pageInt / 20;
        if (hasPartialPage)
        {
            pageInt = pageInt + 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + i + ".html", Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list")), true),
                new TagNameFilter("li")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < listNode.Count; j++)
        {
            INode item = listNode[j];
            ATag aTag = item.GetATag();
            if (aTag == null)
            {
                continue;
            }

            string prjName = aTag.LinkText;
            string beginDate = item.ToPlainTextString().GetDateRegex();
            string InfoUrl = aTag.Link;

            string htmldtl;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "pd0 par1")));
            if (dtlNode != null && dtlNode.Count > 0)
            {
                string HtmlTxt = dtlNode.AsHtml();
                string inviteCtx = HtmlTxt.Replace("</p>", "\r\n").ToCtxString();
                string buildUnit = inviteCtx.GetBuildRegex();
                string prjAddress = inviteCtx.GetAddressRegex();

                // Preferred source of the code: the <p id="name"> element of the detail page.
                string code = string.Empty;
                try
                {
                    Parser codeParser = new Parser(new Lexer(htmldtl));
                    NodeList codeNodes = codeParser.ExtractAllNodesThatMatch(
                        new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("id", "name")));
                    if (codeNodes != null && codeNodes.Count > 0)
                    {
                        code = codeNodes[0].ToPlainTextString().Trim();
                    }
                }
                catch
                {
                }

                // Fallback: extract the code from the text content.
                if (string.IsNullOrWhiteSpace(code))
                {
                    code = inviteCtx.GetCodeRegex().GetChina();
                }
                if (code.Contains(")"))
                {
                    code = code.GetReplace(")", "");
                }

                string inviteType = prjName.GetInviteBidType();

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "韶关市区", string.Empty, string.Empty, code, prjName, prjAddress, buildUnit, beginDate, string.Empty, inviteCtx, string.Empty, "韶关市住房和城乡建设局", inviteType, "建设工程", string.Empty, InfoUrl, HtmlTxt);
                list.Add(info);
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the announcement list of baihua.huidong.gov.cn. Entries whose title mentions a bid
/// result ("中标", "成交" or "结果") become <see cref="BidInfo"/> items, all others become
/// <see cref="InviteInfo"/> items. Attachment links in the detail content are added to
/// <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> entries were collected.</param>
/// <returns>The collected entries; empty when the first page cannot be loaded.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // The second-to-last pager link carries the page count in the form "[n]".
    Parser parser = new Parser(new Lexer(html));
    NodeList pagerLinks = parser.ExtractAllNodesThatMatch(
        new AndFilter(
            new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "0h120")), true),
            new TagNameFilter("a")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            string pageText = pagerLinks[pagerLinks.Count - 2].ToNodePlainString();
            pageInt = Convert.ToInt32(pageText.GetReplace("[,]"));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i, Encoding.Default);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList viewList = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "0h120")));
        if (viewList == null || viewList.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < viewList.Count; j++)
        {
            TableTag table = viewList[j] as TableTag;
            ATag aTag = viewList[j].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string tempName = aTag.LinkText.Trim().GetReplace(" ");
            string beginDate = table.ToNodePlainString().GetDateRegex();
            string InfoUrl = "http://baihua.huidong.gov.cn/" + aTag.Link;

            string htlDtl;
            try
            {
                htlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htlDtl));
            NodeList dtl = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "fontzoom")));
            if (dtl == null || dtl.Count == 0)
            {
                continue;
            }

            string HtmlTxt = dtl.AsHtml();

            // Fields extracted the same way for both kinds of announcement.
            string ctx = HtmlTxt.ToLower().GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
            string prjName = ctx.GetRegex("工程名称,项目名称,工程监理名称");
            if (string.IsNullOrEmpty(prjName))
            {
                prjName = tempName;
            }
            string code = ctx.GetCodeRegex().GetCodeDel();
            string buildUnit = ctx.GetBuildRegex();
            if (buildUnit.Contains("招标代理"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理"));
            }
            if (buildUnit.Contains("公司"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("公司")) + "公司";
            }

            string endDate = string.Empty, otherType = string.Empty;
            string msgType = "惠东县白花镇人民政府";
            string specType = "政府采购";

            // Creates the attachment record for the entity built below.
            Func<string, string, BaseAttach> createAttach;

            bool isBidResult = tempName.Contains("中标") || tempName.Contains("成交") || tempName.Contains("结果");
            if (isBidResult)
            {
                string prjMgr = string.Empty;

                string bidUnit = ctx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = ctx.GetRegex("中标候选公司,中标候选人");
                }

                // Amounts above 100000 are divided by 10000; an unparsable amount is kept as extracted.
                string bidMoney = ctx.GetMoneyRegex();
                try
                {
                    decimal amount = decimal.Parse(bidMoney);
                    if (amount > 100000)
                    {
                        bidMoney = (amount / 10000).ToString();
                    }
                }
                catch
                {
                }

                string bidType = prjName.GetInviteBidType();

                BidInfo info = ToolDb.GenBidInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, ctx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);
                createAttach = (text, url) => ToolDb.GenBaseAttach(text, info.Id, url);
            }
            else
            {
                string remark = string.Empty;
                string inviteType = prjName.GetInviteBidType();
                string prjAddress = ctx.GetAddressRegex();

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "惠州市区", "惠东县", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, ctx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);
                createAttach = (text, url) => ToolDb.GenBaseAttach(text, info.Id, url);
            }

            // Collect attachment links from the detail content.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (!a.IsAtagAttach())
                    {
                        continue;
                    }

                    string link = a.Link.ToLower().Contains("http")
                        ? a.Link
                        : "http://baihua.huidong.gov.cn/" + a.Link;

                    BaseAttach attach = createAttach(a.LinkText, link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Replaces the <c>NodeList</c> member with a <c>NodeList&lt;T&gt;</c> created by its parameterless constructor.
/// </summary>
public void A_node_list()
{
    var freshList = new NodeList<T>();
    NodeList = freshList;
}
/// <summary>
/// Crawls the paged notice list at <c>SiteUrl</c> (detail pages live under <c>http://www.fzztb.com</c>),
/// downloads each row's detail page and converts it into a <c>NoticeInfo</c> of type "补充通知".
/// Attachment links found in the detail body are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as <c>MaxCount</c> notices have been collected.
/// </param>
/// <returns>
/// The collected notices; an empty list when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<NoticeInfo>();
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
    }
    catch
    {
        return list;
    }

    // Read the page count from the pager cell; keep the single-page default when it cannot be read.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "td")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("录共", "页");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Best effort: fall back to one page.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + i);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "biddingSupplements")), true), new TagNameFilter("table")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];

            // Skip rows that cannot hold a notice: fewer than the five cells read below,
            // or no link in the title cell. Without this guard one such row aborted the whole crawl.
            if (tr.ColumnCount < 5)
            {
                continue;
            }
            ATag aTag = tr.Columns[3].GetATag();
            if (aTag == null)
            {
                continue;
            }

            string InfoType = "补充通知";
            string prjCode = tr.Columns[1].ToNodePlainString();
            string InfoTitle = aTag.LinkText;
            if (InfoTitle.Contains(".."))
            {
                // The link text contains ".."; use the title attribute instead.
                InfoTitle = aTag.GetAttribute("title");
            }
            string PublistTime = tr.Columns[4].ToPlainTextString().GetDateRegex();
            string InfoUrl = "http://www.fzztb.com" + aTag.Link;

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            string htmlTxt = dtlNode.AsHtml().GetJsString();
            string InfoCtx = htmlTxt.ToCtxString();
            string buildUnit = InfoCtx.GetBuildRegex();
            NoticeInfo info = ToolDb.GenNoticeInfo("福建省", "福建省及地市", "福州市", string.Empty, InfoTitle, InfoType, InfoCtx, PublistTime, string.Empty, "福州市城乡建设委员会", InfoUrl, prjCode, buildUnit, string.Empty, string.Empty, "建设工程", string.Empty, htmlTxt);

            // Register every attachment link found in the detail body.
            parser = new Parser(new Lexer(htmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a.IsAtagAttach())
                    {
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.fzztb.com" + a.Link;
                        }

                        // Links longer than 500 bytes are not stored.
                        // NOTE(review): presumably a storage column limit - confirm.
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Crawls the paged grid <c>DataGrid1</c> at <c>SiteUrl</c>, downloads every row's detail page from
/// <c>http://www.bcactc.com/home/gcxx/</c> and converts it into a <c>BidInfo</c>.
/// Attachment links found in the detail table are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as <c>MaxCount</c> entries have been collected.
/// </param>
/// <returns>
/// The collected bid entries; an empty list when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList bids = new List<BidInfo>();
    int totalPages = 1;
    string pageHtml = string.Empty;
    string cookies = string.Empty;
    try
    {
        pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookies);
    }
    catch
    {
        return bids;
    }

    // The page count is the text of span#lblPageCount; keep one page when it cannot be parsed.
    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pageCountNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "lblPageCount")));
    if (pageCountNodes != null && pageCountNodes.Count > 0)
    {
        try
        {
            totalPages = int.Parse(pageCountNodes[0].ToNodePlainString());
        }
        catch
        {
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            // Post the form back with the state of the page currently held in pageHtml.
            string viewState = this.ToolWebSite.GetAspNetViewState(pageHtml);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(pageHtml);
            string[] fieldNames = new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION", "gcbh_Text_Box", "gcmc_TextBox", "num_TextBox", "ImageButton3.x", "ImageButton3.y" };
            string[] fieldValues = new string[] { "", "", "", viewState, "B0108473", eventValidation, "", "", "", "5", "12" };
            NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form, Encoding.Default, ref cookies);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList gridNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "DataGrid1")));
        if (gridNodes == null || gridNodes.Count <= 0)
        {
            continue;
        }

        TableTag grid = gridNodes[0] as TableTag;
        for (int rowNo = 0; rowNo < grid.RowCount; rowNo++)
        {
            TableRow tr = grid.Rows[rowNo];
            ATag titleLink = tr.Columns[2].GetATag();
            if (titleLink == null)
            {
                continue;
            }

            string code = tr.Columns[1].ToNodePlainString();
            string prjName = titleLink.LinkText.GetReplace(" ");
            string beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();
            string InfoUrl = "http://www.bcactc.com/home/gcxx/" + titleLink.Link;

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList detailNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "hei_text")));
            if (detailNodes == null || detailNodes.Count <= 0)
            {
                continue;
            }

            TableTag detailTable = detailNodes[0] as TableTag;
            string HtmlTxt = detailTable.ToHtml();

            // Flatten the detail table into text: in an ordinary row odd-positioned cells are written
            // as "text:" and even-positioned cells as "text" followed by a line break; for a row that
            // IsTable accepts, the output of GetTableBid is appended once per cell instead.
            string bidCtx = string.Empty;
            for (int r = 0; r < detailTable.RowCount; r++)
            {
                for (int c = 0; c < detailTable.Rows[r].ColumnCount; c++)
                {
                    string cellText = detailTable.Rows[r].Columns[c].ToHtml().GetReplace("<br>,<br/>", "\r\n").ToCtxString();
                    if (!cellText.Contains("\r\n"))
                    {
                        cellText = detailTable.Rows[r].Columns[c].ToNodePlainString();
                    }

                    if (IsTable(detailTable.Rows[r].ToHtml()))
                    {
                        bidCtx += GetTableBid(detailTable.Rows[r].ToHtml());
                    }
                    else if ((c + 1) % 2 == 0)
                    {
                        bidCtx += cellText + "\r\n";
                    }
                    else
                    {
                        bidCtx += cellText.GetReplace(":,:") + ":";
                    }
                }
            }
            bidCtx = bidCtx.GetReplace(":\r\n", ":");

            // A project code containing ".." in the list is re-read from the detail text.
            if (code.Contains(".."))
            {
                code = bidCtx.GetCodeRegex();
            }

            string buildUnit = bidCtx.GetBuildRegex();
            if (string.IsNullOrEmpty(buildUnit))
            {
                buildUnit = bidCtx.GetRegex("建设单位名称");
            }

            string bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("中标侯选人");
            }

            string bidMoney = bidCtx.GetMoneyRegex();
            string prjMgr = bidCtx.GetMgrRegex();

            BidInfo info = ToolDb.GenBidInfo("北京市", "北京市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, string.Empty, bidCtx, string.Empty, "北京市建设工程发包承包交易中心", "施工", "建设工程", string.Empty, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            bids.Add(info);

            // Register every attachment link found in the detail table.
            Parser linkParser = new Parser(new Lexer(HtmlTxt));
            NodeList links = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (links != null && links.Count > 0)
            {
                for (int k = 0; k < links.Count; k++)
                {
                    ATag a = links[k] as ATag;
                    if (!a.IsAtagAttach())
                    {
                        continue;
                    }
                    string link = a.Link.ToLower().Contains("http") ? a.Link : "http://www.bcactc.com/" + a.Link;
                    base.AttachList.Add(ToolDb.GenBaseAttach(a.LinkText, info.Id, link));
                }
            }

            if (!crawlAll && bids.Count >= this.MaxCount)
            {
                return bids;
            }
        }
    }
    return bids;
}
/// <summary>
/// Placeholder factory for a <c>Selector</c>; not implemented yet.
/// </summary>
/// <param name="elements">Unused by the current stub.</param>
/// <param name="location">Unused by the current stub.</param>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public Selector Selector(NodeList<Element> elements, NodeLocation location)
{
    // TODO: provide an implementation.
    throw new System.NotImplementedException();
}
/// <summary>
/// Builds a <c>MixinDefinition</c> from the given parts and sets its <c>Location</c>.
/// </summary>
/// <param name="name">Passed through to the <c>MixinDefinition</c> constructor.</param>
/// <param name="parameters">Passed through to the <c>MixinDefinition</c> constructor.</param>
/// <param name="rules">Passed through to the <c>MixinDefinition</c> constructor.</param>
/// <param name="condition">Passed through to the <c>MixinDefinition</c> constructor.</param>
/// <param name="variadic">Passed through to the <c>MixinDefinition</c> constructor.</param>
/// <param name="location">Value assigned to <c>Location</c> of the new node.</param>
/// <returns>The newly created definition.</returns>
public MixinDefinition MixinDefinition(string name, NodeList<dotless.Core.Parser.Tree.Rule> parameters, NodeList rules, Condition condition, bool variadic, NodeLocation location)
{
    var definition = new MixinDefinition(name, parameters, rules, condition, variadic);
    definition.Location = location;
    return definition;
}
/// <summary>
/// Crawls the paged list <c>http://218.20.201.20/www/zbmsg/2008/xzb_list.asp</c>, downloads every
/// <c>xzb_show.asp</c> detail page and converts it into a <c>BidInfo</c>. The winning bidder and the
/// bid amount are first looked up in the nodes under <c>span.news_show</c>; when either is still
/// missing, the <c>div</c> and then the <c>p</c> children of that span are searched as plain text.
/// Links containing "uploadfile" are added to <c>AttachList</c> as attachments.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as <c>MaxCount</c> entries have been collected.
/// </param>
/// <returns>
/// The collected bid entries; an empty list when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();

    // Determine the page count.
    int pageInt = 1;
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "5")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        NodeList aNodes = new NodeList();
        tdNodes[0].CollectInto(aNodes, new TagNameFilter("a"));
        for (int i = 0; i < aNodes.Count; i++)
        {
            ATag pagerLink = aNodes[i] as ATag;
            if (pagerLink != null && pagerLink.ToPlainTextString().Contains("尾页"))
            {
                // All non-digits are stripped from the "尾页" link and the remainder is read as the
                // page count. A link without usable digits keeps the single-page default instead of
                // aborting the crawl.
                int lastPage;
                if (int.TryParse(new Regex(@"[^0-9]+").Replace(pagerLink.Link, ""), out lastPage))
                {
                    pageInt = lastPage;
                }
                break;
            }
        }
    }
    parser.Reset();

    Regex regexHtml = new Regex(@"<div[^>]*>[\s]*</div>");
    Regex strReplace = new Regex(@"<a[^>]*>|</a>");
    Regex anchorRegex = new Regex(@"<a[^>]*>[^<]*</a>");
    Regex regdecimal = new Regex(@"\d{1,}[\.]?\d{0,}");

    for (int i = 1; i <= pageInt; i++)
    {
        try
        {
            html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode("http://218.20.201.20/www/zbmsg/2008/xzb_list.asp?page=" + i.ToString() + "&id=13828"), Encoding.Default);
        }
        catch (Exception)
        {
            continue;
        }

        parser = new Parser(new Lexer(html));
        tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_list")), true)));
        if (tdNodes == null || tdNodes.Count == 0)
        {
            continue;
        }

        for (int j = 0; j < tdNodes.Count; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, InfoUrl = string.Empty, beginDate = string.Empty, bidType = string.Empty, HtmlTxt = string.Empty;
            decimal decMoney = 0;

            ATag aTag = tdNodes[j] as ATag;
            if (aTag == null || !aTag.Link.Contains("xzb_show.asp"))
            {
                continue;
            }

            // Keep only the part of the link before the first "&" (the whole link when there is none).
            int ampIndex = aTag.Link.IndexOf("&");
            InfoUrl = "http://218.20.201.20/www/zbmsg/2008/" + (ampIndex >= 0 ? aTag.Link.Remove(ampIndex) : aTag.Link);

            string dlHtml = string.Empty;
            try
            {
                dlHtml = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).ToLower().Replace(" ", "");
            }
            catch (Exception)
            {
                continue;
            }
            string filterHtml = dlHtml.Replace("\n", "").Replace("\r", "").Replace("<u>", "<a>").Replace("</u>", "</a>");
            prjName = aTag.ToPlainTextString();

            // Content.
            Parser ctxParser = new Parser(new Lexer(dlHtml));
            NodeList ctxNodes = ctxParser.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), false));
            string ctxStr = Regex.Replace(ctxNodes.AsString().Replace(" ", ""), @"<[^>]*>", string.Empty, RegexOptions.IgnoreCase);
            HtmlTxt = ctxNodes.AsHtml();

            // Same page with empty <div>s removed and <u> turned into <a>; used for field extraction.
            Parser dlParser = new Parser(new Lexer(regexHtml.Replace(filterHtml, "")));
            NodeList dlNodes = dlParser.ExtractAllNodesThatMatch(new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), false));

            // Search for attachments.
            NodeList findFiles = dlNodes.ExtractAllNodesThatMatch(new TagNameFilter("a"), true);
            NodeList fileNode = new NodeList();
            if (findFiles != null)
            {
                for (int f = 0; f < findFiles.Count; f++)
                {
                    ATag fileA = findFiles[f] as ATag;
                    if (fileA != null && fileA.Link.Contains("uploadfile"))
                    {
                        fileNode.Add(fileA);
                    }
                }
            }

            // Publication date: taken from the text between "[" and "]" of the first row containing
            // "发布日期" in the fourth ancestor of the first content node, when that ancestor is a table.
            INode nods = dlNodes.Count > 0 ? dlNodes[0] : null;
            for (int up = 0; up < 4 && nods != null; up++)
            {
                nods = nods.Parent;
            }
            TableTag tb = nods as TableTag;
            if (tb != null)
            {
                for (int t = 0; t < tb.RowCount; t++)
                {
                    string rowText = tb.Rows[t].ToPlainTextString();
                    if (rowText.Contains("发布日期"))
                    {
                        int open = rowText.IndexOf("[");
                        int close = rowText.IndexOf("]");
                        if (close > open)
                        {
                            beginDate = rowText.Substring(open + 1, close - open - 1);
                        }
                        break;
                    }
                }
            }

            for (int k = 0; k < dlNodes.Count; k++)
            {
                if (!(dlNodes[k] is ITag))
                {
                    continue;
                }
                string nodeText = dlNodes[k].ToPlainTextString();
                bool hasNext = k + 1 < dlNodes.Count;

                // Winning bidder: text of the first <a> after the label, else the text of the next node.
                if (nodeText.Contains("中标候选人为:") || nodeText.Contains("中标人为:"))
                {
                    NodeList bidUnitNode = new NodeList();
                    dlNodes[k].CollectInto(bidUnitNode, new TagNameFilter("a"));
                    if (bidUnitNode.Count > 0)
                    {
                        // Find the matching anchor.
                        string label = nodeText.Contains("中标候选人为:") ? "中标候选人为:" : "中标人为:";
                        MatchCollection matchbidUnit = MatchesAfterLabel(anchorRegex, dlNodes[k].ToHtml(), label);
                        if (matchbidUnit != null && matchbidUnit.Count > 0)
                        {
                            // Strip the <a> tags from the match.
                            bidUnit = strReplace.Replace(matchbidUnit[0].ToString(), "");
                        }
                        if (string.IsNullOrEmpty(bidUnit) && hasNext)
                        {
                            bidUnit = dlNodes[k + 1].ToPlainTextString().Trim();
                        }
                    }
                    else if (hasNext)
                    {
                        bidUnit = dlNodes[k + 1].ToPlainTextString();
                    }
                }

                // Bid amount, stored in units of 10,000: a value in a node mentioning "万元" is taken
                // as is, any other value is divided by 10000.
                // NOTE(review): the third test repeats the first as written; possibly a full-width
                // colon variant was intended - confirm.
                if (nodeText.Contains("中标价:") || nodeText.Contains("投标报价:") || nodeText.Contains("中标价:") || nodeText.Contains("中标价为"))
                {
                    NodeList moneyNode = new NodeList();
                    dlNodes[k].CollectInto(moneyNode, new TagNameFilter("a"));
                    bool readAmount = true;
                    if (moneyNode.Count > 0)
                    {
                        // With anchors present the amount is only read when an anchor follows the label.
                        string nodeHtml = dlNodes[k].ToHtml();
                        MatchCollection matchmoney = null;
                        if (nodeText.Contains("中标价:"))
                        {
                            matchmoney = MatchesAfterLabel(anchorRegex, nodeHtml, "中标价:");
                        }
                        if (nodeText.Contains("投标报价:"))
                        {
                            matchmoney = MatchesAfterLabel(anchorRegex, nodeHtml, "投标报价:");
                        }
                        readAmount = matchmoney != null && matchmoney.Count > 0;
                    }

                    if (readAmount)
                    {
                        Match amount = regdecimal.Match(nodeText);
                        decimal parsed;
                        if (amount.Success && decimal.TryParse(amount.Value, out parsed))
                        {
                            decMoney = nodeText.Contains("万元") ? parsed : parsed / 10000;
                        }
                    }
                }
            }

            // Discard a bidder candidate that has no character in U+4E00..U+9FA5 or that contains
            // a run matching [0-9]+[.]{0,1}[0-9]+.
            bidUnit = bidUnit.Replace(" ", "").Trim();
            if (!Regex.IsMatch(bidUnit, @"[\u4e00-\u9fa5]") || !string.IsNullOrEmpty(Regex.Match(bidUnit, @"[0-9]+[.]{0,1}[0-9]+").Value))
            {
                bidUnit = "";
            }

            // First fallback: plain text of the <div> children of span.news_show.
            if (string.IsNullOrEmpty(bidUnit) || decMoney <= 0)
            {
                string txt = string.Empty;
                NodeList dtList = new Parser(new Lexer(dlHtml)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), true)));
                if (dtList != null && dtList.Count > 1)
                {
                    for (int k = 0; k < dtList.Count; k++)
                    {
                        string current = dtList[k].ToPlainTextString().Trim();
                        if (current.Contains("中标候选人") || current.Contains("中标人"))
                        {
                            try
                            {
                                // When the next <div> is blank, the one after it is used instead.
                                int follow = string.IsNullOrEmpty(dtList[k + 1].ToPlainTextString().Trim()) ? k + 2 : k + 1;
                                txt += current;
                                string text = txt.Remove(txt.Length - txt.IndexOf("为:") - 2);
                                if (string.IsNullOrEmpty(text))
                                {
                                    txt += current;
                                    txt += dtList[follow].ToPlainTextString().Trim() + "\r\n";
                                }
                                else
                                {
                                    txt += current + "\r\n";
                                }
                            }
                            catch
                            {
                                // Best effort: a label at the end of the list has no following node.
                            }
                        }
                        else
                        {
                            txt += current + "\r\n";
                        }
                    }

                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = BidUnitFromText(txt);
                    }
                    if (decMoney <= 0)
                    {
                        string monerystr = new Regex(@"(金额|价格|报价|中标价)(:|:)[^\r\n]+\r\n").Match(txt).Value.Replace("中标价", "").Replace("金额", "").Replace("价格", "").Replace("报价", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                        decMoney = AmountFromText(monerystr, decMoney);
                    }
                }
            }

            // Second fallback: plain text of the <p> children of span.news_show.
            if (string.IsNullOrEmpty(bidUnit) || decMoney <= 0)
            {
                string txt = string.Empty;
                NodeList dtList = new Parser(new Lexer(dlHtml)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("p"), new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "news_show")), true)));
                if (dtList != null && dtList.Count > 1)
                {
                    Regex regexsHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
                    for (int k = 0; k < dtList.Count; k++)
                    {
                        string current = dtList[k].ToPlainTextString().Trim();
                        if (current.Contains("中标候选人") || current.Contains("中标人"))
                        {
                            if (k + 1 < dtList.Count && string.IsNullOrEmpty(dtList[k + 1].ToPlainTextString().Trim()))
                            {
                                // NOTE(review): this advances to the blank paragraph and appends its
                                // (empty) text, so the labelled paragraph itself is not added - confirm
                                // that this is intended.
                                k++;
                                txt += dtList[k].ToPlainTextString().Trim();
                            }
                            else
                            {
                                txt += current;
                                string text = txt.Remove(txt.Length - txt.IndexOf("为:") - 2);

                                // The buffer is restarted from the labelled paragraph.
                                txt = string.IsNullOrEmpty(text) ? current : current + "\r\n";
                            }
                        }
                        else
                        {
                            txt += current + "\r\n";
                        }
                        txt = regexsHtml.Replace(txt, "");
                    }

                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = BidUnitFromText(txt);
                    }

                    // Only search for an amount that is still missing, as the first fallback does.
                    if (decMoney <= 0)
                    {
                        string monerystr = new Regex(@"(金额|价格|报价|中标价|中标价为)(:|:)[^\r\n]+\r\n").Match(txt).Value.Replace("中标价为", "").Replace("中标价", "").Replace("金额", "").Replace("价格", "").Replace("报价", "").Replace(":", "").Replace(":", "").Replace(",", "").Replace(",", "").Trim();
                        decMoney = AmountFromText(monerystr, decMoney);
                    }
                }
            }

            prjName = ToolDb.GetPrjName(prjName.Replace(" ", ""));
            bidType = ToolHtml.GetInviteTypes(prjName);
            BidInfo info = ToolDb.GenBidInfo("广东省", "广州市区", "番禺区", string.Empty, string.Empty, prjName, buildUnit, beginDate, bidUnit, beginDate, string.Empty, ctxStr, string.Empty, "广州市番禺区建设局", bidType, "建设工程", string.Empty, decMoney.ToString(), InfoUrl, string.Empty, HtmlTxt);
            list.Add(info);

            // One attachment record per collected file link.
            try
            {
                for (int f = 0; f < fileNode.Count; f++)
                {
                    ATag file = fileNode[f] as ATag;
                    BaseAttach attach = ToolDb.GenBaseAttach(file.StringText, info.Id, "http://218.20.201.20" + file.Link);
                    base.AttachList.Add(attach);
                }
            }
            catch
            {
                // Best effort: a failing attachment must not drop the bid entry.
            }

            dlParser.Reset();
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}

/// <summary>
/// Applies <paramref name="pattern"/> to the part of <paramref name="html"/> that starts at the first
/// occurrence of <paramref name="label"/>; returns <c>null</c> when the label does not occur.
/// </summary>
private static MatchCollection MatchesAfterLabel(Regex pattern, string html, string label)
{
    int at = html.IndexOf(label);
    return at >= 0 ? pattern.Matches(html.Substring(at)) : null;
}

/// <summary>
/// Extracts the first "label:value" line for one of the bidder labels from <paramref name="txt"/>
/// and returns the value with the labels and colons removed.
/// </summary>
private static string BidUnitFromText(string txt)
{
    Regex regBidUnit = new Regex(@"(中标单位|中标候选单位|中标候选人为|中标人为):[^\r\n]+\r\n");
    return regBidUnit.Match(txt.Replace("\r\n\r\n", "")).Value.Replace("中标候选人为", "").Replace("中标人为", "").Replace("中标单位:", "").Replace("中标候选单位:", "").Replace(":", "").Trim();
}

/// <summary>
/// Reads the first number in <paramref name="monerystr"/> as an amount in units of 10,000.
/// A text mentioning "万元" or "万美元" is taken as is; otherwise the number is divided by 10000 and
/// results below 0.1 become 0. Returns <paramref name="current"/> when no number is present.
/// </summary>
private static decimal AmountFromText(string monerystr, decimal current)
{
    string digits = Regex.Match(monerystr, @"[0-9]+[.]{0,1}[0-9]+").Value;
    if (string.IsNullOrEmpty(digits))
    {
        return current;
    }

    decimal value;
    bool parsed = decimal.TryParse(digits, out value);
    if (monerystr.Contains("万元") || monerystr.Contains("万美元"))
    {
        return parsed ? value : current;
    }
    if (!parsed)
    {
        return 0;
    }
    value /= 10000;
    return value < 0.1m ? 0 : value;
}
/// <summary>
/// Creates a case expression from its when/then list and an else sub-expression.
/// </summary>
/// <param name="whenThenExpr">The when/then expression list to store.</param>
/// <param name="elseExpr">The else expression to store.</param>
internal CaseExpr(NodeList<WhenThenExpr> whenThenExpr, Node elseExpr)
{
    this._elseExpr = elseExpr;
    this._whenThenExpr = whenThenExpr;
}
/// <summary>
/// Crawls the paged grid <c>MoreInfoList1_DataGrid1</c> at <c>SiteUrl</c>, downloads every row's
/// detail page from <c>http://www.jszb.com.cn/jszb/YW_info/</c> and converts it into a <c>BidInfo</c>.
/// Attachment links found in the detail table are added to <c>AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as <c>MaxCount</c> entries have been collected.
/// </param>
/// <returns>
/// The collected bid entries; an empty list when the first list page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    int pageInt = 1;
    string html = string.Empty;
    string viewState = string.Empty;
    string eventValidation = string.Empty;
    string cookiestr = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default, ref cookiestr);
    }
    catch
    {
        // Nothing can be crawled without the first page.
        return list;
    }

    // The pager text has the form "1/<pages>页"; keep one page when it cannot be parsed.
    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("1/", "页");
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Best effort: fall back to one page.
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Post the pager form back with the state of the page currently held in html.
            viewState = this.ToolWebSite.GetAspNetViewState(html);
            eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            string __CSRFTOKEN = ToolHtml.GetHtmlInputValue(html, "__CSRFTOKEN");
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__CSRFTOKEN", "__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS", "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION", "MoreInfoList1$txtProjectName", "MoreInfoList1$txtBiaoDuanName", "MoreInfoList1$txtBiaoDuanNo", "MoreInfoList1$txtJSDW", "MoreInfoList1$StartDate", "MoreInfoList1$EndDate", "MoreInfoList1$jpdDi", "MoreInfoList1$jpdXian" },
                new string[] { __CSRFTOKEN, "MoreInfoList1$Pager", i.ToString(), "", viewState, "76D0A3AC", eventValidation, "", "", "", "", "", "", "-1", "-1" });
            try
            {
                cookiestr = cookiestr.GetReplace("path=/; HttpOnly").Replace(",", "");
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default, ref cookiestr);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
        if (listNode == null || listNode.Count == 0)
        {
            continue;
        }

        TableTag table = listNode[0] as TableTag;
        if (table == null)
        {
            continue;
        }

        for (int j = 0; j < table.RowCount; j++)
        {
            string prjName = string.Empty, buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty, beginDate = string.Empty, endDate = string.Empty, bidType = string.Empty, specType = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, bidCtx = string.Empty, prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty, area = string.Empty;
            TableRow tr = table.Rows[j];

            // Skip rows that cannot hold an entry: fewer than the four cells read below, no link in
            // the title cell, or a link without the onclick attribute that carries the detail URL.
            // Without these guards one such row aborted the whole crawl.
            if (tr.ColumnCount < 4)
            {
                continue;
            }
            ATag aTag = tr.Columns[1].GetATag();
            if (aTag == null)
            {
                continue;
            }
            string onclick = aTag.GetAttribute("onclick");
            if (onclick == null)
            {
                continue;
            }

            // The title may start with a bracketed tag; that tag is used as the area and removed
            // from the project name.
            prjName = aTag.GetAttribute("title").GetReplace(";");
            area = prjName.GetReplace("[", "【").GetReplace("]", "】").GetRegexBegEnd("【", "】");
            if (!string.IsNullOrEmpty(area))
            {
                prjName = prjName.GetReplace("[" + area + "]");
            }
            else
            {
                prjName = prjName.GetReplace("[,]");
            }
            beginDate = tr.Columns[3].ToPlainTextString().GetDateRegex();
            InfoUrl = "http://www.jszb.com.cn/jszb/YW_info/" + onclick.Replace("(", "(").GetRegexBegEnd("(", ",").GetReplace("\",../,./");

            string htmldtl = string.Empty;
            try
            {
                htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(htmldtl));
            NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table1")));
            if (dtlNode == null || dtlNode.Count == 0)
            {
                continue;
            }

            HtmlTxt = dtlNode.AsHtml();
            TableTag tag = dtlNode[0] as TableTag;
            if (tag == null)
            {
                continue;
            }

            // Flatten the detail table into "label:value" lines: odd-positioned cells are written
            // as "text:", even-positioned cells as "text" followed by a line break.
            for (int r = 0; r < tag.RowCount; r++)
            {
                for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                {
                    string temp = tag.Rows[r].Columns[c].ToNodePlainString();
                    if ((c + 1) % 2 == 0)
                    {
                        bidCtx += temp + "\r\n";
                    }
                    else
                    {
                        bidCtx += temp.GetReplace(":,:") + ":";
                    }
                }
            }

            buildUnit = bidCtx.GetBuildRegex();
            code = bidCtx.GetCodeRegex().GetCodeDel();
            bidUnit = bidCtx.GetBidRegex();
            if (string.IsNullOrEmpty(bidUnit))
            {
                bidUnit = bidCtx.GetRegex("第一中标候选单位为,第一名");
            }
            bidMoney = bidCtx.GetMoneyRegex();
            if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
            {
                bidMoney = bidCtx.GetMoneyRegex(null, true);
            }
            prjMgr = bidCtx.GetMgrRegex();
            msgType = "江苏省建设工程招标投标办公室";
            specType = "建设工程";
            bidType = "建设工程";
            BidInfo info = ToolDb.GenBidInfo("江苏省", "江苏省及地市", area, string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
            list.Add(info);

            // Register every attachment link found in the detail table.
            parser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k] as ATag;
                    if (a.IsAtagAttach())
                    {
                        string link = string.Empty;
                        if (a.Link.ToLower().Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.jszb.com.cn/" + a.Link;
                        }
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }
    return list;
}
/// <summary>
/// Creates a case expression that has no else sub-expression; delegates to the two-argument
/// constructor with a <c>null</c> else expression.
/// </summary>
/// <param name="whenThenExpr">The when/then expression list to store.</param>
internal CaseExpr(NodeList<WhenThenExpr> whenThenExpr)
    : this(whenThenExpr, elseExpr: null)
{
}
/// <summary>
/// Reports whether <c>NodeList</c> holds an entry whose <c>Index</c> equals <paramref name="nodeIndex"/>.
/// </summary>
/// <param name="nodeIndex">The index value to look for.</param>
/// <returns><c>true</c> when a matching entry exists; otherwise <c>false</c>.</returns>
public bool IsPresent(int nodeIndex)
{
    bool found = NodeList.Exists(candidate => candidate.Index == nodeIndex);
    return found;
}
/// <summary>
/// Walks the accessibility tree rooted at <paramref name="n"/> and adds to <paramref name="nodes"/>
/// every node that belongs to the window of <paramref name="e"/>, whose view id does not start with
/// <c>SystemUiPackage</c>, and for which <paramref name="condition"/> returns <c>true</c>.
/// </summary>
/// <param name="n">Root of the subtree to inspect; may be <c>null</c>.</param>
/// <param name="e">Event whose <c>WindowId</c> a node must share to be collected.</param>
/// <param name="condition">Predicate a node must satisfy to be collected.</param>
/// <param name="disposeIfUnused">
/// When <c>true</c>, <paramref name="n"/> is disposed before returning unless it was added to the result.
/// </param>
/// <param name="nodes">Accumulator shared across the recursion; created when <c>null</c>.</param>
/// <param name="recursionDepth">Nesting level of <paramref name="n"/>; subtrees at level 50 or deeper are not inspected.</param>
/// <returns>The accumulator containing all collected nodes.</returns>
private NodeList GetWindowNodes(AccessibilityNodeInfo n, AccessibilityEvent e, Func<AccessibilityNodeInfo, bool> condition, bool disposeIfUnused, NodeList nodes = null, int recursionDepth = 0)
{
    if (nodes == null)
    {
        nodes = new NodeList();
    }

    var dispose = disposeIfUnused;
    if (n != null && recursionDepth < 50)
    {
        if (n.WindowId == e.WindowId && !(n.ViewIdResourceName?.StartsWith(SystemUiPackage) ?? false) && condition(n))
        {
            // The node is handed to the caller through the result, so it must stay alive.
            dispose = false;
            nodes.Add(n);
        }

        for (var i = 0; i < n.ChildCount; i++)
        {
            // Check the limit before fetching the child so that no node is obtained only to be dropped.
            if (i > 100)
            {
                global::Android.Util.Log.Info(BitwardenTag, "Too many child iterations.");
                break;
            }

            // GetChild may return null; skip such slots instead of dereferencing them.
            var childNode = n.GetChild(i);
            if (childNode == null)
            {
                continue;
            }

            if (childNode.GetHashCode() == n.GetHashCode())
            {
                global::Android.Util.Log.Info(BitwardenTag, "Child node is the same as parent for some reason.");
            }
            else
            {
                // Every child sits exactly one level below its parent. (A post-increment here would
                // pass the parent's own level and make the limit depend on the sibling position.)
                GetWindowNodes(childNode, e, condition, true, nodes, recursionDepth + 1);
            }
        }
    }

    if (dispose)
    {
        n?.Dispose();
    }
    return nodes;
}
/// <summary>
/// Appends <paramref name="node"/> to <c>NodeList</c> and advances <c>NextNodeIndex</c> by one.
/// </summary>
/// <param name="node">The node to append.</param>
public void AddNode(NavGraphNode node)
{
    NodeList.Add(node);
    NextNodeIndex += 1;
}
/// <summary>
/// Creates a graph node by forwarding both arguments to the base-class constructor.
/// </summary>
/// <param name="value">Value forwarded to the base constructor.</param>
/// <param name="neighbors">Neighbor list forwarded to the base constructor.</param>
public GraphNode(T value, NodeList<T> neighbors)
    : base(value, neighbors)
{
}
/// <summary>
/// Clears <c>EdgeList</c> and then <c>NodeList</c>.
/// </summary>
public void Clear()
{
    EdgeList.Clear();
    NodeList.Clear();
}
/// <summary>
/// Reads the table with <c>bordercolor="#222222"</c> from <c>SiteUrl</c> and converts every row with
/// <c>valign="top"</c> into a <c>MeetInfo</c> named "开标会". Before returning a non-empty result, the
/// rows of this source already stored in table <c>MeetInfo</c> for the crawled date range are deleted.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns as soon as <c>MaxCount</c> entries have been collected.
/// </param>
/// <returns>
/// The collected meeting entries; an empty list when the page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<MeetInfo>();
    string html = string.Empty;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("bordercolor", "#222222")));
    TableTag table = nodeList != null && nodeList.Count > 0 ? nodeList[0] as TableTag : null;
    if (table != null)
    {
        for (int i = 0; i < table.RowCount; i++)
        {
            string meetTime = string.Empty, prjName = string.Empty, meetName = string.Empty, place = string.Empty, builUnit = string.Empty;
            TableRow tr = table.Rows[i];

            // Only rows marked valign="top" that have the second cell read below are processed.
            if (tr.GetAttribute("valign") != "top" || tr.ColumnCount < 2)
            {
                continue;
            }

            string meetstr = string.Empty;
            string temp = tr.Columns[1].ToPlainTextString().Replace("(Y)", "").Replace("(Y)", "").Replace("(一年)", "").Replace("(一年)", "");

            // The project code is the first parenthesised part of the cell text; it is removed
            // from the text that is split into fields below.
            string code = temp.Replace("(", "(").Replace(")", ")").GetRegexBegEnd("(", ")");
            if (!string.IsNullOrEmpty(code))
            {
                meetstr = temp.Replace("(" + code + ")", "").Replace("(" + code + ")", "");
            }
            else
            {
                meetstr = temp;
            }

            // Space-separated fields: the first one is the project name.
            string[] str = meetstr.Split(' ');
            prjName = str[0];
            if (str.Length > 1)
            {
                place = str[1];
            }

            // NOTE(review): builUnit is always empty here, so the third field replaces the place
            // whenever it exists; possibly "builUnit = str[2]" was intended - confirm before changing.
            if (string.IsNullOrEmpty(builUnit) && str.Length > 2)
            {
                place = str[2];
            }

            meetName = "开标会";
            System.Text.RegularExpressions.Regex regDate = new System.Text.RegularExpressions.Regex(@"\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}");
            meetTime = regDate.Match(meetstr).Value;
            if (string.IsNullOrEmpty(meetTime))
            {
                meetTime = meetstr.GetDateRegex();
            }

            MeetInfo info = ToolDb.GenMeetInfo("广东省", "深圳政府采购", string.Empty, string.Empty, prjName, place, meetName, meetTime, string.Empty, "深圳市政府采购南山分中心", SiteUrl, code, builUnit, string.Empty, string.Empty);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                DeleteStoredMeetInfos(list);
                return list;
            }
        }
    }

    if (list != null && list.Count > 0)
    {
        DeleteStoredMeetInfos(list);
    }
    return list;
}

/// <summary>
/// Deletes the rows of this source from table <c>MeetInfo</c> whose <c>BeginDate</c> lies between the
/// earliest <c>BeginDate</c> in <paramref name="list"/> and the day after the latest one.
/// </summary>
/// <param name="list">Non-empty list of crawled <c>MeetInfo</c> entries.</param>
private void DeleteStoredMeetInfos(IList list)
{
    IList<MeetInfo> result = list as IList<MeetInfo>;
    string bDate = result.OrderBy(x => x.BeginDate).First().BeginDate.ToString().GetDateRegex("yyyy/MM/dd");
    string eDate = Convert.ToDateTime(result.OrderByDescending(x => x.BeginDate).First().BeginDate).AddDays(1).ToString().GetDateRegex("yyyy/MM/dd");

    // NOTE(review): the statement is built by concatenation; both values pass through
    // GetDateRegex("yyyy/MM/dd") first. Switch to parameters if ToolDb offers such an overload.
    string sqlwhere = " where City='深圳政府采购' and InfoSource='深圳市政府采购南山分中心' and BeginDate>='" + bDate + "' and BeginDate<='" + eDate + "'";
    ToolDb.ExecuteSql("delete from MeetInfo " + sqlwhere);
}
/// <summary>
/// Crawls the paged list at <c>SiteUrl</c>, downloads each row's detail page from
/// <c>http://www.gzzb.gd.cn</c>, converts it into a <c>NotifyInfo</c> of type "办事指南" and saves it
/// directly through <c>ToolDb.SaveEntity</c>. For every newly saved entry the images and attachment
/// links of the detail body are saved as well.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, the method returns once the number of generated entries reaches <c>MaxCount</c>;
/// the entry that reaches the limit is not saved.
/// </param>
/// <returns>Always <c>null</c>; results are persisted instead of being returned.</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    // Determine the page count.
    int totalPages = 1;
    int generated = 0;
    string pageHtml = string.Empty;
    try
    {
        pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default).GetJsString();
    }
    catch (Exception)
    {
        return null;
    }

    Parser parser = new Parser(new Lexer(pageHtml));
    NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("height", "25")), true), new TagNameFilter("a")));
    if (pagerLinks != null && pagerLinks.Count > 0)
    {
        try
        {
            // The last pager link carries the highest page number after "page="; the "kdxx"
            // suffix only serves as an end marker for the extraction.
            string lastLink = pagerLinks.GetATag(pagerLinks.Count - 1).Link + "kdxx";
            string pageText = lastLink.GetRegexBegEnd("page=", "kdxx").Replace("&", "");
            totalPages = Convert.ToInt32(pageText);
        }
        catch
        {
            totalPages = 1;
        }
    }

    for (int pageNo = 1; pageNo <= totalPages; pageNo++)
    {
        if (pageNo > 1)
        {
            try
            {
                pageHtml = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "&page=" + pageNo.ToString(), Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(pageHtml));
        NodeList tables = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "bszn_right_table")), true), new TagNameFilter("table")));
        if (tables == null || tables.Count <= 0)
        {
            continue;
        }

        TableTag table = tables[0] as TableTag;

        // Row 0 is not processed.
        for (int rowNo = 1; rowNo < table.RowCount; rowNo++)
        {
            TableRow tr = table.Rows[rowNo];
            string infoType = "办事指南";
            string headName = tr.Columns[1].ToNodePlainString();
            string releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
            string infoUrl = "http://www.gzzb.gd.cn" + tr.Columns[1].GetATagHref();

            string detailHtml = string.Empty;
            try
            {
                detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            parser = new Parser(new Lexer(detailHtml));
            NodeList contentNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contentDiv")));
            if (contentNodes == null || contentNodes.Count <= 0)
            {
                continue;
            }

            string ctxHtml = contentNodes.AsHtml();
            string infoCtx = ctxHtml.ToCtxString();
            string msgType = MsgTypeCosnt.GuangZhouMsgType;
            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, string.Empty, msgType, infoUrl, ctxHtml, "广东省", "广州市区", string.Empty, infoCtx, infoType);

            generated++;
            if (!crawlAll && generated >= this.MaxCount)
            {
                return null;
            }

            // Images and attachments are only stored when SaveEntity reports success for the entry.
            if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
            {
                continue;
            }

            Parser imageParser = new Parser(new Lexer(ctxHtml));
            NodeList images = imageParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
            if (images != null && images.Count > 0)
            {
                for (int n = 0; n < images.Count; n++)
                {
                    ImageTag imgTag = images[n] as ImageTag;
                    try
                    {
                        BaseAttach picture = ToolHtml.GetBaseAttach(imgTag.GetAttribute("src"), headName, info.Id);
                        if (picture != null)
                        {
                            ToolDb.SaveEntity(picture, string.Empty);
                        }
                    }
                    catch
                    {
                    }
                }
            }

            Parser linkParser = new Parser(new Lexer(ctxHtml));
            NodeList links = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (links != null && links.Count > 0)
            {
                for (int n = 0; n < links.Count; n++)
                {
                    ATag aTag = links[n] as ATag;
                    if (!aTag.IsAtagAttach())
                    {
                        continue;
                    }
                    try
                    {
                        // Links without "http" are resolved against the site root.
                        BaseAttach file = aTag.Link.Contains("http")
                            ? ToolHtml.GetBaseAttach(aTag.Link, aTag.LinkText, info.Id)
                            : ToolHtml.GetBaseAttach("http://www.gzzb.gd.cn" + aTag.Link, aTag.LinkText, info.Id);
                        if (file != null)
                        {
                            ToolDb.SaveEntity(file, string.Empty);
                        }
                    }
                    catch
                    {
                    }
                }
            }
        }
    }
    return null;
}
/// <summary>
/// Downloads the JSON listing at <c>SiteUrl + MaxCount</c>, then for each entry of its "data" array
/// fetches the detail page and derives bidder, amount and project code from the page text and,
/// when the text alone is not enough, from the tables on the page.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been collected.</param>
/// <returns>The collected BidInfo entries (empty when the listing cannot be downloaded).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<BidInfo>();
    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + MaxCount, Encoding.UTF8);
    }
    catch
    {
        return list;
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    serializer.MaxJsonLength = 50000000;
    Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(html);
    object[] dicList = (object[])smsTypeJson["data"];

    foreach (object obj in dicList)
    {
        Dictionary<string, object> dic = obj as Dictionary<string, object>;
        string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
               endDate = string.Empty, bidType = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

        string prjName = Convert.ToString(dic["TITLE"]);
        string beginDate = Convert.ToString(dic["CREATED_ON"]);
        string InfoUrl = Convert.ToString(dic["URL"]);

        string htldtl;
        try
        {
            htldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
        }
        catch
        {
            continue;
        }

        NodeList dtlNode = new Parser(new Lexer(htldtl)).ExtractAllNodesThatMatch(new TagNameFilter("body"));
        if (dtlNode == null || dtlNode.Count <= 0)
        {
            continue;
        }

        string HtmlTxt = dtlNode.AsHtml();

        // Collapse runs of blank lines: one 4-to-1 pass, eleven 2-to-1 passes, four tabbed passes
        // (same number of passes as the original chained Replace calls).
        string bidCtx = HtmlTxt.ToCtxString().Replace("\r\n\r\n\r\n\r\n", "\r\n");
        for (int pass = 0; pass < 11; pass++)
        {
            bidCtx = bidCtx.Replace("\r\n\r\n", "\r\n");
        }
        for (int pass = 0; pass < 4; pass++)
        {
            bidCtx = bidCtx.Replace("\r\n\t\r\n\t\r\n", "\r\n\t");
        }
        bidCtx = System.Web.HttpUtility.HtmlDecode(bidCtx);

        // Strip leftover numeric character references ("&#...;").
        while (true)
        {
            string str = bidCtx.GetRegexBegEnd("&#", ";");
            if (string.IsNullOrEmpty(str))
            {
                break;
            }
            string stripped = bidCtx.Replace("&#" + str + ";", "");
            if (stripped == bidCtx)
            {
                // Nothing was removed; without this guard the loop would never terminate.
                break;
            }
            bidCtx = stripped;
        }

        buildUnit = bidCtx.GetBuildRegex();
        bidUnit = bidCtx.GetBidRegex();
        bidMoney = bidCtx.GetMoneyRegex();
        if (!string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
        {
            bidMoney = bidCtx.GetMoneyRegex(null, true, "万元");
        }

        // ctx accumulates "header:value" lines taken from tables; it is shared by both passes below.
        string ctx = string.Empty;

        #region Multi-table matching
        // Pass 1: tables nested under <span id="holder">.
        if (string.IsNullOrEmpty(bidUnit))
        {
            NodeList dtList = new Parser(new Lexer(htldtl)).ExtractAllNodesThatMatch(
                new AndFilter(
                    new HasParentFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "holder")), true),
                    new TagNameFilter("table")));
            if (dtList != null && dtList.Count > 0)
            {
                ctx = AppendFirstBidTable(ctx, dtList);
                if (string.IsNullOrEmpty(ctx))
                {
                    if (dtList.Count > 3)
                    {
                        ctx = AppendPairsIfMultiRow(ctx, dtList[2] as TableTag, false);
                        // Fall back to table 1 only when table 2 mentions none of the supplier labels.
                        // (The original joined these with "||", which made the fallback fire unless
                        // all three labels were present.)
                        if (!ctx.Contains("投标供应商") && !ctx.Contains("成交供应商") && !ctx.Contains("中标供应商"))
                        {
                            ctx = AppendPairsIfMultiRow(string.Empty, dtList[1] as TableTag, false);
                        }
                    }
                    else if (dtList.Count > 2)
                    {
                        ctx = AppendPairsIfMultiRow(ctx, dtList[1] as TableTag, false);
                    }
                    else
                    {
                        TableTag tab = dtList[0] as TableTag;
                        if (tab.RowCount > 1)
                        {
                            for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
                            {
                                string start = System.Web.HttpUtility.HtmlDecode(tab.Rows[0].Columns[d].ToNodePlainString());
                                string end = System.Web.HttpUtility.HtmlDecode(tab.Rows[1].Columns[d].ToNodePlainString());
                                ctx += start + ":";
                                ctx += end + "\r\n";
                            }
                        }
                    }
                }

                bidUnit = ctx.GetBidRegex();
                bidMoney = ctx.GetMoneyRegex(new string[] { "成交金额" });
                if (bidMoney == "" || bidMoney == "0")
                {
                    bidMoney = ctx.GetMoneyRegex();
                }

                if (!string.IsNullOrEmpty(bidUnit) && bidMoney == "0")
                {
                    // Bidder known but no amount: look for the table row that names the bidder.
                    string dtlCtx = CollectRowContaining(dtList[0] as TableTag, bidUnit);
                    if (string.IsNullOrEmpty(dtlCtx))
                    {
                        NodeList tableNode = new Parser(new Lexer(HtmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        for (int t = 1; t <= 5 && string.IsNullOrEmpty(dtlCtx) && tableNode.Count > t; t++)
                        {
                            dtlCtx = CollectRowContaining(tableNode[t] as TableTag, bidUnit);
                        }
                    }
                    string unit = dtlCtx.GetBidRegex();
                    string money = dtlCtx.GetMoneyRegex();
                    if (bidUnit == unit)
                    {
                        bidMoney = money;
                    }
                }

                if (!string.IsNullOrEmpty(bidUnit) && (bidUnit.Contains("无中标") || bidUnit.Contains("没有")))
                {
                    bidUnit = "没有中标商";
                    bidMoney = "0";
                }
            }
        }

        // Pass 2: every table on the page.
        if (string.IsNullOrEmpty(bidUnit))
        {
            NodeList dtList = new Parser(new Lexer(htldtl)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
            if (dtList != null && dtList.Count > 0)
            {
                ctx = AppendFirstBidTable(ctx, dtList);
                if (string.IsNullOrEmpty(ctx))
                {
                    // More than 3 tables -> table 2; 2 or 3 tables -> table 1; a single table -> table 0.
                    int fallbackIndex = dtList.Count > 3 ? 2 : (dtList.Count > 1 ? 1 : 0);
                    ctx = AppendPairsIfMultiRow(ctx, dtList[fallbackIndex] as TableTag, true);
                }

                bidUnit = ctx.GetBidRegex();
                if (string.IsNullOrEmpty(bidUnit))
                {
                    bidUnit = ctx.GetRegex("中标承包商");
                }
                bidMoney = ctx.GetMoneyRegex();
                if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                {
                    bidMoney = bidCtx.GetRegex("中标价").GetMoney();
                }

                if (string.IsNullOrEmpty(bidUnit))
                {
                    // Last resort: add the final table and retry the same extraction.
                    if (dtList.Count > 4)
                    {
                        ctx = AppendPairsIfMultiRow(ctx, dtList[dtList.Count - 1] as TableTag, true);
                    }
                    bidUnit = ctx.GetBidRegex();
                    if (string.IsNullOrEmpty(bidUnit))
                    {
                        bidUnit = ctx.GetRegex("中标承包商");
                    }
                    bidMoney = ctx.GetMoneyRegex();
                    if (bidMoney == "0" || string.IsNullOrEmpty(bidMoney))
                    {
                        bidMoney = bidCtx.GetRegex("中标价").GetMoney();
                    }
                }

                if (!string.IsNullOrEmpty(bidUnit) && (bidUnit.Contains("无中标") || bidUnit.Contains("没有")))
                {
                    bidUnit = "没有中标商";
                    bidMoney = "0";
                }
            }
        }
        #endregion

        if (string.IsNullOrEmpty(bidUnit) && bidCtx.Contains("供应商不足"))
        {
            bidUnit = "没有中标商";
            bidMoney = "0";
        }

        if (bidMoney != "0")
        {
            try
            {
                decimal mon = decimal.Parse(bidMoney);
                if (mon > 100000)
                {
                    bidMoney = bidMoney.GetMoney();
                }
            }
            catch
            {
                // Unparseable amounts are passed through unchanged.
            }
        }

        string[] CodeRegex = { "工程编号", "项目编号", "招标编号", "中标编号" };
        code = bidCtx.GetCodeRegex(CodeRegex).GetCodeDel();
        prjName = prjName.Replace("成交", "");

        // Fallback: take the text between a code label and the next ")".
        // NOTE(review): both ")" replacements and both ":" replacements are identical as written —
        // possibly a full-width variant was lost; confirm against the upstream file.
        if (string.IsNullOrEmpty(code))
        {
            string marked = bidCtx.Replace(")", "kdxx").Replace(")", "kdxx");
            string[] fallbackLabels = { "招标编号", "项目编号", "工程编号", "编号" };
            for (int k = 0; k < fallbackLabels.Length && string.IsNullOrEmpty(code); k++)
            {
                code = marked.GetRegexBegEnd(fallbackLabels[k], "kdxx").Replace(":", "").Replace(":", "");
            }
        }
        if (Encoding.Default.GetByteCount(code) > 50)
        {
            code = string.Empty;
        }
        if (!string.IsNullOrEmpty(code))
        {
            code = code.GetChina();
        }
        if (code.Contains("("))
        {
            code = "";
        }

        bidType = prjName.GetInviteBidType();
        string specType = "政府采购";
        string msgType = "大鹏新区公共资源交易中心";
        BidInfo info = ToolDb.GenBidInfo("广东省", "深圳区及街道工程", "大鹏新区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
        list.Add(info);
        if (!crawlAll && list.Count >= this.MaxCount)
        {
            return list;
        }
    }

    return list;
}

/// <summary>
/// Appends one "header:value\r\n" line per column of row 0, pairing each row-0 cell with the row-1 cell below it.
/// </summary>
/// <param name="ctx">Text collected so far.</param>
/// <param name="tab">Table to read.</param>
/// <param name="ignoreColumnErrors">When true, an exception raised while reading a column is swallowed
/// (whatever was already appended for that column is kept); when false it propagates.</param>
/// <returns>The extended text.</returns>
private static string AppendHeaderValuePairs(string ctx, TableTag tab, bool ignoreColumnErrors)
{
    for (int d = 0; d < tab.Rows[0].ColumnCount; d++)
    {
        if (ignoreColumnErrors)
        {
            try
            {
                ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
                ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
            }
            catch
            {
            }
        }
        else
        {
            ctx += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
            ctx += tab.Rows[1].Columns[d].ToNodePlainString() + "\r\n";
        }
    }
    return ctx;
}

/// <summary>
/// Same as AppendHeaderValuePairs, but only when the table has more than one row.
/// </summary>
private static string AppendPairsIfMultiRow(string ctx, TableTag tab, bool ignoreColumnErrors)
{
    if (tab.RowCount > 1)
    {
        return AppendHeaderValuePairs(ctx, tab, ignoreColumnErrors);
    }
    return ctx;
}

/// <summary>
/// Appends the header/value pairs of the first table accepted by IsTableBid; later tables are not examined.
/// </summary>
private string AppendFirstBidTable(string ctx, NodeList tables)
{
    for (int c = 0; c < tables.Count; c++)
    {
        TableTag tab = tables[c] as TableTag;
        if (IsTableBid(tab))
        {
            return AppendHeaderValuePairs(ctx, tab, true);
        }
    }
    return ctx;
}

/// <summary>
/// Finds the first row (the last row is never considered) whose plain text contains
/// <paramref name="bidUnit"/> and returns it as "header:value\r\n" lines using row 0 as headers.
/// </summary>
/// <returns>The collected lines, or an empty string when no row matches.</returns>
private static string CollectRowContaining(TableTag tab, string bidUnit)
{
    string collected = string.Empty;
    for (int c = 0; c < tab.RowCount; c++)
    {
        if ((c + 2) > tab.RowCount)
        {
            continue;
        }
        if (!tab.Rows[c].ToNodePlainString().Contains(bidUnit))
        {
            continue;
        }
        for (int d = 0; d < tab.Rows[c].ColumnCount; d++)
        {
            collected += tab.Rows[0].Columns[d].ToNodePlainString() + ":";
            collected += tab.Rows[c].Columns[d].ToNodePlainString() + "\r\n";
        }
        break;
    }
    return collected;
}
/// <summary>
/// Crawls the contract-filing JSON listing (20 entries per page), fetches the detail page of each
/// entry in its "DataList" array and builds one ProjectConpact per entry.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been collected.</param>
/// <returns>The collected ProjectConpact entries (empty when the first page cannot be downloaded).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl;
    int page = 1;
    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    if (htl.Contains("RowCount"))
    {
        try
        {
            string pageStr = htl.Substring(htl.IndexOf("RowCount")).Replace("RowCount", "").Replace("}", "").Replace(":", "").Replace("\"", "");
            // Round up: a partially filled last page still counts as a page.
            // (Convert.ToInt32(decimal) rounds to nearest, so "convert then +1" over-counted
            // whenever the fractional part was above one half.)
            page = (int)Math.Ceiling(decimal.Parse(pageStr) / 20);
        }
        catch
        {
            // Keep the default of a single page when the total cannot be parsed.
        }
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    for (int i = 1; i <= page; i++)
    {
        if (i > 1)
        {
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl("http://www.szjs.gov.cn/build/build.ashx?_=1352582850568&menu=%E9%A1%B9%E7%9B%AE%E4%BF%A1%E6%81%AF&type=%E6%96%BD%E5%B7%A5%E7%9B%91%E7%90%86%E5%90%88%E5%90%8C%E5%A4%87%E6%A1%88&pageSize=20&pageIndex=" + i.ToString(), Encoding.UTF8);
            }
            catch (Exception)
            {
                continue;
            }
        }

        Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(htl);
        object dataList;
        if (!smsTypeJson.TryGetValue("DataList", out dataList))
        {
            continue;
        }

        foreach (object obj2 in (object[])dataList)
        {
            Dictionary<string, object> dicSmsType = (Dictionary<string, object>)obj2;
            string pUrl = string.Empty, pSubcontractCode = string.Empty, pSubcontractName = string.Empty,
                   pSubcontractCompany = string.Empty, pInfoSource = string.Empty, pRecordDate = string.Empty,
                   pCompactPrice = string.Empty, pCompactType = string.Empty, pBuildUnit = string.Empty,
                   pPrjCode = string.Empty, PrjName = string.Empty, pPrjMgrQual = string.Empty,
                   pPrjMgrName = string.Empty, pContUnit = string.Empty, pCreatetime = string.Empty;
            try
            {
                string noid = Convert.ToString(dicSmsType["Nid"]);
                PrjName = Convert.ToString(dicSmsType["PrjName"]);
                pBuildUnit = Convert.ToString(dicSmsType["ConstOrg"]);
                pContUnit = Convert.ToString(dicSmsType["CorpName"]);
                pCompactType = Convert.ToString(dicSmsType["PactType"]);
                pRecordDate = Convert.ToString(dicSmsType["IssueDate"]);
                pUrl = "http://www.szjs.gov.cn/build/htba_detail.aspx?id=" + noid;

                string htmldetail;
                try
                {
                    htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8).Trim();
                }
                catch (Exception)
                {
                    continue;
                }

                NodeList dtList = new Parser(new Lexer(htmldetail)).ExtractAllNodesThatMatch(
                    new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "js-table mar-l-4")));
                if (dtList != null && dtList.Count > 0)
                {
                    // Flatten the detail table: each row becomes one line with its cells concatenated.
                    TableTag table = dtList[0] as TableTag;
                    StringBuilder flattened = new StringBuilder();
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow dr = table.Rows[j];
                        for (int k = 0; k < dr.ColumnCount; k++)
                        {
                            flattened.Append(dr.Columns[k].ToPlainTextString().Trim().Replace(" ", "").Replace("\r", "").Replace("\n", ""));
                        }
                        flattened.Append("\r\n");
                    }
                    pInfoSource = flattened.ToString();

                    pSubcontractCode = ExtractLabelledLine(pInfoSource, "分包工程编号");
                    pSubcontractName = ExtractLabelledLine(pInfoSource, "分包工程名称");
                    pSubcontractCompany = ExtractLabelledLine(pInfoSource, "分包工程发包单位");
                    pCompactPrice = ExtractLabelledLine(pInfoSource, "合同价");
                    pPrjMgrQual = ExtractLabelledLine(pInfoSource, "项目经理资格");
                    pPrjMgrName = ExtractLabelledLine(pInfoSource, "项目经理名称");

                    Regex regpPrjCode = new Regex(@"(工程编号|总包工程编号)(:|:)[^\r\n]+\r\n");
                    pPrjCode = regpPrjCode.Match(pInfoSource).Value.Replace("总包工程编号", "").Replace("工程编号", "").Replace(":", "").Replace(":", "").Replace("总包", "").Trim();

                    // Amounts given in "万" are kept as-is; anything else is divided by 10000.
                    Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
                    if (pCompactPrice.Contains("万"))
                    {
                        pCompactPrice = pCompactPrice.Remove(pCompactPrice.IndexOf("万")).Trim();
                        pCompactPrice = regBidMoney.Match(pCompactPrice).Value;
                    }
                    else
                    {
                        try
                        {
                            pCompactPrice = (decimal.Parse(regBidMoney.Match(pCompactPrice).Value) / 10000).ToString();
                            if (decimal.Parse(pCompactPrice) < decimal.Parse("0.1"))
                            {
                                pCompactPrice = "0";
                            }
                        }
                        catch (Exception)
                        {
                            pCompactPrice = "0";
                        }
                    }
                }

                ProjectConpact info = ToolDb.GenProjectConpact("广东省", pUrl, "深圳市区", pSubcontractCode, pSubcontractName, pSubcontractCompany, pInfoSource, pRecordDate, pCompactPrice, pCompactType, pBuildUnit, pPrjCode, PrjName, pPrjMgrQual, pPrjMgrName, pContUnit, pCreatetime, "深圳市住房和建设局");
                list.Add(info);
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return list;
                }
            }
            catch
            {
                // A malformed entry is skipped; the rest of the page is still processed.
                continue;
            }
        }
    }

    return list;
}

/// <summary>
/// Returns the trimmed remainder of the first line in <paramref name="source"/> that starts with
/// <paramref name="label"/> followed by a colon, or an empty string when there is no such line.
/// </summary>
/// <remarks>
/// NOTE(review): the "(:|:)" alternation lists the same colon twice as written — possibly a
/// full-width colon was lost; confirm against the upstream file.
/// </remarks>
private static string ExtractLabelledLine(string source, string label)
{
    Regex pattern = new Regex(label + @"(:|:)[^\r\n]+\r\n");
    return pattern.Match(source).Value.Replace(label + ":", "").Trim();
}
/// <summary>
/// Crawls the paginated result list at <c>SiteUrl</c>, skips tender/supplementary announcements,
/// and builds one BidInfo per remaining entry whose detail page has a span with class "STYLE10".
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been collected.</param>
/// <returns>The collected BidInfo entries (empty when the first page cannot be downloaded).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string htl;
    string cookiestr = string.Empty;
    int page = 1;
    try
    {
        htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.Default, ref cookiestr);
    }
    catch (Exception)
    {
        return list;
    }

    // The pager text reads "共N页"; keep a single page when it cannot be parsed.
    NodeList tableNodeList = new Parser(new Lexer(htl)).ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "fanyie")));
    if (tableNodeList != null && tableNodeList.Count > 0)
    {
        try
        {
            Regex regexPage = new Regex(@"共\d+页");
            page = int.Parse(regexPage.Match(tableNodeList.AsString()).Value.Trim(new char[] { '共', '页' }));
        }
        catch
        {
        }
    }

    for (int j = 1; j <= page; j++)
    {
        if (j > 1)
        {
            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl + "&ipage=" + j.ToString()), Encoding.Default);
            }
            catch (Exception)
            {
                continue;
            }
        }

        NodeList nodeList = new Parser(new Lexer(htl)).ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list_headnews")), true),
                new TagNameFilter("li")));
        if (nodeList == null || nodeList.Count <= 0)
        {
            continue;
        }

        // Collected once per page instead of once per item; the i-th anchor is paired with the i-th item.
        NodeList anchors = nodeList.SearchFor(typeof(ATag), true);
        for (int i = 0; i < nodeList.Count; i++)
        {
            ATag aTag = (anchors != null && i < anchors.Count) ? anchors[i] as ATag : null;
            if (aTag == null)
            {
                // No link for this item: skip it instead of aborting the whole crawl.
                continue;
            }

            string buildUnit = string.Empty, bidUnit = string.Empty, bidMoney = string.Empty, code = string.Empty,
                   endDate = string.Empty, prjMgr = string.Empty, otherType = string.Empty;

            string prjName = nodeList[i].ToPlainTextString().Replace(" ", "");
            string InfoUrl = "http://gcjs.yunfu.gov.cn" + aTag.Link;

            // The list text carries the publication date; split it off the title.
            Regex regDate = new Regex(@"\d{4}-\d{1,2}-\d{1,2}");
            string beginDate = regDate.Match(prjName).Value.Trim();
            if (!string.IsNullOrEmpty(beginDate))
            {
                prjName = prjName.Replace(beginDate, "").Trim();
            }
            if (prjName.Contains("招标公告") || prjName.Contains("补充公告"))
            {
                continue;
            }

            string htmldetail;
            try
            {
                // NOTE(review): Replace(" ", "") is applied to raw HTML here — as written this strips
                // every space; confirm the intended character against the upstream file.
                htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(InfoUrl), Encoding.Default).Replace(" ", "");
            }
            catch (Exception)
            {
                Logger.Error("BidYunFu");
                continue;
            }

            NodeList dtnode = new Parser(new Lexer(htmldetail)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "STYLE10")));
            if (dtnode == null || dtnode.Count <= 0)
            {
                continue;
            }

            string bidCtx = dtnode.AsString().Replace("\n", "\r\n");
            string HtmlTxt = dtnode.AsHtml();
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>");
            bidCtx = regexHtml.Replace(bidCtx, "");

            // Both markers are tried in this order; a later match overwrites an earlier one.
            if (bidCtx.Contains("第一中标候选人"))
            {
                ParseFirstCandidate(bidCtx, "第一中标候选人", false, ref bidUnit, ref bidMoney, ref prjMgr);
            }
            if (bidCtx.Contains("第一候选人"))
            {
                ParseFirstCandidate(bidCtx, "第一候选人", true, ref bidUnit, ref bidMoney, ref prjMgr);
            }

            // Amounts given in "万" are kept as-is; anything else is divided by 10000.
            Regex regBidMoney = new Regex(@"[0-9]+[.]{0,1}[0-9]+");
            if (bidMoney.Contains("万"))
            {
                bidMoney = bidMoney.Remove(bidMoney.IndexOf("万")).Trim();
                bidMoney = regBidMoney.Match(bidMoney).Value;
            }
            else
            {
                try
                {
                    bidMoney = (decimal.Parse(regBidMoney.Match(bidMoney).Value) / 10000).ToString();
                    if (decimal.Parse(bidMoney) < decimal.Parse("0.1"))
                    {
                        bidMoney = "0";
                    }
                }
                catch (Exception)
                {
                    bidMoney = "0";
                }
            }

            Regex regBuidUnit = new Regex(@"(招 标 人|招标人|招 标人)(:|:)[^\r\n]+\r\n");
            buildUnit = regBuidUnit.Match(bidCtx).Value.Replace("招 标 人:", "").Replace("招 标人:", "").Replace("招标人:", "").Trim();
            if (buildUnit.Contains("招标代理机构"))
            {
                buildUnit = buildUnit.Remove(buildUnit.IndexOf("招标代理机构")).ToString().Trim();
            }

            // Values longer than 150 bytes are discarded.
            if (Encoding.Default.GetByteCount(buildUnit) > 150)
            {
                buildUnit = "";
            }
            if (Encoding.Default.GetByteCount(bidUnit) > 150)
            {
                bidUnit = "";
            }

            string msgType = "云浮市工程建设交易中心";
            string specType = "建设工程";
            prjName = ToolDb.GetPrjName(prjName);
            string bidType = ToolHtml.GetInviteTypes(prjName);
            BidInfo info = ToolDb.GenBidInfo("广东省", "云浮市区", "", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, InfoUrl, prjMgr, beginDate, beginDate, HtmlTxt);
            list.Add(info);
            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}

/// <summary>
/// Parses bidder, price and project manager from the text that follows <paramref name="marker"/>.
/// Any exception is swallowed; values assigned before the failure are kept.
/// </summary>
/// <param name="bidCtx">Plain text of the announcement.</param>
/// <param name="marker">Label that introduces the first-ranked candidate.</param>
/// <param name="acceptPlainManagerLabel">When true, the bare "项目经理" label is also accepted for the manager.</param>
/// <param name="bidUnit">Receives the bidder name.</param>
/// <param name="bidMoney">Receives the raw price text.</param>
/// <param name="prjMgr">Receives the manager name, cut at the first "(".</param>
/// <remarks>
/// NOTE(review): the "(:|:)" alternations list the same colon twice as written — possibly a
/// full-width colon was lost; confirm against the upstream file.
/// </remarks>
private static void ParseFirstCandidate(string bidCtx, string marker, bool acceptPlainManagerLabel, ref string bidUnit, ref string bidMoney, ref string prjMgr)
{
    try
    {
        // Re-split the tail of the text so that each ","/";"-separated clause sits on its own line.
        string ctx = bidCtx.Substring(bidCtx.IndexOf(marker)).ToString().Replace("\r\n", "").Replace(",", "\r\n").Replace(";", "\r\n");

        Regex regBidUnit = new Regex(marker + @"(:|:)[^\r\n]+\r\n");
        bidUnit = regBidUnit.Match(ctx).Value.Replace(marker + ":", "").Replace(marker + ": ", "").Trim();

        Regex regMoney = new Regex(@"(中标价|投标价|投标报价)(:|:|)[^\r\n]+\r\n");
        bidMoney = regMoney.Match(ctx).Value.Replace("中标价:", "").Replace("投标报价", "").Replace("投标价", "").Replace(",", "").Trim();

        string managerLabels = acceptPlainManagerLabel
            ? "项目总监|项目负责人|项目经理姓名及资质证书编号|项目经理"
            : "项目总监|项目负责人|项目经理姓名及资质证书编号";
        Regex regPrjMgr = new Regex("(" + managerLabels + @")(:|:)[^\r\n]+\r\n");
        string manager = regPrjMgr.Match(ctx).Value.Replace("项目总监:", "").Replace("项目负责人:", "").Replace("项目经理姓名及资质证书编号:", "");
        if (acceptPlainManagerLabel)
        {
            manager = manager.Replace("项目经理:", "");
        }
        prjMgr = manager.Trim();
        if (prjMgr.Contains("("))
        {
            prjMgr = prjMgr.Remove(prjMgr.IndexOf("(")).ToString();
        }
    }
    catch
    {
    }
}
/// <summary>
/// Crawls the paginated tender list and, for every row whose detail page has a div with id "zoomcon",
/// adds either an InviteInfo (titles without "中标"/"结果"/"候选单位公示") or a BidInfo (all other titles).
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been collected.</param>
/// <returns>The collected InviteInfo/BidInfo entries (empty when the first page cannot be downloaded).</returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    // Determine the page count.
    int pageInt = 1;
    string html;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
    }
    catch (Exception)
    {
        return list;
    }

    NodeList sNode = new Parser(new Lexer(html)).ExtractAllNodesThatMatch(
        new AndFilter(new HasParentFilter(new TagNameFilter("div")), new HasAttributeFilter("id", "page_div")));
    if (sNode != null && sNode.Count > 0)
    {
        string page = ToolHtml.GetRegexString(sNode.AsString(), "共", "页");
        try
        {
            pageInt = int.Parse(page);
        }
        catch
        {
            // Fixed fallback used by the original code when the pager text cannot be parsed.
            pageInt = 7;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                // NOTE(review): the first page is read as UTF-8 but follow-up pages use Encoding.Default — confirm which is intended.
                html = this.ToolWebSite.GetHtmlByUrl("http://www.conghua.gov.cn/zgch/zbzb/list_" + i.ToString() + ".shtml", Encoding.Default);
            }
            catch (Exception)
            {
                continue;
            }
        }

        sNode = new Parser(new Lexer(html)).ExtractAllNodesThatMatch(
            new AndFilter(
                new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_list"))),
                new TagNameFilter("table")));
        if (sNode == null || sNode.Count <= 0)
        {
            continue;
        }

        TableTag table = sNode[0] as TableTag;
        for (int j = 0; j < table.RowCount; j++)
        {
            TableRow tr = table.Rows[j];
            string projectName = ToolHtml.GetHtmlAtagValue("title", tr.ToHtml());
            bool isInvite = !projectName.Contains("中标") && !projectName.Contains("结果") && !projectName.Contains("候选单位公示");

            // Fields that neither branch fills in; they are passed through empty.
            string code = string.Empty, prjAddress = string.Empty, endDate = string.Empty,
                   remark = string.Empty, otherType = string.Empty;

            string prjName = projectName;
            string typeName = ToolHtml.GetInviteTypes(projectName);
            string beginDate = ToolHtml.GetRegexDateTime(tr.Columns[1].ToPlainTextString());
            string InfoUrl = "http://www.conghua.gov.cn" + ToolHtml.GetHtmlAtagValue("href", tr.ToHtml()).Replace("..", "");

            string htmlDtl;
            try
            {
                htmlDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                htmlDtl = ToolHtml.GetRegexHtlTxt(htmlDtl);
            }
            catch
            {
                continue;
            }

            NodeList dtlList = new Parser(new Lexer(htmlDtl)).ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoomcon")));
            if (dtlList == null || dtlList.Count <= 0)
            {
                continue;
            }

            string HtmlTxt = dtlList.ToHtml();
            string msgType = "广州建设工程交易中心";
            string specType = "建设工程";
            // Default category when none could be derived from the title.
            // (The bid branch previously had this ternary inverted, overwriting every detected type.)
            typeName = typeName == "" ? "小型工程" : typeName;

            if (isInvite)
            {
                string inviteCtx = dtlList.AsString().Replace(" ", "");
                string buildUnit = ToolHtml.GetRegexString(inviteCtx, ToolHtml.BuildRegex, true);
                if (!string.IsNullOrEmpty(buildUnit) && buildUnit.Contains(" "))
                {
                    buildUnit = buildUnit.Remove(buildUnit.IndexOf(" "));
                }
                buildUnit = ToolHtml.GetSubString(buildUnit, 150);
                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = "广州建设工程交易中心";
                }

                InviteInfo info = ToolDb.GenInviteInfo("广东省", "广州市区", "从化市", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, typeName, specType, otherType, InfoUrl, HtmlTxt);
                list.Add(info);
            }
            else
            {
                string bidUnit = string.Empty, bidMoney = string.Empty, prjMgr = string.Empty;
                string bidCtx = dtlList.AsString();
                string buildUnit = ToolHtml.GetRegexString(bidCtx, ToolHtml.BuildRegex, true);
                buildUnit = ToolHtml.GetSubString(buildUnit, 150);

                // The first table of the detail content is read as "column 0:column 1" lines.
                NodeList bidNode = new Parser(new Lexer(HtmlTxt)).ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (bidNode != null && bidNode.Count > 0)
                {
                    string ctx = string.Empty;
                    TableTag bidTable = bidNode[0] as TableTag;
                    try
                    {
                        for (int r = 0; r < bidTable.RowCount; r++)
                        {
                            ctx += bidTable.Rows[r].Columns[0].ToNodePlainString() + ":";
                            ctx += bidTable.Rows[r].Columns[1].ToNodePlainString() + "\r\n";
                        }
                    }
                    catch
                    {
                        // Stop at the first malformed row; rows collected so far are still used.
                    }
                    bidUnit = ctx.GetRegex("单位名称,承包意向人名称");
                    bidMoney = ctx.GetMoneyRegex();
                    prjMgr = ctx.GetMgrRegex();
                    if (prjMgr.Contains("/"))
                    {
                        prjMgr = prjMgr.Remove(prjMgr.IndexOf("/"));
                    }
                }
                if (string.IsNullOrEmpty(buildUnit))
                {
                    buildUnit = "广州建设工程交易中心";
                }

                BidInfo info = ToolDb.GenBidInfo("广东省", "广州市区", "从化市", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, typeName, specType, otherType, bidMoney, InfoUrl, prjMgr, HtmlTxt);
                list.Add(info);
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}
/// <summary>
/// Creates a node from a value and a neighbor list, storing both in the corresponding members.
/// </summary>
/// <param name="data">Value assigned to <c>data</c>.</param>
/// <param name="neighbors">List assigned to <c>neighbors</c>; the reference is stored, not a copy.</param>
public Node(T data, NodeList<T> neighbors)
{
    this.neighbors = neighbors;
    this.data = data;
}
/// <summary>
/// Crawls the ASP.NET grid at <c>SiteUrl</c> page by page (via postbacks), saves one BidProject per
/// grid row and, for rows that were saved, stores the attachment links found on the detail page.
/// </summary>
/// <param name="crawlAll">When false, the crawl stops once more than <c>MaxCount</c> rows have been generated.</param>
/// <returns>
/// Null when the first page cannot be downloaded or the limit is hit; otherwise an empty list —
/// entries are persisted through ToolDb.SaveEntity rather than returned.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new ArrayList();
    string html;
    string cookiestr = string.Empty;
    int sqlCount = 0;
    int pageInt = 1;
    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch
    {
        return null;
    }

    // The pager table contains text of the form "共N页,".
    NodeList pageList = new Parser(new Lexer(html)).ExtractAllNodesThatMatch(
        new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));
    if (pageList != null && pageList.Count > 0)
    {
        Match pageMatch = new Regex(@"共[^页]+页,").Match(pageList.AsString());
        try
        {
            pageInt = int.Parse(pageMatch.Value.Replace("共", "").Replace("页,", "").Replace(" ", ""));
        }
        catch
        {
            pageInt = 1;
        }
    }

    for (int j = 1; j <= pageInt; j++)
    {
        if (j > 1)
        {
            // Page through the grid with a postback built from the previously loaded page's state.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__EVENTVALIDATION", "ctl00$Header$drpSearchType", "ctl00$Header$txtQymc", "ctl00$Content$hdnOperate", "ctl00$hdnPageCount" },
                new string[] { "ctl00$Content$GridView1", "Page$" + j.ToString(), viewState, eventValidation, "0", string.Empty, string.Empty, pageInt.ToString() });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                // Skip this page; falling through would re-process the previous page's rows.
                continue;
            }
        }

        NodeList nodeList = new Parser(new Lexer(html)).ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
        if (nodeList == null || nodeList.Count <= 0)
        {
            continue;
        }

        TableTag table = nodeList[0] as TableTag;
        int rows = table.RowCount;
        if (pageInt > 1)
        {
            // With more than one page the last grid row is left out (presumably the pager row — TODO confirm).
            rows = rows - 1;
        }

        for (int i = 1; i < rows; i++)
        {
            string bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                   bBidmethod = string.Empty, bRemark = string.Empty;
            TableRow tr = table.Rows[i] as TableRow;
            string bPrjno = tr.Columns[1].ToPlainTextString();
            string bPrjname = tr.Columns[2].ToPlainTextString();
            string bBidresultendtime = tr.Columns[3].ToPlainTextString();
            string bInfourl = "http://www.szjsjy.com.cn/BusinessInfo/" + tr.Columns[4].GetATagHref();

            string htmlDtl = string.Empty;
            try
            {
                htmlDtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.UTF8);
            }
            catch
            {
                // Best effort: the row is still saved, just without attachments.
            }

            BidProject info = ToolDb.GenResultProject("广东省", "深圳市", "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
            sqlCount++;
            // Honor crawlAll: the limit only applies to a partial crawl.
            if (!crawlAll && sqlCount > this.MaxCount)
            {
                return null;
            }

            if (!ToolDb.SaveEntity(info, ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx, null))
            {
                continue;
            }

            NodeList dtList = new Parser(new Lexer(htmlDtl)).ExtractAllNodesThatMatch(
                new AndFilter(new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_GridView1"), new TagNameFilter("table")));
            if (dtList == null || dtList.Count <= 0)
            {
                continue;
            }

            TableTag dttable = dtList[0] as TableTag;
            // Collected once; the (t-1)-th anchor is paired with data row t.
            NodeList fileLinks = dttable.SearchFor(typeof(ATag), true);
            for (int t = 1; t < dttable.RowCount && fileLinks != null && t - 1 < fileLinks.Count; t++)
            {
                ATag file = fileLinks[t - 1] as ATag;
                if (file == null || !file.IsAtagAttach())
                {
                    continue;
                }
                string url = "http://www.szjsjy.com.cn/" + file.Link.Replace("../", "").Replace("./", "");
                BaseAttach entity = ToolHtml.GetBaseAttach(url, file.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                if (entity != null)
                {
                    ToolDb.SaveEntity(entity, string.Empty);
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Crawls the paged notice list at <c>SiteUrl</c>. For every <c>li</c> entry under the
/// <c>div.list</c> container it downloads the linked detail page, builds an
/// <c>InviteInfo</c> from the <c>li</c> nodes under <c>div.news_view_main</c>, appends it to
/// the result list and adds every attachment link found in that content to
/// <c>base.AttachList</c>.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as the result list holds <c>MaxCount</c> items.
/// </param>
/// <returns>
/// The list of generated <c>InviteInfo</c> items; it is empty when the first listing page
/// cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int pageInt = 1;
    string html = string.Empty;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
    }
    catch
    {
        return list;
    }

    // The total page count is the text of the span with id "cNavBar_cTotalPages".
    Parser parser = new Parser(new Lexer(html));
    NodeList sNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "cNavBar_cTotalPages")));
    if (sNode != null && sNode.Count > 0)
    {
        try
        {
            string temp = sNode[0].ToNodePlainString();
            pageInt = int.Parse(temp);
        }
        catch
        {
            // Treat an unreadable pager as a single page.
            pageInt = 1;
        }
    }

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            // Pages after the first are requested by posting the form state with the page index.
            string viewState = this.ToolWebSite.GetAspNetViewState(html);
            string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                new string[] { "__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION", "cSortField", "cSortDirection", "cID", "cParentID", "cLeft:cParentID", "cLeft:cID", "cNavBar:cPageIndex" },
                new string[] { viewState, "8A9C3F4D", eventValidation, "", "", "1080100", "1080000", "1080000", "1080100", i.ToString() });
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.Default);
            }
            catch
            {
                // Skip a page that cannot be downloaded.
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList viewList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list")), true), new TagNameFilter("li")));
        if (viewList == null || viewList.Count < 1)
        {
            continue;
        }

        for (int j = 0; j < viewList.Count; j++)
        {
            ATag aTag = viewList[j].GetATag();
            if (aTag == null)
            {
                // An entry without a link has no detail page to crawl.
                continue;
            }

            string beginDate = viewList[j].ToPlainTextString().GetDateRegex();
            string prjName = aTag.GetAttribute("title");
            string InfoUrl = "http://xzedu.zhuhai.gov.cn/" + aTag.Link.GetReplace("./");

            string htmDtl = string.Empty;
            try
            {
                htmDtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
            }
            catch
            {
                continue;
            }

            Parser dtlParser = new Parser(new Lexer(htmDtl));
            NodeList dtl = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "news_view_main")), true), new TagNameFilter("li")));
            if (dtl == null || dtl.Count <= 1)
            {
                continue;
            }

            // Fields that the page does not provide are passed on as empty strings.
            string prjAddress = string.Empty, endDate = string.Empty, remark = string.Empty, otherType = string.Empty;

            // The HTML is lower-cased once; the tag replacements below rely on lower-case tag names.
            string HtmlTxt = dtl.AsHtml().ToLower();
            string inviteCtx = HtmlTxt.GetReplace("</p>,</br>,<br>", "\r\n").ToCtxString();
            string buildUnit = inviteCtx.GetBuildRegex();
            string code = inviteCtx.GetCodeRegex().GetCodeDel();

            // Cut the build unit at the first "地址" / "招标代理" marker. A single ordinal
            // search keeps the existence test and the cut position in agreement.
            int cutAt = buildUnit.IndexOf("地址", StringComparison.Ordinal);
            if (cutAt >= 0)
            {
                buildUnit = buildUnit.Remove(cutAt);
            }
            cutAt = buildUnit.IndexOf("招标代理", StringComparison.Ordinal);
            if (cutAt >= 0)
            {
                buildUnit = buildUnit.Remove(cutAt);
            }

            string specType = "政府采购";
            string inviteType = prjName.GetInviteBidType();
            string msgType = "珠海市香洲区教育局";
            InviteInfo info = ToolDb.GenInviteInfo("广东省", "珠海市区", "香洲区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);

            // NOTE(review): attachment links are read from the lower-cased HTML, so their
            // URLs and link texts lose their original casing - confirm this is intended.
            Parser attachParser = new Parser(new Lexer(HtmlTxt));
            NodeList aNode = attachParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
            if (aNode != null && aNode.Count > 0)
            {
                for (int k = 0; k < aNode.Count; k++)
                {
                    ATag a = aNode[k].GetATag();
                    if (a == null || !a.IsAtagAttach())
                    {
                        continue;
                    }

                    // Links that do not mention "http" are prefixed with the site root.
                    string link = a.Link.ToLower().Contains("http")
                        ? a.Link
                        : "http://xzedu.zhuhai.gov.cn/" + a.Link;
                    BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                    base.AttachList.Add(attach);
                }
            }

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}