/// <summary>
/// Processes one page: downloads its HTML, reports the links found on it
/// (minus those matching a no-follow expression), extracts the configured
/// elements and their fields, and raises the corresponding notifications.
/// </summary>
/// <param name="state">
/// The link to process. It is cast to <c>LinkEntity</c>, so passing any other
/// type throws <see cref="System.InvalidCastException"/>.
/// </param>
public void HandlePage(object state)
{
    LinkEntity link = (LinkEntity)state;
    m_HtmlContent = Network.GetHTML(link.Url);

    List<string> foundUrls = RegEx.GetWebPageLinks(m_SiteEntity.RootUrl, m_HtmlContent);

    // Remove no-follow URLs: drop every URL that produces a non-empty match
    // for any of the site's no-follow expressions.
    foreach (string noFollowRule in m_SiteEntity.NoFollowExpressions)
    {
        foundUrls.RemoveAll(url => !string.IsNullOrEmpty(RegEx.GetRegexMatch(url, noFollowRule)));
    }

    OnFoundLinks(foundUrls, link.Depth);

    // Extract the root elements.
    foreach (var extractionElement in m_SiteEntity.ExtractionElements)
    {
        List<string> matches = GetElementsFromString(m_HtmlContent, extractionElement);
        foreach (var item in matches)
        {
            ElementEntity newElement = new ElementEntity
            {
                // Use the value returned by Interlocked.Increment. Re-reading the
                // field afterwards could observe another thread's increment and
                // hand out a duplicate (or skipped) id.
                Id = Interlocked.Increment(ref m_ElementCounter),
                Name = extractionElement.Name,
                Value = item
            };

            // Field ids are local to their parent element and start at 1;
            // fields with no match are skipped and do not consume an id.
            int fieldsCounter = 0;
            foreach (var field in extractionElement.Fields)
            {
                string fieldMatch = GetElementFromString(newElement.Value, field.Value);
                if (string.IsNullOrEmpty(fieldMatch))
                {
                    continue;
                }

                fieldsCounter++;
                ElementEntity newField = new ElementEntity
                {
                    Id = fieldsCounter,
                    Name = field.Key,
                    Value = fieldMatch
                };
                newElement.Fields.Add(newField.Name, newField);
            }

            m_Results.Add(newElement);
        }
    }

    // NOTE(review): m_Results is not cleared in this method; if the instance is
    // reused for several pages, earlier elements are reported again - confirm
    // whether the owner resets it.
    if (m_Results.Count > 0)
    {
        OnFoundElements(m_Results);
    }

    OnHandlingFinished(link);
}
/// <summary>
/// Writes one element to the XML output as an <c>Element</c> node containing
/// its <c>Name</c>, its <c>Value</c> and a <c>Fields</c> list with one
/// <c>Field</c> (<c>Name</c>/<c>Value</c>) per entry of <c>element.Fields</c>,
/// then increments the stored-elements counter.
/// </summary>
/// <param name="element">The element to serialize.</param>
public void InsertElement(ElementEntity element)
{
    // The whole element is written under the lock so that output from
    // concurrent callers cannot interleave in the shared writer.
    lock (m_SyncXmlWriter)
    {
        m_XmlTextWriter.WriteStartElement("Element");
        m_XmlTextWriter.WriteElementString("Name", element.Name);
        m_XmlTextWriter.WriteElementString("Value", element.Value);

        m_XmlTextWriter.WriteStartElement("Fields");
        foreach (var field in element.Fields)
        {
            m_XmlTextWriter.WriteStartElement("Field");
            m_XmlTextWriter.WriteElementString("Name", field.Value.Name);
            m_XmlTextWriter.WriteElementString("Value", field.Value.Value);
            m_XmlTextWriter.WriteEndElement(); // Field
        }
        m_XmlTextWriter.WriteEndElement(); // Fields

        m_XmlTextWriter.WriteEndElement(); // Element

        // Incremented inside the lock: ++ is a non-atomic read-modify-write, so
        // doing it outside the lock could lose updates under concurrent calls.
        m_Stored++;
    }
}