/// <summary>
/// Initializes a pattern from a sample element and the options that accompany it.
/// Every argument is stored in the member of the matching name.
/// </summary>
/// <param name="sampleHTMLElement">Stored in SampleHTMLElement.</param>
/// <param name="all">Stored in All.</param>
/// <param name="elementSimilarity">Stored in ElementSimilarity.</param>
/// <param name="usage">Stored in Usage.</param>
/// <param name="linkJob">Stored in LinkJob.</param>
/// <param name="destinationPath">
/// Stored in DestinationPath. When null or empty, the path reported by
/// Environment.GetFolderPath for SpecialFolder.MyDocuments is stored instead.
/// </param>
/// <param name="elementItems">Stored in ElementItems.</param>
/// <param name="childName">Stored in ChildName.</param>
/// <param name="attrName">Stored in AttributeName.</param>
/// <param name="tableValuePositionType">Stored in TableValuePositionType.</param>
/// <param name="colName">Stored in ColName.</param>
public HTMLElementPatern(
    MiMFa_XMLElement sampleHTMLElement,
    MiMFa_Boolean all = MiMFa_Boolean.False,
    MiMFa_Similarity elementSimilarity = MiMFa_Similarity.Null,
    MiMFa_Usage usage = MiMFa_Usage.Null,
    MiMFa_LinkJob linkJob = MiMFa_LinkJob.Null,
    string destinationPath = "",
    MiMFa_XMLElementItems elementItems = MiMFa_XMLElementItems.Null,
    string childName = "",
    string attrName = "",
    MiMFa_TableValuePositionType tableValuePositionType = MiMFa_TableValuePositionType.Null,
    string colName = null)
{
    SampleHTMLElement = sampleHTMLElement;
    ElementSimilarity = elementSimilarity;
    All = all;
    Usage = usage;
    LinkJob = linkJob;
    ElementItems = elementItems;

    // An unspecified destination falls back to the documents folder.
    DestinationPath = string.IsNullOrEmpty(destinationPath)
        ? Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments)
        : destinationPath;

    AttributeName = attrName;
    ChildName = childName;
    ColName = colName;
    TableValuePositionType = tableValuePositionType;
}
/// <summary>
/// Searches <paramref name="xmls"/> for an element matching the pattern's sample element,
/// using the MiMFa_XMLElement finder selected by the pattern's ElementSimilarity, and then
/// hands the outcome to AddInTable.
/// </summary>
/// <param name="dt">Table passed through to AddInTable.</param>
/// <param name="elementPatern">Pattern supplying the similarity mode and the sample element.</param>
/// <param name="xmls">Elements to search.</param>
/// <param name="elem">
/// Receives the finder's result; stays null when ElementSimilarity selects none of the finders.
/// </param>
/// <returns>The value returned by AddInTable.</returns>
public MiMFa_Table GetTable(MiMFa_Table dt, HTMLElementPatern elementPatern, List<MiMFa_XMLElement> xmls, out MiMFa_XMLElement elem)
{
    elem = null;

    switch (elementPatern.ElementSimilarity)
    {
        case MiMFa_Similarity.Congruent:
            elem = MiMFa_XMLElement.FindCongruent(xmls, elementPatern.SampleHTMLElement);
            break;

        case MiMFa_Similarity.Like:
            elem = MiMFa_XMLElement.FindLike(xmls, elementPatern.SampleHTMLElement);
            break;

        case MiMFa_Similarity.Same:
            elem = MiMFa_XMLElement.FindSame(xmls, elementPatern.SampleHTMLElement);
            break;

        case MiMFa_Similarity.Duplicate:
            elem = MiMFa_XMLElement.FindDuplicate(xmls, elementPatern.SampleHTMLElement);
            break;

        case MiMFa_Similarity.This:
            elem = MiMFa_XMLElement.Find(xmls, elementPatern.SampleHTMLElement);
            break;
    }

    // AddInTable handles the "nothing found" case (elem == null) itself.
    MiMFa_Table updated = AddInTable(dt, elementPatern, elem, xmls);
    return updated;
}
/// <summary>
/// Returns SampleTable, first rebuilding it from the default pattern when
/// <paramref name="patern"/> has no entries in Paterns and has a non-null DefaultPatern.
/// </summary>
/// <param name="patern">Web pattern whose Paterns and DefaultPatern are inspected.</param>
/// <returns>
/// The freshly built table (also stored in SampleTable) when the default pattern is used;
/// otherwise the current SampleTable.
/// </returns>
public MiMFa_Table FetchSampleTable(WebPatern patern)
{
    bool useDefaultPatern = patern.Paterns.Count == 0 && patern.DefaultPatern != null;
    if (!useDefaultPatern)
    {
        return SampleTable;
    }

    // The table is built from a copy of the default pattern's sample elements.
    var sampleCopy = MiMFa_XMLElement.GetCopy(patern.DefaultPatern.SampleHTMLElements);
    MiMFa_Table rebuilt = GetTable(patern.DefaultPatern, sampleCopy);
    SampleTable = rebuilt;
    return rebuilt;
}
/// <summary>
/// Copy constructor: initializes this pattern with the member values of
/// <paramref name="patern"/>. Values are assigned as-is (no cloning is performed here).
/// </summary>
/// <param name="patern">Pattern whose members are copied.</param>
public HTMLElementPatern(HTMLElementPatern patern)
{
    HTMLElementPatern source = patern;

    SampleHTMLElement = source.SampleHTMLElement;
    ElementSimilarity = source.ElementSimilarity;
    All = source.All;
    Usage = source.Usage;
    LinkJob = source.LinkJob;
    ElementItems = source.ElementItems;

    // Unlike the main constructor, the destination is copied verbatim (no fallback).
    DestinationPath = source.DestinationPath;

    AttributeName = source.AttributeName;
    ChildName = source.ChildName;
    ColName = source.ColName;
    TableValuePositionType = source.TableValuePositionType;
}
/// <summary>
/// Applies one element pattern to a located element and records the outcome in
/// <paramref name="dt"/>.
/// </summary>
/// <remarks>
/// <para>When <paramref name="elem"/> is null an empty string is added to the pattern's column.</para>
/// <para>When the pattern's Usage is not Set, the value from GetValueFromXML is added.</para>
/// <para>
/// When Usage is Set the value is treated according to LinkJob: InternalPage and ExternalPage
/// append a fetch pattern to the fetch queue and leave the table untouched; every other
/// link job passes the value to MiMFa_Internet.DownloadOrSave and adds three cells: the
/// target file path, the value itself ("auto_{ColName}_URL") and the call's result
/// ("auto_{ColName}_State").
/// </para>
/// </remarks>
/// <param name="dt">Table to add to.</param>
/// <param name="elementPatern">Pattern describing usage, link job, column and cell position.</param>
/// <param name="elem">Element located for the pattern, or null when none was found.</param>
/// <param name="xmls">Not used by this method.</param>
/// <returns>The table returned by MiMFa_Table.AddInTable, or <paramref name="dt"/> when only a fetch was queued.</returns>
public MiMFa_Table AddInTable(MiMFa_Table dt, HTMLElementPatern elementPatern, MiMFa_XMLElement elem, List<MiMFa_XMLElement> xmls)
{
    if (elem == null)
    {
        return MiMFa_Table.AddInTable(dt, "", elementPatern.ColName, elementPatern.TableValuePositionType);
    }

    string value = GetValueFromXML(elementPatern, elem);

    if (elementPatern.Usage != MiMFa_Usage.Set)
    {
        return MiMFa_Table.AddInTable(dt, value, elementPatern.ColName, elementPatern.TableValuePositionType);
    }

    switch (elementPatern.LinkJob)
    {
        case MiMFa_LinkJob.InternalPage:
            AppendInFetchQueue(new FetchPatern(fetch_url, value), true);
            return dt;

        case MiMFa_LinkJob.ExternalPage:
            AppendInFetchQueue(fetch_webpatern.FindFetchPaternFor(value, fetch_url.Source), true);
            return dt;
    }

    // Every remaining link job (Download included) ends up here.
    string targetFile = string.Join("", elementPatern.DestinationPath, "\\", MiMFa_UnicCode.CreateNewString(10), "_");
    if (MiMFa_Internet.IsWellURL(value))
    {
        // Append a normalized, length-limited form of the last URL segment to the file name.
        string lastSegment = value.Split('/').Last();
        targetFile += MiMFa_StringService.CompressedText(MiMFa_Path.NormalizeForFileAndFolderName(lastSegment), 25, "");
    }

    bool succeeded = MiMFa_Internet.DownloadOrSave(value, targetFile);

    MiMFa_Table result = MiMFa_Table.AddInTable(dt, targetFile, elementPatern.ColName, elementPatern.TableValuePositionType);

    string urlColumn = string.Join("", "auto_", elementPatern.ColName, "_URL");
    result = MiMFa_Table.AddInTable(result, value, urlColumn, MiMFa_TableValuePositionType.NextColumnCell);

    string stateColumn = string.Join("", "auto_", elementPatern.ColName, "_State");
    result = MiMFa_Table.AddInTable(result, succeeded + "", stateColumn, MiMFa_TableValuePositionType.NextColumnCell);

    return result;
}
/// <summary>
/// Extracts a string from <paramref name="elem"/> according to the pattern's Usage and
/// ElementItems.
/// </summary>
/// <remarks>
/// <para>
/// Usage Set: takes the first non-blank of the "href", "src" and "url" attributes, then
/// InnerText. A non-blank value that is a well-formed relative URI is prefixed with
/// MiMFa_Internet.GetBaseWebURL(fetch_url) (and a "/" when it does not already start with
/// one). When everything is blank, OuterText is returned.
/// </para>
/// <para>
/// Usage Null or Get: returns the content, tag or attribute of the element itself, of its
/// first child named ChildName, or of all children named ChildName, as selected by
/// ElementItems. A missing first child is replaced by an empty placeholder element.
/// </para>
/// </remarks>
/// <param name="elementPatern">Pattern supplying Usage, ElementItems, ChildName and AttributeName.</param>
/// <param name="elem">Element to read from.</param>
/// <returns>The extracted value, or a single space when no branch applies.</returns>
public string GetValueFromXML(HTMLElementPatern elementPatern, MiMFa_XMLElement elem)
{
    var usage = elementPatern.Usage;

    if (usage == MiMFa_Usage.Set)
    {
        string link = null;
        string[] linkAttributes = { "href", "src", "url" };
        foreach (string attribute in linkAttributes)
        {
            link = elem.GetAttribute(attribute);
            if (!string.IsNullOrWhiteSpace(link))
            {
                break;
            }
        }

        if (string.IsNullOrWhiteSpace(link))
        {
            link = elem.InnerText;
        }

        if (string.IsNullOrWhiteSpace(link))
        {
            return elem.OuterText;
        }

        if (Uri.IsWellFormedUriString(link, UriKind.Relative))
        {
            string separator = "/";
            if (link.StartsWith("/"))
            {
                separator = "";
            }

            link = MiMFa_Internet.GetBaseWebURL(fetch_url) + separator + link;
        }

        return link;
    }

    if (usage != MiMFa_Usage.Null && usage != MiMFa_Usage.Get)
    {
        return " ";
    }

    var items = elementPatern.ElementItems;
    switch (items)
    {
        case MiMFa_XMLElementItems.Null:
        case MiMFa_XMLElementItems.ThisContent:
            return elem.OuterText;

        case MiMFa_XMLElementItems.ThisTag:
            return elem.Outer;

        case MiMFa_XMLElementItems.ThisAttribute:
            return elem.GetAttribute(elementPatern.AttributeName);

        case MiMFa_XMLElementItems.ChildContent:
        case MiMFa_XMLElementItems.ChildTag:
        case MiMFa_XMLElementItems.ChildAttribute:
        {
            // A missing child is replaced by an empty placeholder so the reads below never hit null.
            var firstChild = elem.GetFirstChildElementByTagName(elementPatern.ChildName) ?? new MiMFa_XMLElement(-1, "", "", "");
            if (items == MiMFa_XMLElementItems.ChildContent)
            {
                return firstChild.OuterText;
            }

            if (items == MiMFa_XMLElementItems.ChildTag)
            {
                return firstChild.Outer;
            }

            return firstChild.GetAttribute(elementPatern.AttributeName);
        }

        case MiMFa_XMLElementItems.ChildrenContent:
        case MiMFa_XMLElementItems.ChildrenTag:
        case MiMFa_XMLElementItems.ChildrenAttribute:
        {
            // All matching children are gathered under a synthetic parent element.
            var holder = new MiMFa_XMLElement(-1, "", "", "")
            {
                Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
            };

            if (items == MiMFa_XMLElementItems.ChildrenContent)
            {
                return holder.OuterText;
            }

            if (items == MiMFa_XMLElementItems.ChildrenTag)
            {
                return holder.Outer;
            }

            return MiMFa_CollectionService.GetAllItems(holder.GetChildAttributes(elementPatern.AttributeName), "¶");
        }
    }

    return " ";
}
/// <summary>
/// Builds a table by applying every element pattern in <paramref name="patern"/>.HTMLElementsPatern
/// to <paramref name="xmls"/>, then serializes the table to fetch_url.TableAddress and returns it.
/// </summary>
/// <remarks>
/// <para>
/// Returns a new, empty table immediately when HTMLElementsPatern is null.
/// </para>
/// <para>
/// The patterns are copied into a working list. The common parent of all sample elements is
/// computed and, when not null, looked up in <paramref name="xmls"/> to narrow the search scope.
/// Each pattern is applied through GetTable(mdt, pattern, scope, out ele); patterns whose All
/// is not MiMFa_Boolean.True are removed from the working list after being applied once, and
/// the outer loop repeats while patterns remain, advancing <paramref name="xmls"/> with
/// MiMFa_XMLElement.GetLastSplitIn.
/// </para>
/// <para>
/// Afterwards NumberOfLastTableRows is set to mdt.MainTable.Rows.Count - 1 and added to
/// NumberOfAllTableRows; a blank fetch_url.TableAddress is replaced by TempDirectory + the
/// current tick count + MiMFa_Table.Extention; and, when rows exist and fetch_url.Source is
/// not blank, an "auto_Source" column is filled from row index 1 onward.
/// </para>
/// <para>
/// NOTE(review): in this collapsed layout a `//` marker sits directly before the
/// `if (fetch_url.TransposeResult)` statement. Confirm against the formatted original which
/// statements after that marker are live code.
/// </para>
/// <para>
/// NOTE(review): the previously saved table is deserialized and concatenated into `nmt`, but
/// `nmt` is never used afterwards; `mdt` alone is saved and returned. This looks like the
/// concatenation result is discarded - confirm whether saving `nmt` was intended.
/// </para>
/// <para>
/// NOTE(review): both try blocks use empty catch clauses, so failures while adding the source
/// column or reading the existing file are silently ignored.
/// </para>
/// </remarks>
/// <param name="patern">Fetch pattern supplying the element patterns.</param>
/// <param name="xmls">Parsed elements to search.</param>
/// <returns>The table built for this call (`mdt`).</returns>
public MiMFa_Table GetTable(FetchPatern patern, List <MiMFa_XMLElement> xmls) { MiMFa_Table mdt = new MiMFa_Table(); if (patern.HTMLElementsPatern == null) { return(mdt); } List <HTMLElementPatern> continuehep = new List <HTMLElementPatern>(); MiMFa_CollectionService.CopyTo(ref continuehep, patern.HTMLElementsPatern); List <MiMFa_XMLElement> me = new List <MiMFa_XMLElement>(); foreach (var item in patern.HTMLElementsPatern) { me.Add(item.SampleHTMLElement); } MiMFa_XMLElement e = MiMFa_XMLElement.GetElementsCommonParent(me); while (continuehep.Count > 0) { if (e != null) { e = MiMFa_XMLElement.Find(xmls, e); } List <MiMFa_XMLElement> scope = e == null ? xmls : new List <MiMFa_XMLElement>() { e }; bool all = false; MiMFa_XMLElement elem = null; int len = continuehep.Count; for (int i = 0; i < len; i++) { MiMFa_XMLElement ele = null; mdt = GetTable(mdt, continuehep[i], scope, out ele); if (continuehep[i].All != MiMFa_Boolean.True) { continuehep.RemoveAt(i); i--; len--; } else if (ele != null) { elem = ele; all = true; } if (ele != null) { if (continuehep.Count - 1 > i + 1 && continuehep[i].SampleHTMLElement == continuehep[i + 1].SampleHTMLElement.Parent) { ele.StartTag = ""; } else { xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, ele); } } } if (e == null) { break; } if (elem == null) { xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, e); } else if (all) { xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, elem); } else { break; } } // if (fetch_url.TransposeResult) { mdt = mdt.Transpose(true); } NumberOfLastTableRows = mdt.MainTable.Rows.Count - 1; NumberOfAllTableRows += NumberOfLastTableRows; if (string.IsNullOrWhiteSpace(fetch_url.TableAddress)) { fetch_url.TableAddress = TempDirectory + DateTime.Now.Ticks + MiMFa_Table.Extention; } if (NumberOfLastTableRows > 0) { try { if (!string.IsNullOrWhiteSpace(fetch_url.Source)) { DataColumn dcs = mdt.AddColumnSafe("auto_Source"); for (int i = 1; i < mdt.Rows.Count; i++) { mdt.Rows[i][dcs] = fetch_url.Source; } } } catch { } } try { 
MiMFa_Table nmt = null; MiMFa_IOService.OpenDeserializeFile(fetch_url.TableAddress, ref nmt); if (nmt != null) { nmt = MiMFa_Table.ConcatTable(nmt, mdt); } } catch { } MiMFa_IOService.SaveSerializeFile(fetch_url.TableAddress, mdt); return(mdt); }
/// <summary>
/// Converts <paramref name="htmlElements"/> with MiMFa_XMLElement.GetOuter and forwards the
/// result to the ShowDocument overload that accepts it.
/// </summary>
/// <param name="htmlElements">Elements to show.</param>
public void ShowDocument(IEnumerable<MiMFa_XMLElement> htmlElements)
{
    var outer = MiMFa_XMLElement.GetOuter(htmlElements);
    ShowDocument(outer);
}