/// <summary>
/// Searches <paramref name="xmls"/> for the element described by
/// <paramref name="elementPatern"/> and passes the outcome on to <c>AddInTable</c>.
/// </summary>
/// <param name="dt">Table forwarded unchanged to <c>AddInTable</c>.</param>
/// <param name="elementPatern">
/// Pattern whose <c>ElementSimilarity</c> selects the lookup routine and whose
/// <c>SampleHTMLElement</c> is the sample handed to that routine.
/// </param>
/// <param name="xmls">Candidate elements that are searched.</param>
/// <param name="elem">
/// Receives the lookup result; stays <c>null</c> when the similarity value is
/// not one of the five values handled below.
/// </param>
/// <returns>Whatever <c>AddInTable</c> returns for the lookup result.</returns>
public MiMFa_Table GetTable(MiMFa_Table dt, HTMLElementPatern elementPatern, List<MiMFa_XMLElement> xmls, out MiMFa_XMLElement elem)
{
    var similarity = elementPatern.ElementSimilarity;
    MiMFa_XMLElement found = null;

    // Each similarity mode maps onto exactly one static finder of MiMFa_XMLElement.
    if (similarity == MiMFa_Similarity.Congruent)
    {
        found = MiMFa_XMLElement.FindCongruent(xmls, elementPatern.SampleHTMLElement);
    }
    else if (similarity == MiMFa_Similarity.Like)
    {
        found = MiMFa_XMLElement.FindLike(xmls, elementPatern.SampleHTMLElement);
    }
    else if (similarity == MiMFa_Similarity.Same)
    {
        found = MiMFa_XMLElement.FindSame(xmls, elementPatern.SampleHTMLElement);
    }
    else if (similarity == MiMFa_Similarity.Duplicate)
    {
        found = MiMFa_XMLElement.FindDuplicate(xmls, elementPatern.SampleHTMLElement);
    }
    else if (similarity == MiMFa_Similarity.This)
    {
        found = MiMFa_XMLElement.Find(xmls, elementPatern.SampleHTMLElement);
    }

    elem = found;
    return AddInTable(dt, elementPatern, elem, xmls);
}
/// <summary>
/// Copy constructor: fills this pattern by assigning every member listed
/// below from <paramref name="patern"/>.
/// </summary>
/// <param name="patern">Pattern whose member values are taken over.</param>
/// <remarks>
/// The values are assigned as-is; this constructor itself performs no cloning.
/// </remarks>
public HTMLElementPatern(HTMLElementPatern patern)
{
    // What to look for and how strictly to match it.
    this.SampleHTMLElement = patern.SampleHTMLElement;
    this.ElementSimilarity = patern.ElementSimilarity;
    this.All = patern.All;

    // What to do with a match.
    this.Usage = patern.Usage;
    this.LinkJob = patern.LinkJob;
    this.ElementItems = patern.ElementItems;
    this.DestinationPath = patern.DestinationPath;

    // Which part of the element is read and where the value is placed.
    this.AttributeName = patern.AttributeName;
    this.ChildName = patern.ChildName;
    this.ColName = patern.ColName;
    this.TableValuePositionType = patern.TableValuePositionType;
}
/// <summary>
/// Adds the value extracted from <paramref name="elem"/> to <paramref name="dt"/>,
/// or, for <c>MiMFa_Usage.Set</c> patterns, treats the value as a link that is
/// either queued for fetching or downloaded.
/// </summary>
/// <param name="dt">Table that receives the value(s).</param>
/// <param name="elementPatern">Pattern supplying usage, link job, column name and position type.</param>
/// <param name="elem">Element to read from; when <c>null</c> an empty string is added instead.</param>
/// <param name="xmls">Not used by this method.</param>
/// <returns>
/// The table returned by the last <c>MiMFa_Table.AddInTable</c> call, or
/// <paramref name="dt"/> itself when the link was only queued.
/// </returns>
public MiMFa_Table AddInTable(MiMFa_Table dt, HTMLElementPatern elementPatern, MiMFa_XMLElement elem, List<MiMFa_XMLElement> xmls)
{
    // No element: still add an empty value under the pattern's column.
    if (elem == null)
    {
        return MiMFa_Table.AddInTable(dt, "", elementPatern.ColName, elementPatern.TableValuePositionType);
    }

    string value = GetValueFromXML(elementPatern, elem);

    // Every usage other than Set simply stores the extracted value.
    if (elementPatern.Usage != MiMFa_Usage.Set)
    {
        return MiMFa_Table.AddInTable(dt, value, elementPatern.ColName, elementPatern.TableValuePositionType);
    }

    var linkJob = elementPatern.LinkJob;

    if (linkJob == MiMFa_LinkJob.InternalPage)
    {
        AppendInFetchQueue(new FetchPatern(fetch_url, value), true);
        return dt;
    }

    if (linkJob == MiMFa_LinkJob.ExternalPage)
    {
        AppendInFetchQueue(fetch_webpatern.FindFetchPaternFor(value, fetch_url.Source), true);
        return dt;
    }

    // Any other link job ends up here. The original source carried a
    // commented-out "case MiMFa_LinkJob.Download:" label in front of this
    // default branch.
    string file = string.Join("", elementPatern.DestinationPath, "\\", MiMFa_UnicCode.CreateNewString(10), "_");
    if (MiMFa_Internet.IsWellURL(value))
    {
        // Append a name derived from the text after the last '/' of the URL.
        string lastSegment = value.Split('/').Last();
        file += MiMFa_StringService.CompressedText(MiMFa_Path.NormalizeForFileAndFolderName(lastSegment), 25, "");
    }

    bool downloaded = MiMFa_Internet.DownloadOrSave(value, file);

    // Three cells are written: the local file, the source URL and the download state.
    MiMFa_Table result = MiMFa_Table.AddInTable(dt, file, elementPatern.ColName, elementPatern.TableValuePositionType);
    result = MiMFa_Table.AddInTable(
        result,
        value,
        string.Join("", "auto_", elementPatern.ColName, "_URL"),
        MiMFa_TableValuePositionType.NextColumnCell);
    result = MiMFa_Table.AddInTable(
        result,
        downloaded + "",
        string.Join("", "auto_", elementPatern.ColName, "_State"),
        MiMFa_TableValuePositionType.NextColumnCell);
    return result;
}
/// <summary>
/// Extracts a string from <paramref name="elem"/> according to the
/// <c>Usage</c> and <c>ElementItems</c> of <paramref name="elementPatern"/>.
/// </summary>
/// <param name="elementPatern">
/// Pattern selecting what is read; <c>AttributeName</c> and <c>ChildName</c>
/// are used by the attribute and child selections respectively.
/// </param>
/// <param name="elem">Element the value is read from.</param>
/// <returns>
/// For <c>Null</c>/<c>Get</c> usage: the selected content, tag or attribute text.
/// For <c>Set</c> usage: the first non-blank of the <c>href</c>, <c>src</c>,
/// <c>url</c> attributes or the inner text (prefixed with the base web URL when
/// it is a well-formed relative URI), otherwise the outer text.
/// A single space when no branch produced a value.
/// </returns>
public string GetValueFromXML(HTMLElementPatern elementPatern, MiMFa_XMLElement elem)
{
    switch (elementPatern.Usage)
    {
        case MiMFa_Usage.Null:
        case MiMFa_Usage.Get:
            switch (elementPatern.ElementItems)
            {
                case MiMFa_XMLElementItems.Null:
                case MiMFa_XMLElementItems.ThisContent:
                    return elem.OuterText;

                case MiMFa_XMLElementItems.ThisTag:
                    return elem.Outer;

                case MiMFa_XMLElementItems.ThisAttribute:
                    return elem.GetAttribute(elementPatern.AttributeName);

                // Single-child selections fall back to a placeholder element
                // when no child with the requested tag name is found.
                case MiMFa_XMLElementItems.ChildContent:
                    return (elem.GetFirstChildElementByTagName(elementPatern.ChildName)
                            ?? new MiMFa_XMLElement(-1, "", "", "")).OuterText;

                case MiMFa_XMLElementItems.ChildTag:
                    return (elem.GetFirstChildElementByTagName(elementPatern.ChildName)
                            ?? new MiMFa_XMLElement(-1, "", "", "")).Outer;

                case MiMFa_XMLElementItems.ChildAttribute:
                    return (elem.GetFirstChildElementByTagName(elementPatern.ChildName)
                            ?? new MiMFa_XMLElement(-1, "", "", "")).GetAttribute(elementPatern.AttributeName);

                // Multi-child selections wrap the matching children in a
                // placeholder parent and read the value from that wrapper.
                case MiMFa_XMLElementItems.ChildrenContent:
                    return (new MiMFa_XMLElement(-1, "", "", "")
                    {
                        Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                    }).OuterText;

                case MiMFa_XMLElementItems.ChildrenTag:
                    return (new MiMFa_XMLElement(-1, "", "", "")
                    {
                        Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                    }).Outer;

                case MiMFa_XMLElementItems.ChildrenAttribute:
                    return MiMFa_CollectionService.GetAllItems(
                        (new MiMFa_XMLElement(-1, "", "", "")
                        {
                            Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                        }).GetChildAttributes(elementPatern.AttributeName),
                        "¶");
            }
            // Unhandled item selection: fall through to the " " result below.
            break;

        case MiMFa_Usage.Set:
            // Try the link-carrying attributes in order, then the inner text.
            string str = elem.GetAttribute("href");
            if (string.IsNullOrWhiteSpace(str))
            {
                str = elem.GetAttribute("src");
            }
            if (string.IsNullOrWhiteSpace(str))
            {
                str = elem.GetAttribute("url");
            }
            if (string.IsNullOrWhiteSpace(str))
            {
                str = elem.InnerText;
            }

            if (!string.IsNullOrWhiteSpace(str))
            {
                if (Uri.IsWellFormedUriString(str, UriKind.Relative))
                {
                    // Ordinal comparison: this is a URL fragment, not linguistic
                    // text, so the check must not depend on the current culture.
                    string separator = str.StartsWith("/", StringComparison.Ordinal) ? "" : "/";
                    str = MiMFa_Internet.GetBaseWebURL(fetch_url) + separator + str;
                }
            }
            else
            {
                str = elem.OuterText;
            }
            return str;
    }

    return " ";
}