Exemplo n.º 1
0
 /// <summary>
 /// Builds an element pattern from a sample element plus the options that
 /// control how matching elements are located and what is read from them.
 /// </summary>
 /// <param name="sampleHTMLElement">Sample element stored in <c>SampleHTMLElement</c>.</param>
 /// <param name="all">Stored in <c>All</c>.</param>
 /// <param name="elementSimilarity">Stored in <c>ElementSimilarity</c>.</param>
 /// <param name="usage">Stored in <c>Usage</c>.</param>
 /// <param name="linkJob">Stored in <c>LinkJob</c>.</param>
 /// <param name="destinationPath">
 /// Stored in <c>DestinationPath</c>; when null or empty, the current user's
 /// MyDocuments special folder is used instead.
 /// </param>
 /// <param name="elementItems">Stored in <c>ElementItems</c>.</param>
 /// <param name="childName">Stored in <c>ChildName</c>.</param>
 /// <param name="attrName">Stored in <c>AttributeName</c>.</param>
 /// <param name="tableValuePositionType">Stored in <c>TableValuePositionType</c>.</param>
 /// <param name="colName">Stored in <c>ColName</c>.</param>
 public HTMLElementPatern(
     MiMFa_XMLElement sampleHTMLElement,
     MiMFa_Boolean all = MiMFa_Boolean.False,
     MiMFa_Similarity elementSimilarity = MiMFa_Similarity.Null,
     MiMFa_Usage usage                  = MiMFa_Usage.Null,
     MiMFa_LinkJob linkJob              = MiMFa_LinkJob.Null,
     string destinationPath             = "",
     MiMFa_XMLElementItems elementItems = MiMFa_XMLElementItems.Null,
     string childName = "",
     string attrName  = "",
     MiMFa_TableValuePositionType tableValuePositionType = MiMFa_TableValuePositionType.Null,
     string colName = null)
 {
     this.SampleHTMLElement = sampleHTMLElement;
     this.ElementSimilarity = elementSimilarity;
     this.All = all;
     this.Usage = usage;
     this.LinkJob = linkJob;
     this.ElementItems = elementItems;

     // No explicit destination: fall back to the user's documents folder.
     this.DestinationPath = string.IsNullOrEmpty(destinationPath)
         ? Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments)
         : destinationPath;

     this.AttributeName = attrName;
     this.ChildName = childName;
     this.ColName = colName;
     this.TableValuePositionType = tableValuePositionType;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up the element described by <paramref name="elementPatern"/> inside
        /// <paramref name="xmls"/> and passes the result to <c>AddInTable</c>.
        /// </summary>
        /// <param name="dt">Table passed through to <c>AddInTable</c>.</param>
        /// <param name="elementPatern">Pattern whose <c>ElementSimilarity</c> selects the lookup method.</param>
        /// <param name="xmls">Elements to search.</param>
        /// <param name="elem">
        /// Receives the lookup result; stays null when <c>ElementSimilarity</c> is none of
        /// Congruent, Like, Same, Duplicate or This.
        /// </param>
        /// <returns>The table returned by <c>AddInTable</c>.</returns>
        public MiMFa_Table GetTable(MiMFa_Table dt, HTMLElementPatern elementPatern, List <MiMFa_XMLElement> xmls, out MiMFa_XMLElement elem)
        {
            elem = null;
            var similarity = elementPatern.ElementSimilarity;

            if (similarity == MiMFa_Similarity.Congruent)
            {
                elem = MiMFa_XMLElement.FindCongruent(xmls, elementPatern.SampleHTMLElement);
            }
            else if (similarity == MiMFa_Similarity.Like)
            {
                elem = MiMFa_XMLElement.FindLike(xmls, elementPatern.SampleHTMLElement);
            }
            else if (similarity == MiMFa_Similarity.Same)
            {
                elem = MiMFa_XMLElement.FindSame(xmls, elementPatern.SampleHTMLElement);
            }
            else if (similarity == MiMFa_Similarity.Duplicate)
            {
                elem = MiMFa_XMLElement.FindDuplicate(xmls, elementPatern.SampleHTMLElement);
            }
            else if (similarity == MiMFa_Similarity.This)
            {
                elem = MiMFa_XMLElement.Find(xmls, elementPatern.SampleHTMLElement);
            }

            // AddInTable handles a null element itself.
            return AddInTable(dt, elementPatern, elem, xmls);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Returns the sample table, rebuilding it from the default pattern when
 /// <paramref name="patern"/> has no patterns of its own.
 /// </summary>
 /// <param name="patern">Web pattern whose <c>Paterns</c> and <c>DefaultPatern</c> are inspected.</param>
 /// <returns>
 /// A table freshly built from <c>DefaultPatern</c> (also stored in <c>SampleTable</c>) when
 /// <c>Paterns</c> is empty and <c>DefaultPatern</c> is not null; otherwise the current <c>SampleTable</c>.
 /// </returns>
 public MiMFa_Table FetchSampleTable(WebPatern patern)
 {
     // Nothing to rebuild: either explicit patterns exist or there is no default pattern.
     if (patern.Paterns.Count != 0 || patern.DefaultPatern == null)
     {
         return SampleTable;
     }

     // GetTable receives a copy of the default pattern's sample elements rather than the elements themselves.
     var rebuilt = GetTable(
         patern.DefaultPatern,
         MiMFa_XMLElement.GetCopy(patern.DefaultPatern.SampleHTMLElements));

     return SampleTable = rebuilt;
 }
Exemplo n.º 4
0
 /// <summary>
 /// Copy constructor: initializes this pattern with the property values of
 /// <paramref name="patern"/>.
 /// </summary>
 /// <param name="patern">
 /// Pattern to copy from. Values are assigned directly, so <c>SampleHTMLElement</c>
 /// refers to the same element object as the source pattern.
 /// </param>
 public HTMLElementPatern(HTMLElementPatern patern)
 {
     // What to match and how strictly.
     this.SampleHTMLElement = patern.SampleHTMLElement;
     this.ElementSimilarity = patern.ElementSimilarity;
     this.All = patern.All;

     // What to do with a match.
     this.Usage = patern.Usage;
     this.LinkJob = patern.LinkJob;
     this.ElementItems = patern.ElementItems;
     this.DestinationPath = patern.DestinationPath;

     // Which part of the match to read and where to put it in the table.
     this.AttributeName = patern.AttributeName;
     this.ChildName = patern.ChildName;
     this.ColName = patern.ColName;
     this.TableValuePositionType = patern.TableValuePositionType;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Adds the value extracted from <paramref name="elem"/> to <paramref name="dt"/>, or,
        /// for patterns whose usage is Set, performs the pattern's link job.
        /// </summary>
        /// <param name="dt">Table to add to.</param>
        /// <param name="elementPatern">Pattern supplying usage, link job, column name and cell position.</param>
        /// <param name="elem">Matched element; when null an empty string is added instead.</param>
        /// <param name="xmls">Not used by this method.</param>
        /// <returns>
        /// The table after the addition. For the InternalPage and ExternalPage link jobs a fetch
        /// is queued and <paramref name="dt"/> is returned unchanged.
        /// </returns>
        public MiMFa_Table AddInTable(MiMFa_Table dt, HTMLElementPatern elementPatern, MiMFa_XMLElement elem, List <MiMFa_XMLElement> xmls)
        {
            // No element found: record an empty cell.
            if (elem == null)
            {
                return MiMFa_Table.AddInTable(dt, "", elementPatern.ColName, elementPatern.TableValuePositionType);
            }

            string value = GetValueFromXML(elementPatern, elem);

            // Any usage other than Set just stores the extracted value.
            if (elementPatern.Usage != MiMFa_Usage.Set)
            {
                return MiMFa_Table.AddInTable(dt, value, elementPatern.ColName, elementPatern.TableValuePositionType);
            }

            var job = elementPatern.LinkJob;

            if (job == MiMFa_LinkJob.InternalPage)
            {
                AppendInFetchQueue(new FetchPatern(fetch_url, value), true);
                return dt;
            }

            if (job == MiMFa_LinkJob.ExternalPage)
            {
                AppendInFetchQueue(fetch_webpatern.FindFetchPaternFor(value, fetch_url.Source), true);
                return dt;
            }

            // Every other link job (including Download) saves the target to a file.
            string target = string.Join("", elementPatern.DestinationPath, "\\", MiMFa_UnicCode.CreateNewString(10), "_");

            if (MiMFa_Internet.IsWellURL(value))
            {
                // Append a shortened, file-name-safe form of the last URL segment.
                string lastSegment = value.Split('/').Last();
                target += MiMFa_StringService.CompressedText(MiMFa_Path.NormalizeForFileAndFolderName(lastSegment), 25, "");
            }

            bool saved = MiMFa_Internet.DownloadOrSave(value, target);

            // Three cells: the file path, then the source value and the download state in the following columns.
            dt = MiMFa_Table.AddInTable(dt, target, elementPatern.ColName, elementPatern.TableValuePositionType);
            dt = MiMFa_Table.AddInTable(dt, value, string.Join("", "auto_", elementPatern.ColName, "_URL"), MiMFa_TableValuePositionType.NextColumnCell);
            dt = MiMFa_Table.AddInTable(dt, saved + "", string.Join("", "auto_", elementPatern.ColName, "_State"), MiMFa_TableValuePositionType.NextColumnCell);
            return dt;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Extracts a string from <paramref name="elem"/> according to the usage and item
        /// selection of <paramref name="elementPatern"/>.
        /// </summary>
        /// <param name="elementPatern">Pattern supplying <c>Usage</c>, <c>ElementItems</c>, <c>ChildName</c> and <c>AttributeName</c>.</param>
        /// <param name="elem">Element to read from; it is dereferenced without a null check.</param>
        /// <returns>
        /// For Null/Get usage, the part of the element chosen by <c>ElementItems</c>; for Set usage,
        /// a link-like value taken from the element; a single space (" ") when the usage or the
        /// item selection is not handled by any case.
        /// </returns>
        public string GetValueFromXML(HTMLElementPatern elementPatern, MiMFa_XMLElement elem)
        {
            switch (elementPatern.Usage)
            {
            case MiMFa_Usage.Null:
            case MiMFa_Usage.Get:
                switch (elementPatern.ElementItems)
                {
                case MiMFa_XMLElementItems.Null:
                case MiMFa_XMLElementItems.ThisContent:
                    return(elem.OuterText);

                case MiMFa_XMLElementItems.ThisTag:
                    return(elem.Outer);

                case MiMFa_XMLElementItems.ThisAttribute:
                    return(elem.GetAttribute(elementPatern.AttributeName));

                // Child* cases: read from the first child with the configured tag name;
                // a placeholder element stands in when no such child is found.
                case MiMFa_XMLElementItems.ChildContent:
                    return((elem.GetFirstChildElementByTagName(elementPatern.ChildName) ?? new MiMFa_XMLElement(-1, "", "", "")).OuterText);

                case MiMFa_XMLElementItems.ChildTag:
                    return((elem.GetFirstChildElementByTagName(elementPatern.ChildName) ?? new MiMFa_XMLElement(-1, "", "", "")).Outer);

                case MiMFa_XMLElementItems.ChildAttribute:
                    return((elem.GetFirstChildElementByTagName(elementPatern.ChildName) ?? new MiMFa_XMLElement(-1, "", "", "")).GetAttribute(elementPatern.AttributeName));

                // Children* cases: wrap all children with the configured tag name in a
                // placeholder parent and read from that wrapper.
                case MiMFa_XMLElementItems.ChildrenContent:
                    return((new MiMFa_XMLElement(-1, "", "", "")
                    {
                        Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                    }).OuterText);

                case MiMFa_XMLElementItems.ChildrenTag:
                    return((new MiMFa_XMLElement(-1, "", "", "")
                    {
                        Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                    }).Outer);

                case MiMFa_XMLElementItems.ChildrenAttribute:
                    // NOTE(review): "¶" is presumably the separator GetAllItems puts between values - confirm.
                    return(MiMFa_CollectionService.GetAllItems((new MiMFa_XMLElement(-1, "", "", "")
                    {
                        Children = elem.GetChildrenElementsByTagName(elementPatern.ChildName)
                    }).GetChildAttributes(elementPatern.AttributeName), "¶"));
                }
                // Unhandled ElementItems value: fall through to the " " default below.
                break;

            case MiMFa_Usage.Set:
                // Take the first non-blank candidate: href, src, url attribute, then the inner text.
                string str = elem.GetAttribute("href");
                if (string.IsNullOrWhiteSpace(str))
                {
                    str = elem.GetAttribute("src");
                }
                if (string.IsNullOrWhiteSpace(str))
                {
                    str = elem.GetAttribute("url");
                }
                if (string.IsNullOrWhiteSpace(str))
                {
                    str = elem.InnerText;
                }
                if (!string.IsNullOrWhiteSpace(str))
                {
                    if (Uri.IsWellFormedUriString(str, UriKind.Relative))
                    {
                        // Relative link: prefix it with the base URL of the page being fetched.
                        // The slash test is ordinal because URLs are not linguistic text; the
                        // culture-sensitive overload can give culture-dependent results.
                        str = MiMFa_Internet.GetBaseWebURL(fetch_url) + (str.StartsWith("/", StringComparison.Ordinal) ? "" : "/") + str;
                    }
                }
                else
                {
                    // Every candidate was blank: fall back to the element's outer text.
                    str = elem.OuterText;
                }
                return(str);
            }
            return(" ");
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds a table by applying every element pattern of <paramref name="patern"/> to
        /// <paramref name="xmls"/>, then post-processes the table and saves it to
        /// <c>fetch_url.TableAddress</c>.
        /// </summary>
        /// <param name="patern">Fetch pattern whose <c>HTMLElementsPatern</c> list drives the extraction.</param>
        /// <param name="xmls">Elements to extract from; the local reference is re-assigned as the document is consumed.</param>
        /// <returns>
        /// The table built for this call. When <c>HTMLElementsPatern</c> is null an empty table is
        /// returned immediately and nothing is saved.
        /// </returns>
        public MiMFa_Table GetTable(FetchPatern patern, List <MiMFa_XMLElement> xmls)
        {
            MiMFa_Table mdt = new MiMFa_Table();

            if (patern.HTMLElementsPatern == null)
            {
                return(mdt);
            }
            // Working copy of the patterns: one-shot patterns are removed from it as they are processed.
            List <HTMLElementPatern> continuehep = new List <HTMLElementPatern>();

            MiMFa_CollectionService.CopyTo(ref continuehep, patern.HTMLElementsPatern);
            List <MiMFa_XMLElement> me = new List <MiMFa_XMLElement>();

            foreach (var item in patern.HTMLElementsPatern)
            {
                me.Add(item.SampleHTMLElement);
            }
            // Common parent of all sample elements; used below to restrict each pass to one container.
            MiMFa_XMLElement e = MiMFa_XMLElement.GetElementsCommonParent(me);

            while (continuehep.Count > 0)
            {
                if (e != null)
                {
                    e = MiMFa_XMLElement.Find(xmls, e);
                }
                // Search inside the located container when there is one, otherwise in the whole list.
                List <MiMFa_XMLElement> scope = e == null ? xmls : new List <MiMFa_XMLElement>()
                {
                    e
                };
                bool             all  = false;
                MiMFa_XMLElement elem = null;
                int len = continuehep.Count;
                for (int i = 0; i < len; i++)
                {
                    // Capture the pattern before it may be removed from the list. The parent check
                    // below previously indexed continuehep[i] after RemoveAt/i--, which read the
                    // previous pattern, or threw ArgumentOutOfRangeException when i had become -1.
                    HTMLElementPatern current = continuehep[i];
                    MiMFa_XMLElement ele = null;
                    mdt = GetTable(mdt, current, scope, out ele);
                    if (current.All != MiMFa_Boolean.True)
                    {
                        // One-shot pattern: drop it and keep the index on the same slot.
                        continuehep.RemoveAt(i);
                        i--;
                        len--;
                    }
                    else if (ele != null)
                    {
                        elem = ele;
                        all  = true;
                    }
                    if (ele != null)
                    {
                        // continuehep[i + 1] is the next pattern still to be processed in this pass.
                        // NOTE(review): "Count - 1 > i + 1" skips this check when the next pattern is the last one - confirm that is intended.
                        if (continuehep.Count - 1 > i + 1 && current.SampleHTMLElement == continuehep[i + 1].SampleHTMLElement.Parent)
                        {
                            // The next sample is a child of this one: blank the start tag instead of moving past the element.
                            ele.StartTag = "";
                        }
                        else
                        {
                            xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, ele);
                        }
                    }
                }
                if (e == null)
                {
                    // No container to iterate over: a single pass is all there is.
                    break;
                }
                if (elem == null)
                {
                    xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, e);
                }
                else if (all)
                {
                    xmls = MiMFa_XMLElement.GetLastSplitIn(xmls, elem);
                }
                else
                {
                    break;
                }
            }
            //
            if (fetch_url.TransposeResult)
            {
                mdt = mdt.Transpose(true);
            }
            NumberOfLastTableRows = mdt.MainTable.Rows.Count - 1;
            NumberOfAllTableRows += NumberOfLastTableRows;
            if (string.IsNullOrWhiteSpace(fetch_url.TableAddress))
            {
                // No address configured: generate a file name in the temp directory from the current tick count.
                fetch_url.TableAddress = TempDirectory + DateTime.Now.Ticks + MiMFa_Table.Extention;
            }
            if (NumberOfLastTableRows > 0)
            {
                try
                {
                    if (!string.IsNullOrWhiteSpace(fetch_url.Source))
                    {
                        // Stamp each row from index 1 onward with the source of this fetch.
                        DataColumn dcs = mdt.AddColumnSafe("auto_Source");
                        for (int i = 1; i < mdt.Rows.Count; i++)
                        {
                            mdt.Rows[i][dcs] = fetch_url.Source;
                        }
                    }
                }
                // Best effort: a failure to add the source column is ignored.
                catch { }
            }
            try
            {
                MiMFa_Table nmt = null;
                MiMFa_IOService.OpenDeserializeFile(fetch_url.TableAddress, ref nmt);
                if (nmt != null)
                {
                    // NOTE(review): the concatenated table is never used - mdt alone is saved below.
                    // Presumably the merged table was meant to be persisted; confirm before changing.
                    nmt = MiMFa_Table.ConcatTable(nmt, mdt);
                }
            }
            // Best effort: a missing or unreadable previous table is ignored.
            catch { }
            MiMFa_IOService.SaveSerializeFile(fetch_url.TableAddress, mdt);
            return(mdt);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Shows the given elements by handing the result of <c>MiMFa_XMLElement.GetOuter</c>
 /// to the matching <c>ShowDocument</c> overload.
 /// </summary>
 /// <param name="htmlElements">Elements to show.</param>
 public void ShowDocument(IEnumerable <MiMFa_XMLElement> htmlElements)
 {
     var outer = MiMFa_XMLElement.GetOuter(htmlElements);

     ShowDocument(outer);
 }