Exemple #1
0
        /// <summary>
        /// Tries to build an item for ICQ-style content (per the method name) starting at <paramref name="t0"/>.
        /// The raw item is produced by <c>AttachISBN</c>; hyphens are then removed from its value and the
        /// result is accepted only when it consists of 6 to 10 digits.
        /// </summary>
        /// <param name="t0">First token of the candidate; must be a <c>NumberToken</c>.</param>
        /// <returns>The item with the hyphen-free value, or <c>null</c> when any check fails.</returns>
        public static UriItemToken AttachIcqContent(Pullenti.Ner.Token t0)
        {
            // Non-numeric start tokens are rejected before any parsing is attempted.
            UriItemToken item = (t0 is Pullenti.Ner.NumberToken) ? AttachISBN(t0) : null;
            if (item == null)
            {
                return null;
            }

            // Hyphens are treated as separators only and are stripped from the stored value.
            if (item.Value.Contains("-"))
            {
                item.Value = item.Value.Replace("-", "");
            }

            string digits = item.Value;
            int count = digits.Length;

            // Accepted length range is 6..10 characters inclusive.
            if (count < 6 || count > 10)
            {
                return null;
            }

            // Every remaining character has to be a digit.
            for (int k = 0; k < count; k++)
            {
                if (!char.IsDigit(digits[k]))
                {
                    return null;
                }
            }
            return item;
        }
Exemple #2
0
        /// <summary>
        /// Reads URI content starting at <paramref name="t0"/> using the wide punctuation set
        /// <c>.;:-_=+&amp;%#@/\?[]()!~</c> and then normalizes the tail of the collected value.
        /// </summary>
        /// <param name="t0">Token at which the content is expected to start.</param>
        /// <param name="afterHttp">Forwarded to <c>_AttachUriContent</c> as its <c>canBeWhitespaces</c> argument.</param>
        /// <returns>The normalized item, or <c>null</c> when nothing could be attached.</returns>
        public static UriItemToken AttachUriContent(Pullenti.Ner.Token t0, bool afterHttp)
        {
            UriItemToken item = _AttachUriContent(t0, ".;:-_=+&%#@/\\?[]()!~", afterHttp);
            if (item == null)
            {
                return null;
            }

            // A closing '.', ';', '-' or ':' token is excluded from the item
            // (only when the item ends beyond character position 3).
            if (item.EndToken.IsCharOf(".;-:") && item.EndChar > 3)
            {
                item.EndToken = item.EndToken.Previous;
                item.Value = item.Value.Remove(item.Value.Length - 1);
            }

            // One trailing forward slash, then one trailing backslash, is cut from the value.
            if (item.Value.EndsWith("/"))
            {
                item.Value = item.Value.Remove(item.Value.Length - 1);
            }
            if (item.Value.EndsWith("\\"))
            {
                item.Value = item.Value.Remove(item.Value.Length - 1);
            }

            // Backslashes become forward slashes, but only if one occurs past the first character.
            int firstBackslash = item.Value.IndexOf('\\');
            if (firstBackslash > 0)
            {
                item.Value = item.Value.Replace('\\', '/');
            }
            return item;
        }
Exemple #3
0
        /// <summary>
        /// Tries to read a Skype-style identifier (per the method name) starting at <paramref name="t0"/>.
        /// Only '.' and '_' are allowed as punctuation, and the collected value must be at least 5 characters long.
        /// </summary>
        /// <param name="t0">Token at which the identifier is expected to start; must not be null.</param>
        /// <returns>The attached item, or <c>null</c> when the start is Cyrillic, nothing is attached, or the value is too short.</returns>
        public static UriItemToken AttachSkype(Pullenti.Ner.Token t0)
        {
            // A token made of Cyrillic letters cannot start the identifier.
            if (t0.Chars.IsCyrillicLetter)
            {
                return null;
            }

            UriItemToken login = _AttachUriContent(t0, "._", false);

            // Values shorter than 5 characters are discarded.
            bool accepted = login != null && login.Value.Length >= 5;
            return accepted ? login : null;
        }
Exemple #4
0
        /// <summary>
        /// Assembles a URL that starts with a domain name at <paramref name="t0"/>. After the domain the method
        /// optionally consumes a ":port" number (or, for "vk.com", a hyphen-separated part appended as a path),
        /// then any number of "/segment" parts, then a "?query" part and a "#fragment" part.
        /// </summary>
        /// <param name="t0">Token at which the domain name is expected to start.</param>
        /// <returns>
        /// An item spanning from <paramref name="t0"/> to the last consumed token whose value is the assembled text,
        /// or <c>null</c> when no domain name is found or the assembled text contains no letter at all.
        /// </returns>
        public static UriItemToken AttachUrl(Pullenti.Ner.Token t0)
        {
            UriItemToken host = AttachDomainName(t0, true, false);
            if (host == null)
            {
                return null;
            }

            StringBuilder url = new StringBuilder(host.Value);
            Pullenti.Ner.Token last = host.EndToken;
            Pullenti.Ner.Token afterHost = last.Next;

            if (afterHost != null && afterHost.IsChar(':') && (afterHost.Next is Pullenti.Ner.NumberToken))
            {
                // "host:port" - the number following the colon is appended as is.
                last = afterHost.Next;
                url.AppendFormat(":{0}", ((Pullenti.Ner.NumberToken)last).Value);
            }
            else if (host.Value == "vk.com" && afterHost != null && afterHost.IsHiphen && afterHost.Next != null)
            {
                // "vk.com-xxx" - the part after the hyphen is appended as a path segment.
                // The end position moves past the hyphen even if no content gets attached.
                last = afterHost.Next;
                UriItemToken page = _AttachUriContent(last, ".-_+%", false);
                if (page != null)
                {
                    last = page.EndToken;
                    url.AppendFormat("/{0}", page.Value);
                }
            }

            // Path: a run of "/segment" pieces with no whitespace in front of the slash.
            Pullenti.Ner.Token slash = last.Next;
            while (slash != null && !slash.IsWhitespaceBefore && slash.IsChar('/'))
            {
                UriItemToken segment = null;
                if (!slash.IsWhitespaceAfter)
                {
                    segment = _AttachUriContent(slash.Next, ".-_+%", false);
                }
                if (segment == null)
                {
                    // A final slash still belongs to the URL span, although it is not added to the text.
                    last = slash;
                    break;
                }
                last = segment.EndToken;
                url.AppendFormat("/{0}", segment.Value);
                slash = last.Next;
            }

            // Query string: '?' glued to the previous token and to the following content.
            Pullenti.Ner.Token mark = last.Next;
            if (mark != null && mark.IsChar('?') && !mark.IsWhitespaceAfter && !last.IsWhitespaceAfter)
            {
                UriItemToken query = _AttachUriContent(mark.Next, ".-_+%=&", false);
                if (query != null)
                {
                    last = query.EndToken;
                    url.AppendFormat("?{0}", query.Value);
                }
            }

            // Fragment: '#' under the same adjacency rules.
            mark = last.Next;
            if (mark != null && mark.IsChar('#') && !mark.IsWhitespaceAfter && !last.IsWhitespaceAfter)
            {
                UriItemToken fragment = _AttachUriContent(mark.Next, ".-_+%", false);
                if (fragment != null)
                {
                    last = fragment.EndToken;
                    url.AppendFormat("#{0}", fragment.Value);
                }
            }

            // The result is rejected when the assembled text has no letter in it.
            bool hasLetter = false;
            for (int k = 0; k < url.Length && !hasLetter; k++)
            {
                hasLetter = char.IsLetter(url[k]);
            }
            if (!hasLetter)
            {
                return null;
            }

            UriItemToken result = new UriItemToken(t0, last);
            result.Value = url.ToString();
            return result;
        }
Exemple #5
0
        /// <summary>
        /// Collects a run of adjacent tokens starting at <paramref name="t0"/> into one URI-like string.
        /// Letters and numbers are always accepted; punctuation is accepted only when it is listed in
        /// <paramref name="chars"/>. A leading domain name (via <c>AttachDomainName</c>) is tried first and,
        /// when found, is prepended to the collected text.
        /// </summary>
        /// <param name="t0">Token at which collecting starts.</param>
        /// <param name="chars">Punctuation characters that may appear inside the content.</param>
        /// <param name="canBeWhitespaces">
        /// When true (and a domain was recognized), a whitespace gap on the same line may be crossed
        /// under the conditions checked in the loop below; it is also passed on to <c>AttachDomainName</c>.
        /// </param>
        /// <returns>
        /// A new item from <paramref name="t0"/> to the last accepted token; the domain item itself (possibly null)
        /// when nothing beyond it was collected or the collected text has no letter or digit; <c>null</c> when the
        /// domain is shorter than 3 characters or the text is just "WWW" (case-insensitive).
        /// </returns>
        static UriItemToken _AttachUriContent(Pullenti.Ner.Token t0, string chars, bool canBeWhitespaces = false)
        {
            StringBuilder txt = new StringBuilder();

            // t1 tracks the last token accepted into the result.
            Pullenti.Ner.Token t1  = t0;
            UriItemToken       dom = AttachDomainName(t0, true, canBeWhitespaces);

            if (dom != null)
            {
                // A recognized domain shorter than 3 characters rejects the whole attempt.
                if (dom.Value.Length < 3)
                {
                    return(null);
                }
            }
            // Remembers the most recently opened '(' or '[' so that a closing bracket can be matched; 0 means none.
            char openChar = (char)0;

            // Scanning continues right after the domain when there is one, otherwise from t0.
            Pullenti.Ner.Token t = t0;
            if (dom != null)
            {
                t = dom.EndToken.Next;
            }
            for (; t != null; t = t.Next)
            {
                // Whitespace before a token (other than the first) normally ends the content.
                if (t != t0 && t.IsWhitespaceBefore)
                {
                    // A line break, or whitespace not being permitted at all, always stops.
                    if (t.IsNewlineBefore || !canBeWhitespaces)
                    {
                        break;
                    }
                    // Gaps are only crossed when a domain name was recognized at the start.
                    if (dom == null)
                    {
                        break;
                    }
                    if (t.Previous.IsHiphen)
                    {
                        // Gap right after a hyphen: keep going.
                    }
                    else if (t.Previous.IsCharOf(",;"))
                    {
                        break;
                    }
                    else if (t.Previous.IsChar('.') && t.Chars.IsLetter && t.LengthChar == 2)
                    {
                        // Gap after a dot followed by a two-letter token: keep going.
                        // NOTE(review): presumably a two-letter domain zone - confirm.
                    }
                    else
                    {
                        // Look ahead over the next whitespace-free run of tokens: the gap is crossed only if
                        // that run reaches a known page extension or a slash/backslash.
                        bool ok = false;
                        Pullenti.Ner.Token tt1 = t;
                        if (t.IsCharOf("\\/"))
                        {
                            // A slash at the very start of the run is skipped and does not count by itself.
                            tt1 = t.Next;
                        }
                        Pullenti.Ner.Token tt0 = tt1;
                        for (; tt1 != null; tt1 = tt1.Next)
                        {
                            if (tt1 != tt0 && tt1.IsWhitespaceBefore)
                            {
                                break;
                            }
                            if (tt1 is Pullenti.Ner.NumberToken)
                            {
                                continue;
                            }
                            if (!(tt1 is Pullenti.Ner.TextToken))
                            {
                                break;
                            }
                            string term1 = (tt1 as Pullenti.Ner.TextToken).Term;
                            if (((term1 == "HTM" || term1 == "HTML" || term1 == "SHTML") || term1 == "ASP" || term1 == "ASPX") || term1 == "JSP")
                            {
                                ok = true;
                                break;
                            }
                            if (!tt1.Chars.IsLetter)
                            {
                                if (tt1.IsCharOf("\\/"))
                                {
                                    ok = true;
                                    break;
                                }
                                // Any other punctuation must be from the allowed set.
                                if (!tt1.IsCharOf(chars))
                                {
                                    break;
                                }
                            }
                            else if (!tt1.Chars.IsLatinLetter)
                            {
                                // A non-Latin word ends the look-ahead without success.
                                break;
                            }
                        }
                        if (!ok)
                        {
                            break;
                        }
                    }
                }
                // Numbers are appended using their original source text.
                if (t is Pullenti.Ner.NumberToken)
                {
                    Pullenti.Ner.NumberToken nt = t as Pullenti.Ner.NumberToken;
                    txt.Append(nt.GetSourceText());
                    t1 = t;
                    continue;
                }
                Pullenti.Ner.TextToken tt = t as Pullenti.Ner.TextToken;
                if (tt == null)
                {
                    // Not a text token: only two kinds of referent tokens are accepted, anything else stops.
                    Pullenti.Ner.ReferentToken rt = t as Pullenti.Ner.ReferentToken;
                    if (rt != null && rt.BeginToken.IsValue("РФ", null))
                    {
                        // A referent starting with "РФ" is taken only directly after a dot.
                        // NOTE(review): presumably the ".рф" domain zone already recognized as an entity - confirm.
                        if (txt.Length > 0 && txt[txt.Length - 1] == '.')
                        {
                            txt.Append(rt.BeginToken.GetSourceText());
                            t1 = t;
                            continue;
                        }
                    }
                    // A single-token Latin referent is taken as plain text.
                    if (rt != null && rt.Chars.IsLatinLetter && rt.BeginToken == rt.EndToken)
                    {
                        txt.Append(rt.BeginToken.GetSourceText());
                        t1 = t;
                        continue;
                    }
                    break;
                }
                string src = tt.GetSourceText();
                // Only the first character of the token decides whether it is a letter or punctuation.
                char   ch  = src[0];
                if (!char.IsLetter(ch))
                {
                    // Punctuation outside the allowed set ends the content.
                    if (chars.IndexOf(ch) < 0)
                    {
                        break;
                    }
                    // Bracket tracking: a closing bracket is accepted only if it matches the last opened one.
                    if (ch == '(' || ch == '[')
                    {
                        openChar = ch;
                    }
                    else if (ch == ')')
                    {
                        if (openChar != '(')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                    else if (ch == ']')
                    {
                        if (openChar != '[')
                        {
                            break;
                        }
                        openChar = (char)0;
                    }
                }
                txt.Append(src);
                t1 = t;
            }
            // Nothing collected beyond the domain: the domain item (possibly null) is the answer.
            if (txt.Length == 0)
            {
                return(dom);
            }
            int i;

            // Find the first letter or digit; text made of punctuation only is not content.
            for (i = 0; i < txt.Length; i++)
            {
                if (char.IsLetterOrDigit(txt[i]))
                {
                    break;
                }
            }
            if (i >= txt.Length)
            {
                return(dom);
            }
            // One trailing '.' or '/' is dropped and the end token moves back by one token.
            if (txt[txt.Length - 1] == '.' || txt[txt.Length - 1] == '/')
            {
                txt.Length--;
                t1 = t1.Previous;
            }
            if (dom != null)
            {
                txt.Insert(0, dom.Value);
            }
            string tmp = txt.ToString();

            // If the text starts with a double backslash, every double backslash in it becomes "//".
            if (tmp.StartsWith("\\\\"))
            {
                txt.Replace("\\\\", "//");
                tmp = txt.ToString();
            }
            // The leading "//" is removed only from tmp (used for the check below); txt keeps it.
            if (tmp.StartsWith("//"))
            {
                tmp = tmp.Substring(2);
            }
            // A bare "www" (compared case-insensitively) is not accepted as content.
            if (string.Compare(tmp, "WWW", true) == 0)
            {
                return(null);
            }
            UriItemToken res = new UriItemToken(t0, t1)
            {
                Value = txt.ToString()
            };

            return(res);
        }