Example #1
0
        /// <summary>
        /// Checks that a query made of OR-ed <c>site:</c> terms includes a result for each
        /// listed site and does not include a result whose host is not listed.
        /// </summary>
        public void Parse4Test()
        {
            const string InputQuery = "site:reddit.com OR site:wikipedia.org OR site:news.com.au";

            var reddit    = new WorkspaceResult { Uri = "https://www.reddit.com" };
            var wikipedia = new WorkspaceResult { Uri = "https://en.wikipedia.org" };
            var news      = new WorkspaceResult { Uri = "https://www.news.com.au" };
            var unlisted  = new WorkspaceResult { Uri = "https://www.example.com.au" };

            var query = new Query(InputQuery);

            // Every site named in the query must be included, in the order they were declared.
            foreach (var expectedHit in new[] { reddit, wikipedia, news })
            {
                Assert.IsTrue(query.Evaluate(expectedHit, null, null).Action == EvaluationResultAction.Include);
            }

            // Shares the ".com.au" suffix with a listed site but is not itself listed.
            Assert.IsFalse(query.Evaluate(unlisted, null, null).Action == EvaluationResultAction.Include);
        }
Example #2
0
        /// <summary>
        /// Matches when the result's property cache holds a non-null value under
        /// <c>HeaderKey</c> and either no <c>SearchText</c> is configured or the value's
        /// string form contains <c>SearchText</c>.
        /// </summary>
        /// <param name="result">Result whose <c>PropertyCache</c> is inspected.</param>
        /// <param name="fragment">Not used by this filter.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// A single-element array holding the tag made from the cached value on a match;
        /// otherwise <c>EmptySet</c>.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            object cached = null;

            if (result.PropertyCache.ContainsKey(HeaderKey))
            {
                cached = result.PropertyCache[HeaderKey];
            }

            // A missing key and a key stored with a null value are both "no match".
            if (cached == null)
            {
                return EmptySet;
            }

            string text = cached.ToString();

            // Blank SearchText means "match on presence of the key alone".
            if (string.IsNullOrWhiteSpace(SearchText) || text.Contains(SearchText))
            {
                return new string[] { Utility.MakeTag(text) };
            }

            return EmptySet;
        }
Example #3
0
 /// <summary>
 /// Resolves a named field of <paramref name="result"/> to its string value.
 /// </summary>
 /// <param name="fieldName">
 /// One of "domain", "host", "ip", "title", "referer", "datahash" or "tags".
 /// </param>
 /// <param name="result">Result the value is read from.</param>
 /// <returns>The field's value, or an empty string for any other field name.</returns>
 private string GetFieldValue(string fieldName, WorkspaceResult result)
 {
     switch (fieldName)
     {
     // "domain" and "host" both resolve to the host part of the result's URI.
     case "domain":
     case "host":
         return new Uri(result.Uri).Host;

     case "ip":
         // NOTE(review): this is the ToString() of whatever GetHostIPAddress returns;
         // if that is a Task rather than an address, this yields a type name - confirm.
         return Utility.GetHostIPAddress(new Uri(result.Uri)).ToString();

     case "title":
         return result.Title;

     case "referer":
         return result.RefererUri;

     case "datahash":
         return result.DataHash;

     case "tags":
         // Tags are concatenated back to back with no separator.
         return result.Tags.Aggregate("", (joined, tag) => joined + tag);

     default:
         return string.Empty;
     }
 }
Example #4
0
        /// <summary>
        /// Runs <c>matcher</c> over <paramref name="fragment"/> and returns tags for the match
        /// groups, unless the text around the match is already in <c>seenFragments</c>.
        /// </summary>
        /// <param name="result">Not used by this filter.</param>
        /// <param name="fragment">Text the pattern is matched against.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// The distinct tags built from the match groups, or <c>EmptySet</c> when there is no
        /// match or the surrounding text has been seen before.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            var hit = matcher.Match(fragment);

            if (!hit.Success)
            {
                // no matches
                return EmptySet;
            }

            var context = fragment.Fragment(hit.Index, 40, 40);
            bool seenRecently = seenFragments.Contains(context);

            // The context is recorded on every match, whether or not it was already present.
            seenFragments.Enqueue(context);

            if (seenRecently)
            {
                // Suppress repeats even though they match - this should get rid of menu
                // links referring to the same link over and over.
                return EmptySet;
            }

            var groupTexts = hit.Groups.OfType<object>().Select(g => g.ToString());

            return Utility.MakeTags(groupTexts).Distinct().ToArray();
        }
Example #5
0
 /// <summary>
 /// Builds a <c>ContentType</c> from the result's "content-type" response property.
 /// </summary>
 /// <param name="result">Result whose <c>ResponseProperties</c> are consulted.</param>
 /// <returns>
 /// The parsed content type, or <c>ContentType.Unknown</c> when the property is absent.
 /// </returns>
 ContentType GetContentType(WorkspaceResult result)
 {
     const string HeaderName = "content-type";

     if (result.ResponseProperties.ContainsKey(HeaderName))
     {
         return new ContentType(result.ResponseProperties[HeaderName]);
     }

     return ContentType.Unknown;
 }
Example #6
0
        /// <summary>
        /// Matches when the request properties contain <c>HeaderKey</c> and either no
        /// <c>SearchText</c> is configured or the header value contains <c>SearchText</c>.
        /// </summary>
        /// <param name="result">Result whose <c>RequestProperties</c> are inspected.</param>
        /// <param name="fragment">Not used by this filter.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// A single-element array holding the tag made from the header value on a match;
        /// otherwise <c>EmptySet</c>.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            if (!result.RequestProperties.ContainsKey(HeaderKey))
            {
                return EmptySet;
            }

            var headerValue = result.RequestProperties[HeaderKey];

            // A configured SearchText must appear in the value; blank SearchText matches anything.
            if (!string.IsNullOrWhiteSpace(SearchText) && !headerValue.Contains(SearchText))
            {
                return EmptySet;
            }

            return new string[] { Utility.MakeTag(headerValue) };
        }
Example #7
0
        /// <summary>
        /// Picks the category for a result: its first tag, or <c>Name</c> when the first tag
        /// is blank or <c>UseNameForCategory</c> is set.
        /// </summary>
        /// <param name="result">Result whose <c>Tags</c> supply the candidate category.</param>
        /// <returns>The category string.</returns>
        private string MakeCategory(WorkspaceResult result)
        {
            string firstTag = result.Tags.FirstOrDefault();
            bool fallBackToName = string.IsNullOrWhiteSpace(firstTag) || UseNameForCategory;

            return fallBackToName ? Name : firstTag;
        }
Example #8
0
        /// <summary>
        /// Records the data carried by a <c>meta</c> tag on <paramref name="result"/>.
        /// Tags with a <c>property</c> attribute are stored in the property cache under the
        /// sanitised property name; <c>og_title</c> and <c>og_description</c> additionally set
        /// the title/description. Tags without one are treated as legacy <c>name=</c> tags,
        /// of which only "description" is handled.
        /// </summary>
        /// <param name="reader">Reader positioned on the meta tag whose attributes are read.</param>
        /// <param name="result">Result that receives the extracted values.</param>
        void ProcessMetaTag(HtmlReader reader, WorkspaceResult result)
        {
            // Null-conditional read: a meta tag with no "property" attribute must fall through
            // to the name= branch below instead of throwing on the lower-casing call.
            // NOTE(review): assumes GetAttribute yields null for a missing attribute - confirm.
            // Invariant lower-casing keeps the key independent of the current culture.
            var propertyName = reader.GetAttribute("property")?.ToLowerInvariant();

            if (!string.IsNullOrWhiteSpace(propertyName))
            {
                propertyName = SanitiseProperty(propertyName);
                var content = SanitiseAttribute(reader.GetAttribute("content"));

                // The first occurrence of a property wins; the raw (unsanitised) content is cached.
                if (!result.PropertyCache.ContainsKey(propertyName))
                {
                    result.PropertyCache.Add(propertyName, reader.GetAttribute("content"));
                }

                switch (propertyName)
                {
                case "og_title":     // og:title
                    // Add-if-missing followed by an overwrite: the og value always replaces
                    // any title cached earlier.
                    if (!result.PropertyCache.ContainsKey("title"))
                    {
                        result.PropertyCache.Add("title", content);
                    }
                    result.PropertyCache["title"] = content;
                    result.Title = content;
                    break;

                case "og_description":     // og:description
                    result.Description = content;
                    if (!result.PropertyCache.ContainsKey("description"))
                    {
                        result.PropertyCache.Add("description", content);
                    }
                    result.PropertyCache["description"] = content;
                    break;

                default:
                    break;
                }
            }
            else
            {
                // other random historical meta tags here
                // A tag with neither attribute leaves metaname null, which matches no case.
                var metaname = reader.GetAttribute("name")?.ToLowerInvariant();

                switch (metaname)
                {
                case "description":
                    var value = SanitiseAttribute(reader.GetAttribute("content"));
                    // Only fills a gap; never overrides a description that is already cached.
                    if (!result.PropertyCache.ContainsKey("description"))
                    {
                        result.PropertyCache.Add("description", value);
                    }
                    break;
                }
            }
        }
Example #9
0
        /// <summary>
        /// Checks that a <c>UriFilter</c> built from "uri:reddit.com" yields at least one tag
        /// for a result whose <c>Uri</c> is that same string.
        /// </summary>
        public void Parse3Test()
        {
            const string InputQuery = "uri:reddit.com";

            // The result's Uri is deliberately the full query text, not just the host part.
            var candidate = new WorkspaceResult { Uri = InputQuery };
            var filter = new UriFilter(InputQuery);

            var tags = filter.IsMatch(candidate, "", null);

            Assert.IsTrue(tags.Any());
        }
Example #10
0
        /// <summary>
        /// Logical OR of two filters: returns the left filter's tags when it produces any,
        /// otherwise whatever the right filter returns.
        /// </summary>
        /// <param name="result">Passed through to both operands.</param>
        /// <param name="fragment">Passed through to both operands.</param>
        /// <param name="stream">Passed through to both operands.</param>
        /// <returns>The tags of the first operand that matched, or the right operand's result.</returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            var leftTags = Left.IsMatch(result, fragment, stream);

            // Short-circuit: the right operand is only evaluated when the left yields nothing.
            return leftTags.Any()
                ? leftTags
                : Right.IsMatch(result, fragment, stream);
        }
Example #11
0
        /// <summary>
        /// Matches a result only when the value of its <c>FieldName</c> field is not already
        /// in <c>seenWindow</c>; a newly seen value is added to the window.
        /// </summary>
        /// <param name="result">Result the field value is read from.</param>
        /// <param name="fragment">Not used by this filter.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// A one-element array (whose single element is null) for a value not in the window,
        /// or <c>EmptySet</c> for a value that is.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            string fieldValue = GetFieldValue(FieldName, result);

            if (!seenWindow.Contains(fieldValue))
            {
                seenWindow.Enqueue(fieldValue);

                // Non-empty array signals a match; it carries no actual tag text.
                return new string[1];
            }

            return EmptySet;
        }
    /// <summary>
    /// Lists the workspaces visible to the configured account and prints each one's
    /// friendly name to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Find your Account Sid and Auth Token at twilio.com/user/account
        const string accountSid = "ACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
        const string authToken = "your_auth_token";
        // Declared for reference; the listing call below does not take it.
        const string workspaceSid = "WSXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";

        var taskRouter = new TaskRouterClient(accountSid, authToken);
        WorkspaceResult listing = taskRouter.ListWorkspaces();

        foreach (Workspace entry in listing.Workspaces)
        {
            Console.WriteLine(entry.FriendlyName);
        }
    }
Example #13
0
        /// <summary>
        /// Logical AND of two filters: matches only when both operands produce tags, and
        /// returns the left tags followed by the right tags.
        /// </summary>
        /// <param name="result">Passed through to both operands.</param>
        /// <param name="fragment">Passed through to both operands.</param>
        /// <param name="stream">Passed through to both operands.</param>
        /// <returns>The concatenated tags, or <c>EmptySet</c> when either operand yields none.</returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            var leftTags = Left.IsMatch(result, fragment, stream);

            // Short-circuit: the right operand is only evaluated once the left has matched.
            if (leftTags.Any())
            {
                var rightTags = Right.IsMatch(result, fragment, stream);

                if (rightTags.Any())
                {
                    return leftTags.Concat(rightTags).ToArray();
                }
            }

            return EmptySet;
        }
Example #14
0
        /// <summary>
        /// Parses the request and response header blocks and stores each header on
        /// <paramref name="result"/> (<c>RequestProperties</c> / <c>ResponseProperties</c>).
        /// Header names are lower-cased; when a name repeats, the first value is kept.
        /// </summary>
        /// <param name="result">Result that receives the parsed headers.</param>
        /// <param name="requestString">Newline-separated "name: value" request lines; may be null or blank.</param>
        /// <param name="responseHeaders">Newline-separated "name: value" response lines; may be null or blank.</param>
        void ProcessHeaders(WorkspaceResult result, string requestString, string responseHeaders)
        {
            foreach (var header in ParseHeaderLines(requestString))
            {
                if (!result.RequestProperties.ContainsKey(header.Key))
                {
                    result.RequestProperties.Add(header.Key, header.Value);
                }
            }

            foreach (var header in ParseHeaderLines(responseHeaders))
            {
                if (!result.ResponseProperties.ContainsKey(header.Key))
                {
                    result.ResponseProperties.Add(header.Key, header.Value);
                }
            }
        }

        /// <summary>
        /// Splits a header block on '\n' and yields one (lower-cased name, trimmed value) pair
        /// per line that contains a ':' and has a non-blank name.
        /// </summary>
        /// <param name="headers">Raw header block; null or blank yields nothing.</param>
        /// <returns>The parsed pairs, in the order the lines appear.</returns>
        private static System.Collections.Generic.IEnumerable<System.Collections.Generic.KeyValuePair<string, string>> ParseHeaderLines(string headers)
        {
            if (string.IsNullOrWhiteSpace(headers))
            {
                yield break;
            }

            foreach (var line in headers.Split('\n'))
            {
                // Only the first ':' separates name from value, so values may contain colons.
                int idx = line.IndexOf(':');
                if (idx < 0)
                {
                    continue;
                }

                // Trim also drops the '\r' left behind by CRLF line endings. Invariant
                // lower-casing keeps keys such as "if-modified-since" stable in every culture.
                string key = line.Substring(0, idx).Trim().ToLowerInvariant();
                if (string.IsNullOrWhiteSpace(key))
                {
                    continue;
                }

                string value = line.Substring(idx + 1).Trim();

                yield return new System.Collections.Generic.KeyValuePair<string, string>(key, value);
            }
        }
Example #15
0
        /// <summary>
        /// Resolves the IP address of the result's request URI host, looks it up in
        /// <c>database</c>, and returns the resulting tags that are non-blank and, when
        /// <c>FilterData</c> is set, contain <c>FilterData</c>.
        /// </summary>
        /// <param name="result">
        /// Result whose "uri" request property is resolved; the resolved address is cached in
        /// its <c>PropertyCache</c> under "hostip".
        /// </param>
        /// <param name="fragment">Not used by this filter.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// The filtered tags, or <c>EmptySet</c> when no address is available or the database
        /// has no entry for it.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            const string HostIPCacheKey = "hostip";

            try
            {
                var uri = new Uri(result.RequestProperties["uri"]);

                // Resolve once per result; later calls reuse the cached address.
                // NOTE(review): this blocks on an awaitable because IsMatch is synchronous.
                if (!result.PropertyCache.ContainsKey(HostIPCacheKey))
                {
                    result.PropertyCache.Add(HostIPCacheKey, Utility.GetHostIPAddress(uri).GetAwaiter().GetResult());
                }

                IPAddress ip = result.PropertyCache[HostIPCacheKey] as IPAddress;

                // The cast yields null when the cached value is null or not an IPAddress;
                // there is nothing to look up in that case.
                if (ip == null)
                {
                    return EmptySet;
                }

                var city = database.City(ip);

                return GetTags(city)
                       .Where(x => !string.IsNullOrWhiteSpace(x) && (string.IsNullOrWhiteSpace(FilterData) || x.Contains(FilterData)))
                       .ToArray();
            }
            catch (AddressNotFoundException)
            {
                // The address is not in the database: treat as no match.
                return EmptySet;
            }
        }
        /// <summary>
        /// Classifies <paramref name="fragment"/> with the prediction engine and matches when
        /// the prediction exists, agrees with <c>SearchText</c>, and its maximum score exceeds
        /// <c>ConfidenceThreshold</c>. The last prediction is reused while the result's
        /// <c>DataHash</c> equals the hash of the previous call.
        /// </summary>
        /// <param name="result">Result whose <c>DataHash</c> decides whether to re-predict.</param>
        /// <param name="fragment">Text passed to the engine as <c>TextData</c>.</param>
        /// <param name="stream">Not used by this filter.</param>
        /// <returns>
        /// A single-element array holding the tag made from the predicted labels on a match;
        /// otherwise <c>EmptySet</c>.
        /// </returns>
        public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
        {
            var predictor = LoadPredictionEngineFromCache();

            bool hashChanged = result.DataHash != lastDataHash;

            if (hashChanged)
            {
                var input = new PageData { TextData = fragment };

                lastPrediction = predictor.Predict(input);
                lastDataHash = result.DataHash;
            }

            System.Diagnostics.Debug.WriteLine("Label '{0}', {1}, max(Score) = {2}", lastPrediction.PredictedLabels, ModelName, lastPrediction.Score.Max());

            bool accepted = lastPrediction.HasPrediction &&
                            lastPrediction.IsMatchingPrediction(SearchText) &&
                            lastPrediction.MaxScore > ConfidenceThreshold;

            if (!accepted)
            {
                return EmptySet;
            }

            return new string[] { Utility.MakeTag(lastPrediction.PredictedLabels) };
        }
Example #17
0
        /// <summary>
        /// Builds a <c>WorkspaceResult</c> from a fetched page: parses the request/response
        /// headers, hashes the stream, extracts a title and meta data from HTML content (or
        /// reads the whole body for other text content), and fills in the result's URI,
        /// referer, title, description and timestamps.
        /// </summary>
        /// <param name="stream">Page body; must be seekable. It is hashed and then rewound to the start before parsing.</param>
        /// <param name="requestString">Raw request header text handed to <c>ProcessHeaders</c>.</param>
        /// <param name="responseHeaders">Raw response header text handed to <c>ProcessHeaders</c>.</param>
        /// <param name="evalText">Receives the string form of <c>evaluationText</c> after parsing.</param>
        /// <returns>The populated result.</returns>
        /// <exception cref="FetchoException">Thrown when <paramref name="stream"/> is not seekable.</exception>
        public WorkspaceResult Build(Stream stream, string requestString, string responseHeaders, out string evalText)
        {
            WorkspaceResult result = new WorkspaceResult();

            result.SourceServerId = FetchoConfiguration.Current.CurrentServerNode.ServerId;

            if (!stream.CanSeek)
            {
                throw new FetchoException("WorkspaceResultBuilder needs a seekable stream");
            }

            ProcessHeaders(result, requestString, responseHeaders);
            result.DataHash = MD5Hash.Compute(stream).ToString();
            result.PageSize = stream.Length;
            // Rewind after hashing so the content parsers below start from the beginning.
            stream.Seek(0, SeekOrigin.Begin);

            ContentType contentType = GetContentType(result);
            // Rank of the title source used so far: 1 = <title>, 2 = <h1>, 3 = <h2>, 4 = none yet.
            // NOTE(review): every title branch also requires that no "title" is cached yet, so
            // in practice the first of these tags encountered wins regardless of rank - confirm
            // that is intended.
            int         titleness   = 4;

            if (contentType != null)
            {
                if (contentType.SubType.Contains("html"))
                {
                    // NOTE(review): disposing the HtmlReader may also close the stream - confirm.
                    using (var reader = new HtmlReader(stream))
                    {
                        while (!reader.EOF)
                        {
                            var node = reader.NextNode();
                            // Text nodes are appended to evaluationText, separated by spaces.
                            // NOTE(review): evaluationText is declared outside this method and is
                            // not reset here - confirm it starts empty for each call.
                            if (node.Type == HtmlTokenType.Text)
                            {
                                evaluationText.Append(node.Value);
                                evaluationText.Append(' ');
                            }

                            // script and style elements are consumed up to their end tag.
                            if (node.Value == "script")
                            {
                                ReadToEndTag(reader, "script");
                            }
                            else if (node.Value == "style")
                            {
                                ReadToEndTag(reader, "style");
                            }
                            else if (node.Value == "title" && titleness > 1 && !result.PropertyCache.ContainsKey("title"))
                            {
                                string title = ReadTitle(reader, "title");
                                result.PropertyCache.Add("title", title);
                                titleness = 1;
                            }
                            else if (node.Value == "h1" && titleness > 2 && !result.PropertyCache.ContainsKey("title"))
                            {
                                string title = ReadTitle(reader, "h1");
                                result.PropertyCache.Add("title", title);
                                titleness = 2;
                            }
                            else if (node.Value == "h2" && titleness > 3 && !result.PropertyCache.ContainsKey("title"))
                            {
                                string title = ReadTitle(reader, "h2");
                                result.PropertyCache.Add("title", title);
                                titleness = 3;
                            }
                            else if (node.Value == "meta")
                            {
                                ProcessMetaTag(reader, result);
                            }
                        }
                    }
                }
                else if (contentType.IsTextType || ContentType.IsJavascriptContentType(contentType))
                {
                    // leave the stream open so other tasks can reset it and use it
                    using (var reader = new StreamReader(stream, Encoding.Default, true, 1024, true))
                    {
                        evaluationText.Append(reader.ReadToEnd());
                    }
                }
            }

            // A missing "uri" request property is hashed and stored as the empty string.
            result.UriHash     = MD5Hash.Compute(result.RequestProperties.SafeGet("uri") ?? "").ToString();
            result.RefererUri  = result.RequestProperties.SafeGet("referer");
            result.Uri         = result.RequestProperties.SafeGet("uri") ?? "";
            // Title and description come from the property cache filled during parsing.
            result.Title       = result.PropertyCache.SafeGet("title")?.ToString();
            result.Description = result.PropertyCache.SafeGet("description")?.ToString();
            result.Created     = DateTime.UtcNow;
            result.Updated     = DateTime.UtcNow;

            evalText = evaluationText.ToString();

            return(result);
        }