/// <summary>
/// Checks that a query made of three OR-ed <c>site:</c> terms includes a result
/// for each listed site and does not include a result for an unlisted host.
/// </summary>
public void Parse4Test()
{
    const string InputQuery = "site:reddit.com OR site:wikipedia.org OR site:news.com.au";

    var reddit = new WorkspaceResult { Uri = "https://www.reddit.com" };
    var wikipedia = new WorkspaceResult { Uri = "https://en.wikipedia.org" };
    var newsComAu = new WorkspaceResult { Uri = "https://www.news.com.au" };
    var unlisted = new WorkspaceResult { Uri = "https://www.example.com.au" };

    var query = new Query(InputQuery);

    // Every site named in the query must be included.
    foreach (var listed in new[] { reddit, wikipedia, newsComAu })
    {
        Assert.IsTrue(query.Evaluate(listed, null, null).Action == EvaluationResultAction.Include);
    }

    // A host that merely shares the ".com.au" suffix must not be included.
    Assert.IsFalse(query.Evaluate(unlisted, null, null).Action == EvaluationResultAction.Include);
}
/// <summary>
/// Matches when the result's property cache holds a non-null value under
/// <c>HeaderKey</c> and that value's text contains <c>SearchText</c>
/// (a blank <c>SearchText</c> matches any value).
/// </summary>
/// <param name="result">Result whose <c>PropertyCache</c> is inspected.</param>
/// <param name="fragment">Not read by this filter.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// A one-element array holding the tag made from the cached value, or
/// <c>EmptySet</c> when the key is missing, the value is null, or the text does not match.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    object cached = null;
    if (result.PropertyCache.ContainsKey(HeaderKey))
    {
        cached = result.PropertyCache[HeaderKey];
    }

    if (cached == null)
    {
        return EmptySet;
    }

    // cached is known to be non-null here, so no null-conditional is needed.
    string text = cached.ToString();
    if (string.IsNullOrWhiteSpace(SearchText) || text.Contains(SearchText))
    {
        return new string[1] { Utility.MakeTag(text) };
    }

    return EmptySet;
}
/// <summary>
/// Returns the text of the named field of <paramref name="result"/>.
/// </summary>
/// <param name="fieldName">
/// One of "domain", "host", "ip", "title", "referer", "datahash" or "tags".
/// </param>
/// <param name="result">Result the field is read from.</param>
/// <returns>The field's text, or an empty string for an unrecognised field name.</returns>
private string GetFieldValue(string fieldName, WorkspaceResult result)
{
    switch (fieldName)
    {
        // "domain" and "host" both resolve to the host part of the result's URI.
        case "domain":
        case "host":
            return new Uri(result.Uri).Host;

        case "ip":
            // GetHostIPAddress is awaited elsewhere in this code base, i.e. it
            // returns a task; calling ToString() on the task itself would yield the
            // task's type name instead of the address, so unwrap the result first.
            return Utility.GetHostIPAddress(new Uri(result.Uri)).GetAwaiter().GetResult()?.ToString()
                   ?? string.Empty;

        case "title":
            return result.Title;

        case "referer":
            return result.RefererUri;

        case "datahash":
            return result.DataHash;

        case "tags":
            // All tags joined together with no separator.
            return result.Tags.Aggregate("", (x, y) => x + y);

        default:
            return string.Empty;
    }
}
/// <summary>
/// Runs <c>matcher</c> against <paramref name="fragment"/> and returns tags for the
/// match unless the text surrounding the match has already been recorded in
/// <c>seenFragments</c>.
/// </summary>
/// <param name="result">Not read by this filter.</param>
/// <param name="fragment">Text to match against.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// Distinct tags built from the match groups, or <c>EmptySet</c> when there is no
/// match or the surrounding fragment was already seen.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    var match = matcher.Match(fragment);
    if (!match.Success)
    {
        // no matches
        return EmptySet;
    }

    var context = fragment.Fragment(match.Index, 40, 40);
    bool alreadySeen = seenFragments.Contains(context);

    // The fragment is recorded whether or not it was seen before.
    seenFragments.Enqueue(context);

    if (alreadySeen)
    {
        // we've seen this fragment recently, nerf it even if it matches
        // should get rid of menu links referring to the same link over and over
        return EmptySet;
    }

    // we haven't seen this yet
    var groupTexts = match.Groups.OfType<object>().Select(g => g.ToString());
    return Utility.MakeTags(groupTexts).Distinct().ToArray();
}
/// <summary>
/// Builds a <c>ContentType</c> from the result's "content-type" response property.
/// </summary>
/// <param name="result">Result whose <c>ResponseProperties</c> are read.</param>
/// <returns>
/// The parsed content type, or <c>ContentType.Unknown</c> when the property is absent.
/// </returns>
ContentType GetContentType(WorkspaceResult result)
{
    return result.ResponseProperties.ContainsKey("content-type")
        ? new ContentType(result.ResponseProperties["content-type"])
        : ContentType.Unknown;
}
/// <summary>
/// Matches when the request properties contain <c>HeaderKey</c> and its value
/// contains <c>SearchText</c> (a blank <c>SearchText</c> matches any value).
/// </summary>
/// <param name="result">Result whose <c>RequestProperties</c> are inspected.</param>
/// <param name="fragment">Not read by this filter.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// A one-element array holding the tag made from the header value, or <c>EmptySet</c>.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    if (!result.RequestProperties.ContainsKey(HeaderKey))
    {
        return EmptySet;
    }

    var headerValue = result.RequestProperties[HeaderKey];
    if (!string.IsNullOrWhiteSpace(SearchText) && !headerValue.Contains(SearchText))
    {
        return EmptySet;
    }

    return new string[1] { Utility.MakeTag(headerValue) };
}
/// <summary>
/// Picks the category for a result: its first tag, unless that tag is blank or
/// <c>UseNameForCategory</c> is set, in which case <c>Name</c> is used.
/// </summary>
/// <param name="result">Result whose first tag is considered.</param>
/// <returns>The first tag or <c>Name</c>.</returns>
private string MakeCategory(WorkspaceResult result)
{
    string firstTag = result.Tags.FirstOrDefault();
    bool fallBackToName = UseNameForCategory || string.IsNullOrWhiteSpace(firstTag);
    return fallBackToName ? Name : firstTag;
}
/// <summary>
/// Copies information from the current <c>meta</c> tag into <paramref name="result"/>.
/// Tags carrying a <c>property</c> attribute are cached under their sanitised
/// property name, and <c>og_title</c> / <c>og_description</c> additionally set the
/// title / description. Tags without one are treated as legacy
/// <c>name="description"</c> tags.
/// </summary>
/// <param name="reader">Reader positioned on the meta tag.</param>
/// <param name="result">Result whose property cache, title and description are updated.</param>
void ProcessMetaTag(HtmlReader reader, WorkspaceResult result)
{
    // The attribute may be missing; null-conditional keeps that from throwing so
    // the fallback branch below can actually run. Invariant casing keeps the
    // identifier comparison independent of the current culture.
    var propertyName = reader.GetAttribute("property")?.ToLowerInvariant();

    if (!string.IsNullOrWhiteSpace(propertyName))
    {
        propertyName = SanitiseProperty(propertyName);
        var rawContent = reader.GetAttribute("content");
        var content = SanitiseAttribute(rawContent);

        // The first value seen for a property wins; the raw (unsanitised) content is cached.
        if (!result.PropertyCache.ContainsKey(propertyName))
        {
            result.PropertyCache.Add(propertyName, rawContent);
        }

        switch (propertyName)
        {
            case "og_title": // og:title
                // Open Graph title overrides any title cached so far.
                result.PropertyCache["title"] = content;
                result.Title = content;
                break;

            case "og_description": // og:description
                // Open Graph description overrides any description cached so far.
                result.Description = content;
                result.PropertyCache["description"] = content;
                break;

            default:
                break;
        }
    }
    else
    {
        // other random historical meta tags here
        var metaName = reader.GetAttribute("name")?.ToLowerInvariant();
        if (metaName == "description")
        {
            var value = SanitiseAttribute(reader.GetAttribute("content"));

            // Only fills the description when nothing has provided one yet.
            if (!result.PropertyCache.ContainsKey("description"))
            {
                result.PropertyCache.Add("description", value);
            }
        }
    }
}
/// <summary>
/// Checks that a <c>UriFilter</c> built from a query string yields at least one tag
/// for a result whose <c>Uri</c> is that same string.
/// </summary>
public void Parse3Test()
{
    const string InputQuery = "uri:reddit.com";

    var candidate = new WorkspaceResult { Uri = InputQuery };
    var filter = new UriFilter(InputQuery);

    var tags = filter.IsMatch(candidate, "", null);

    Assert.IsTrue(tags.Any());
}
/// <summary>
/// Evaluates <c>Left</c> and, only when it produces no tags, evaluates <c>Right</c>.
/// </summary>
/// <param name="result">Passed through to both operands.</param>
/// <param name="fragment">Passed through to both operands.</param>
/// <param name="stream">Passed through to both operands.</param>
/// <returns>The left operand's tags when there are any, otherwise the right operand's tags.</returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    var leftTags = Left.IsMatch(result, fragment, stream);
    return leftTags.Any() ? leftTags : Right.IsMatch(result, fragment, stream);
}
/// <summary>
/// Matches a result only when the value of its <c>FieldName</c> field is not
/// currently held in <c>seenWindow</c>; a value that matches is then added to
/// the window.
/// </summary>
/// <param name="result">Result whose field value is looked up.</param>
/// <param name="fragment">Not read by this filter.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// A one-element array (its single element is left null) when the value was not
/// in the window, otherwise <c>EmptySet</c>.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    string fieldValue = GetFieldValue(FieldName, result);

    if (!seenWindow.Contains(fieldValue))
    {
        seenWindow.Enqueue(fieldValue);

        // Non-empty array signals a match; no tag text is supplied.
        return new string[1];
    }

    return EmptySet;
}
/// <summary>
/// Lists the workspaces visible to the configured account and prints each one's
/// friendly name to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Find your Account Sid and Auth Token at twilio.com/user/account
    string accountSid = "ACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
    string authToken = "your_auth_token";
    string workspaceSid = "WSXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";

    var taskRouter = new TaskRouterClient(accountSid, authToken);
    WorkspaceResult listing = taskRouter.ListWorkspaces();

    foreach (Workspace entry in listing.Workspaces)
    {
        Console.WriteLine(entry.FriendlyName);
    }
}
/// <summary>
/// Matches only when both <c>Left</c> and <c>Right</c> produce tags. <c>Right</c>
/// is not evaluated when <c>Left</c> produces none.
/// </summary>
/// <param name="result">Passed through to both operands.</param>
/// <param name="fragment">Passed through to both operands.</param>
/// <param name="stream">Passed through to both operands.</param>
/// <returns>
/// The left tags followed by the right tags, or <c>EmptySet</c> when either side is empty.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    var leftTags = Left.IsMatch(result, fragment, stream);
    if (leftTags.Any())
    {
        var rightTags = Right.IsMatch(result, fragment, stream);
        if (rightTags.Any())
        {
            return leftTags.Concat(rightTags).ToArray();
        }
    }

    return EmptySet;
}
/// <summary>
/// Parses the "key: value" lines of the request and response header text and
/// stores them in the result's <c>RequestProperties</c> and
/// <c>ResponseProperties</c>. The first occurrence of a key wins.
/// </summary>
/// <param name="result">Result whose property collections are filled.</param>
/// <param name="requestString">Request header text; skipped when null or blank.</param>
/// <param name="responseHeaders">Response header text; skipped when null or blank.</param>
void ProcessHeaders(WorkspaceResult result, string requestString, string responseHeaders)
{
    ForEachHeader(requestString, (key, value) =>
    {
        if (!result.RequestProperties.ContainsKey(key))
        {
            result.RequestProperties.Add(key, value);
        }
    });

    ForEachHeader(responseHeaders, (key, value) =>
    {
        if (!result.ResponseProperties.ContainsKey(key))
        {
            result.ResponseProperties.Add(key, value);
        }
    });
}

/// <summary>
/// Splits header text into lines and invokes <paramref name="onHeader"/> with the
/// lower-cased, trimmed key and trimmed value of each line that contains a colon
/// and has a non-blank key.
/// </summary>
static void ForEachHeader(string headerText, Action<string, string> onHeader)
{
    if (string.IsNullOrWhiteSpace(headerText))
    {
        return;
    }

    foreach (var line in headerText.Split('\n'))
    {
        // Everything before the first colon is the key; lines without one are skipped.
        int separator = line.IndexOf(':');
        if (separator < 0)
        {
            continue;
        }

        // Invariant casing: header names are identifiers, not user-facing text.
        string key = line.Substring(0, separator).Trim().ToLowerInvariant();
        if (string.IsNullOrWhiteSpace(key))
        {
            continue;
        }

        // Trim also removes a trailing '\r' left over from CRLF line endings.
        onHeader(key, line.Substring(separator + 1).Trim());
    }
}
/// <summary>
/// Resolves the IP address of the result's request URI (caching it in the
/// result's property cache under "hostip"), looks it up in <c>database</c>, and
/// returns the resulting tags that are non-blank and contain <c>FilterData</c>
/// (a blank <c>FilterData</c> keeps every non-blank tag).
/// </summary>
/// <param name="result">Result whose "uri" request property and property cache are used.</param>
/// <param name="fragment">Not read by this filter.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// The filtered tags, or <c>EmptySet</c> when no address is available or the
/// database has no entry for it.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    const string HostIPCacheKey = "hostip";

    try
    {
        var uri = new Uri(result.RequestProperties["uri"]);

        // Resolve once per result; later calls reuse the cached address.
        if (!result.PropertyCache.ContainsKey(HostIPCacheKey))
        {
            result.PropertyCache.Add(HostIPCacheKey, Utility.GetHostIPAddress(uri).GetAwaiter().GetResult());
        }

        // The cache stores untyped values; bail out rather than passing null on
        // when the entry is missing an address or holds something else.
        var ip = result.PropertyCache[HostIPCacheKey] as IPAddress;
        if (ip == null)
        {
            return EmptySet;
        }

        var city = database.City(ip);
        return GetTags(city)
            .Where(x => !string.IsNullOrWhiteSpace(x) && (string.IsNullOrWhiteSpace(FilterData) || x.Contains(FilterData)))
            .ToArray();
    }
    catch (AddressNotFoundException)
    {
        // An address the database does not know simply produces no tags.
        return EmptySet;
    }
}
/// <summary>
/// Runs the prediction engine over <paramref name="fragment"/> and matches when
/// the prediction agrees with <c>SearchText</c> and its maximum score exceeds
/// <c>ConfidenceThreshold</c>. The prediction is reused while consecutive results
/// carry the same <c>DataHash</c>.
/// </summary>
/// <param name="result">Result whose <c>DataHash</c> decides whether to re-predict.</param>
/// <param name="fragment">Text handed to the prediction engine.</param>
/// <param name="stream">Not read by this filter.</param>
/// <returns>
/// A one-element array holding the tag made from the predicted labels, or <c>EmptySet</c>.
/// </returns>
public override string[] IsMatch(WorkspaceResult result, string fragment, Stream stream)
{
    var engine = LoadPredictionEngineFromCache();

    // Also predict when nothing has been predicted yet: otherwise a first result
    // whose hash equals the initial lastDataHash would leave lastPrediction null
    // and the code below would dereference it.
    if (lastPrediction == null || result.DataHash != lastDataHash)
    {
        lastPrediction = engine.Predict(new PageData { TextData = fragment });
        lastDataHash = result.DataHash;
    }

    System.Diagnostics.Debug.WriteLine("Label '{0}', {1}, max(Score) = {2}", lastPrediction.PredictedLabels, ModelName, lastPrediction.Score.Max());

    if (lastPrediction.HasPrediction && lastPrediction.IsMatchingPrediction(SearchText) && lastPrediction.MaxScore > ConfidenceThreshold)
    {
        return new string[1] { Utility.MakeTag(lastPrediction.PredictedLabels) };
    }

    return EmptySet;
}
/// <summary>
/// Builds a <c>WorkspaceResult</c> from a fetched stream and its header text:
/// parses the headers, hashes the stream, collects text for evaluation (HTML text
/// nodes, or the whole body for text/javascript content) and fills in the URI,
/// referer, title and description.
/// </summary>
/// <param name="stream">Fetched data; must be seekable.</param>
/// <param name="requestString">Request header text passed to <c>ProcessHeaders</c>.</param>
/// <param name="responseHeaders">Response header text passed to <c>ProcessHeaders</c>.</param>
/// <param name="evalText">Receives the contents of <c>evaluationText</c> after this call.</param>
/// <returns>The populated result.</returns>
/// <exception cref="FetchoException">The stream is not seekable.</exception>
public WorkspaceResult Build(Stream stream, string requestString, string responseHeaders, out string evalText)
{
    var result = new WorkspaceResult();
    result.SourceServerId = FetchoConfiguration.Current.CurrentServerNode.ServerId;

    if (!stream.CanSeek)
    {
        throw new FetchoException("WorkspaceResultBuilder needs a seekable stream");
    }

    ProcessHeaders(result, requestString, responseHeaders);

    result.DataHash = MD5Hash.Compute(stream).ToString();
    result.PageSize = stream.Length;
    stream.Seek(0, SeekOrigin.Begin);

    ContentType contentType = GetContentType(result);
    bool hasContentType = contentType != null;

    if (hasContentType && contentType.SubType.Contains("html"))
    {
        CollectHtmlContent(stream, result);
    }
    else if (hasContentType && (contentType.IsTextType || ContentType.IsJavascriptContentType(contentType)))
    {
        // leave the stream open so other tasks can reset it and use it
        using (var textReader = new StreamReader(stream, Encoding.Default, true, 1024, true))
        {
            evaluationText.Append(textReader.ReadToEnd());
        }
    }

    string requestUri = result.RequestProperties.SafeGet("uri") ?? "";

    result.UriHash = MD5Hash.Compute(requestUri).ToString();
    result.RefererUri = result.RequestProperties.SafeGet("referer");
    result.Uri = requestUri;
    result.Title = result.PropertyCache.SafeGet("title")?.ToString();
    result.Description = result.PropertyCache.SafeGet("description")?.ToString();
    result.Created = DateTime.UtcNow;
    result.Updated = DateTime.UtcNow;

    // evaluationText is only appended to here, never cleared.
    evalText = evaluationText.ToString();
    return result;
}

/// <summary>
/// Walks the HTML in <paramref name="stream"/>: appends text nodes to
/// <c>evaluationText</c>, skips script and style content, captures a title from
/// title/h1/h2 tags and hands meta tags to <c>ProcessMetaTag</c>.
/// </summary>
private void CollectHtmlContent(Stream stream, WorkspaceResult result)
{
    // NOTE(review): a title is only captured while the cache has none, so the
    // first title/h1/h2 encountered wins and titleness never changes the outcome
    // - confirm whether higher-ranked tags were meant to override.
    int titleness = 4;

    using (var reader = new HtmlReader(stream))
    {
        while (!reader.EOF)
        {
            var node = reader.NextNode();

            if (node.Type == HtmlTokenType.Text)
            {
                evaluationText.Append(node.Value);
                evaluationText.Append(' ');
            }

            switch (node.Value)
            {
                case "script":
                    ReadToEndTag(reader, "script");
                    break;

                case "style":
                    ReadToEndTag(reader, "style");
                    break;

                case "title":
                    CaptureTitle(reader, result, "title", 1, ref titleness);
                    break;

                case "h1":
                    CaptureTitle(reader, result, "h1", 2, ref titleness);
                    break;

                case "h2":
                    CaptureTitle(reader, result, "h2", 3, ref titleness);
                    break;

                case "meta":
                    ProcessMetaTag(reader, result);
                    break;
            }
        }
    }
}

/// <summary>
/// Caches the text of the given tag as the "title" property when no title is
/// cached yet and <paramref name="titleness"/> is above <paramref name="rank"/>.
/// </summary>
private void CaptureTitle(HtmlReader reader, WorkspaceResult result, string tagName, int rank, ref int titleness)
{
    if (titleness > rank && !result.PropertyCache.ContainsKey("title"))
    {
        string text = ReadTitle(reader, tagName);
        result.PropertyCache.Add("title", text);
        titleness = rank;
    }
}