/// <summary>
/// Builds a new <see cref="ActionItem"/> from the supplied arguments and registers it
/// through <paramref name="parameters"/> (via <c>AddActionToParameters</c>).
/// </summary>
/// <param name="method">Extraction method the new action wraps.</param>
/// <param name="context">Data context handed to the new action.</param>
/// <param name="parameters">
/// Local variables passed to the action; also the object the action is registered on.
/// </param>
public static void AddAction(ExtractionMethod method, DataContext context, LocalVariables parameters)
{
    // Construct the action and immediately attach it to everything referenced by its parameters.
    parameters.AddActionToParameters(new ActionItem(method, context, parameters));
}
/// <summary>
/// Creates an extraction action that remembers the method to run, the data context
/// and the local variables it was created with.
/// </summary>
/// <param name="method">Extraction method to store; asserted to be non-null.</param>
/// <param name="datacontext">Data context to store; asserted to be non-null.</param>
/// <param name="parameters">Local variables to store; asserted to be non-null.</param>
public ActionItem(ExtractionMethod method, DataContext datacontext, LocalVariables parameters)
{
    // Sanity checks first: none of the three arguments may be missing.
    Debug.Assert(method != null);
    Debug.Assert(datacontext != null);
    Debug.Assert(parameters != null);

    // Capture the arguments in the instance fields of the same names.
    this.parameters = parameters;
    this.datacontext = datacontext;
    this.method = method;
}
/// <summary>
/// Reduces a data set to its most important features using the selected algorithm.
/// </summary>
/// <param name="matrix">Input matrix.</param>
/// <param name="extractionMethod">
/// Algorithm to use for feature extraction. Any value other than
/// <c>LinearDiscriminantAnalysis</c> or <c>PrincipalComponentAnalysis</c>
/// falls back to singular value decomposition.
/// </param>
/// <returns>The matrix produced by the chosen algorithm's <c>ExtractFeatures</c> call.</returns>
public static InsightMatrix ExtractFeatures(this InsightMatrix matrix, ExtractionMethod extractionMethod)
{
    if (extractionMethod == ExtractionMethod.LinearDiscriminantAnalysis)
    {
        return new LinearDiscriminantAnalysis().ExtractFeatures(matrix);
    }

    if (extractionMethod == ExtractionMethod.PrincipalComponentAnalysis)
    {
        return new PrincipalComponentsAnalysis().ExtractFeatures(matrix);
    }

    // Fallback for every remaining value of the enum.
    return new SingularValueDecomposition().ExtractFeatures(matrix);
}
/// <summary>
/// Recurses through pages where images need to be extracted. Invokes an extraction method on each Uri found.
/// </summary>
/// <remarks>
/// Resets the shared crawl state (<c>stopAlg</c>, <c>exploredUris</c>, <c>FoundUris</c>) and
/// normalises <c>settings.HyperlinkRecursionDepth</c> in place before starting.
/// </remarks>
/// <param name="method">Method which extracts images from a specific Uri.</param>
/// <param name="uri">Uri to start recursion from.</param>
/// <param name="settings">Extraction Settings.</param>
/// <param name="cancellationToken">Cancellation Token.</param>
/// <returns>
/// Images extracted from the explored Uris, or <c>null</c> when cancellation was requested.
/// When <c>settings.LazyDownload</c> is false the result is a lazily evaluated filter that
/// drops images whose download failed.
/// </returns>
public static async Task<IEnumerable<WebImage>> Recurse(ExtractionMethod method, string uri, ExtractionSettings settings, CancellationToken cancellationToken)
{
    // Start every run from a clean shared state.
    stopAlg = false;
    exploredUris = new List<string>();
    FoundUris = new List<string>();

    // Hyperlink recursion disabled, or a negative depth, both mean "do not follow links".
    if (!settings.RecurseHyperlinks || settings.HyperlinkRecursionDepth < 0)
    {
        settings.HyperlinkRecursionDepth = 0;
    }

    // Extract images for start Uri and linked pages.
    Uri extractUri = new Uri(uri);
    List<WebImage> images = await HyperlinkRecurse(method, extractUri, settings, 0, cancellationToken);
    if (images == null)
    {
        // HyperlinkRecurse signals cancellation with null; propagate instead of dereferencing it.
        return null;
    }

    // If enabled, recurse through Uris by removing segments from the end.
    if (settings.RecurseUri)
    {
        // Compare the path, not AbsoluteUri: an absolute Uri always carries scheme and host,
        // so AbsoluteUri can never equal "/" and the loop would never reach its end condition.
        while (extractUri.AbsolutePath != "/" && !stopAlg)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            extractUri = extractUri.RemoveLastSegment();
            List<WebImage> moreImages = await HyperlinkRecurse(method, extractUri, settings, 0, cancellationToken);
            if (moreImages == null)
            {
                // Cancelled inside the nested crawl.
                return null;
            }

            images.AddRange(moreImages);
        }
    }

    if (!settings.LazyDownload)
    {
        // Images have already been downloaded, so remove any which failed.
        return images.Where(i => i.GetImageIfDownloaded() != null);
    }

    return images;
}
/// <summary>
/// Extracts images for a page and, depth permitting, for the pages it links to.
/// </summary>
/// <remarks>
/// A Uri already present in the shared <c>exploredUris</c> list is not extracted again, but its
/// links may still be followed. Once <c>settings.ShouldStopOnFoundImage</c> matches an image the
/// shared <c>stopAlg</c> flag is set and no further links are followed.
/// </remarks>
/// <param name="method">Method which extracts images from a specific Uri.</param>
/// <param name="uri">Uri to extract images and start hyperlink recursion from.</param>
/// <param name="settings">Extraction Settings.</param>
/// <param name="depth">
/// Current hyperlink depth of this call; links are followed only while it is below
/// <c>settings.HyperlinkRecursionDepth</c>.
/// </param>
/// <param name="cancellationToken">Cancellation Token.</param>
/// <returns>
/// Extracted images for the given Uri and linked pages, or <c>null</c> when cancellation was requested.
/// </returns>
public static async Task<List<WebImage>> HyperlinkRecurse(ExtractionMethod method, Uri uri, ExtractionSettings settings, int depth, CancellationToken cancellationToken)
{
    HtmlDocument doc = null;
    bool gotDoc = false;
    List<WebImage> images = new List<WebImage>();
    string uriText = uri.ToString();

    if (!exploredUris.Contains(uriText))
    {
        exploredUris.Add(uriText);

        if (settings.OnStartNewPage != null)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            await settings.OnStartNewPage.Invoke(uriText);
        }

        if (cancellationToken.IsCancellationRequested)
        {
            return null;
        }

        // Remember that the document was already requested so the link-following
        // step below does not fetch it a second time.
        gotDoc = true;
        doc = await GetDocumnent(uri, cancellationToken);
        images = await method.Invoke(uri, doc, settings);

        if (cancellationToken.IsCancellationRequested)
        {
            return null;
        }

        if (settings.OnEndNewPage != null)
        {
            await settings.OnEndNewPage.Invoke(uriText, images);
        }

        if (settings.ShouldStopOnFoundImage != null)
        {
            // Number of leading images that do NOT trigger the stop condition.
            int index = images.TakeWhile(i => !settings.ShouldStopOnFoundImage.Invoke(i)).Count();
            if (index != images.Count)
            {
                // Keep the image that triggered the stop, drop everything after it.
                images.RemoveRange(index + 1, images.Count - index - 1);
                stopAlg = true;
            }
        }

        if (!settings.LazyDownload)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            await Task.WhenAll(images.Select(i => i.GetImageAsync(cancellationToken)).ToArray());
        }

        if (settings.OnFoundImage != null)
        {
            images.ForEach(i => settings.OnFoundImage.Invoke(i));
        }
    }

    if (!stopAlg && settings.RecurseHyperlinks && depth < settings.HyperlinkRecursionDepth)
    {
        if (!gotDoc)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                return null;
            }

            doc = await GetDocumnent(uri, cancellationToken);
        }

        if (doc != null)
        {
            IEnumerable<HtmlATag> aTags = HtmlExtractor.ExtractATags(doc);
            foreach (HtmlATag aTag in aTags)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return null;
                }

                if (stopAlg)
                {
                    // A nested call hit the stop condition; do not crawl the remaining links.
                    break;
                }

                Uri newUri = uri.AddHtmlLink(aTag.Href);
                List<WebImage> moreImages = await HyperlinkRecurse(method, newUri, settings, depth + 1, cancellationToken);
                if (moreImages == null)
                {
                    // Nested call was cancelled; propagate null instead of passing it to AddRange,
                    // which would throw ArgumentNullException.
                    return null;
                }

                images.AddRange(moreImages);
            }
        }
    }

    return images;
}