Exemple #1
0
        /// <summary>
        /// Builds a new <c>ActionItem</c> from the given arguments and registers it
        /// on <paramref name="parameters"/> through <c>AddActionToParameters</c>.
        /// </summary>
        /// <param name="method">Extraction method handed to the new action.</param>
        /// <param name="context">Data context handed to the new action.</param>
        /// <param name="parameters">Local variables handed to the new action; also the object the action is registered on.</param>
        public static void AddAction(ExtractionMethod method, DataContext context, LocalVariables parameters)
        {
            // Construct the action and hand it straight to the parameters object,
            // which attaches it wherever it needs to go.
            parameters.AddActionToParameters(new ActionItem(method, context, parameters));
        }
Exemple #2
0
        /// <summary>
        /// Creates an extraction action from its three components.
        /// </summary>
        /// <param name="method">Extraction method stored on the action; checked for null with <c>Debug.Assert</c>.</param>
        /// <param name="datacontext">Data context stored on the action; checked for null with <c>Debug.Assert</c>.</param>
        /// <param name="parameters">Local variables stored on the action; checked for null with <c>Debug.Assert</c>.</param>
        public ActionItem(ExtractionMethod method, DataContext datacontext, LocalVariables parameters)
        {
            // Sanity checks first: none of the components may be missing.
            Debug.Assert(method != null);
            Debug.Assert(datacontext != null);
            Debug.Assert(parameters != null);

            // Remember the components on the instance.
            this.method = method;
            this.datacontext = datacontext;
            this.parameters = parameters;
        }
 /// <summary>
 /// Runs the selected feature-extraction algorithm on a data set.
 /// </summary>
 /// <param name="matrix">Input matrix</param>
 /// <param name="extractionMethod">
 /// Algorithm to use. Any value other than LinearDiscriminantAnalysis or
 /// PrincipalComponentAnalysis falls back to singular value decomposition.
 /// </param>
 /// <returns>Matrix produced by the chosen algorithm's ExtractFeatures call</returns>
 public static InsightMatrix ExtractFeatures(this InsightMatrix matrix, ExtractionMethod extractionMethod)
 {
     if (extractionMethod == ExtractionMethod.LinearDiscriminantAnalysis)
     {
         return new LinearDiscriminantAnalysis().ExtractFeatures(matrix);
     }

     if (extractionMethod == ExtractionMethod.PrincipalComponentAnalysis)
     {
         return new PrincipalComponentsAnalysis().ExtractFeatures(matrix);
     }

     // Every remaining value is handled by singular value decomposition.
     return new SingularValueDecomposition().ExtractFeatures(matrix);
 }
Exemple #4
0
        /// <summary>
        /// Recurses through pages where images need to be extracted. Invokes an extraction method on each Uri found.
        /// </summary>
        /// <param name="method">Method which extracts images from a specific Uri.</param>
        /// <param name="uri">Uri to start recursion from.</param>
        /// <param name="settings">
        /// Extraction Settings. <c>HyperlinkRecursionDepth</c> is overwritten with 0 when
        /// <c>RecurseHyperlinks</c> is false or the depth is negative.
        /// </param>
        /// <param name="cancellationToken">Cancellation Token.</param>
        /// <returns>
        /// Images extracted from the explored Uris, or <c>null</c> if cancellation was requested.
        /// When <c>LazyDownload</c> is false, only images whose <c>GetImageIfDownloaded()</c> is non-null are returned.
        /// </returns>
        public static async Task <IEnumerable <WebImage> > Recurse(ExtractionMethod method, string uri, ExtractionSettings settings, CancellationToken cancellationToken)
        {
            // Reset the static state for this run (stopAlg and exploredUris are read by HyperlinkRecurse).
            stopAlg      = false;
            exploredUris = new List <string>();
            FoundUris    = new List <string>();

            // Disabled hyperlink recursion and a negative depth both mean "do not follow links".
            if (!settings.RecurseHyperlinks || settings.HyperlinkRecursionDepth < 0)
            {
                settings.HyperlinkRecursionDepth = 0;
            }

            // Extract images for start Uri and linked pages.
            Uri             extractUri = new Uri(uri);
            List <WebImage> images     = await HyperlinkRecurse(method, extractUri, settings, 0, cancellationToken);

            if (images == null)
            {
                // HyperlinkRecurse returns null when cancelled; propagate that instead of
                // dereferencing the null list below.
                return(null);
            }

            // If enabled, recurse through Uris by removing segments from the end
            if (settings.RecurseUri)
            {
                // Compare the path component: AbsoluteUri is the full URI string and can
                // never equal "/", so testing it would not stop at the site root.
                while (extractUri.AbsolutePath != "/" && !stopAlg)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    extractUri = extractUri.RemoveLastSegment();
                    List <WebImage> moreImages = await HyperlinkRecurse(method, extractUri, settings, 0, cancellationToken);

                    if (moreImages == null)
                    {
                        // Cancelled part-way through; AddRange(null) would throw.
                        return(null);
                    }

                    images.AddRange(moreImages);
                }
            }

            if (!settings.LazyDownload)
            {
                // Images have already been downloaded, so remove any which failed
                return(images.Where(i => i.GetImageIfDownloaded() != null));
            }

            return(images);
        }
Exemple #5
0
        /// <summary>
        /// Extracts images for a page and linked pages.
        /// </summary>
        /// <param name="method">Method which extracts images from a specific Uri.</param>
        /// <param name="uri">Uri to extract images and start hyperlink recursion from.</param>
        /// <param name="settings">Extraction Settings.</param>
        /// <param name="depth">
        /// Current hyperlink depth of <paramref name="uri"/>. Links on the page are followed only
        /// while this is below <c>settings.HyperlinkRecursionDepth</c>.
        /// </param>
        /// <param name="cancellationToken">Cancellation Token.</param>
        /// <returns>
        /// Extracted images for the given Uri and linked pages, or <c>null</c> if cancellation was requested.
        /// </returns>
        public static async Task <List <WebImage> > HyperlinkRecurse(ExtractionMethod method, Uri uri, ExtractionSettings settings, int depth, CancellationToken cancellationToken)
        {
            HtmlDocument    doc     = null;
            bool            gotDoc  = false;
            List <WebImage> images  = new List <WebImage>();
            string          uriText = uri.ToString();

            // Extract images only from pages not visited before in this run.
            if (!exploredUris.Contains(uriText))
            {
                exploredUris.Add(uriText);

                if (settings.OnStartNewPage != null)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    await settings.OnStartNewPage.Invoke(uriText);
                }

                if (cancellationToken.IsCancellationRequested)
                {
                    return(null);
                }

                // Mark the document as fetched before the call so the link-following
                // step below does not request it a second time.
                gotDoc = true;
                doc    = await GetDocumnent(uri, cancellationToken);

                images = await method.Invoke(uri, doc, settings);

                if (cancellationToken.IsCancellationRequested)
                {
                    return(null);
                }

                if (settings.OnEndNewPage != null)
                {
                    await settings.OnEndNewPage.Invoke(uriText, images);
                }

                if (settings.ShouldStopOnFoundImage != null)
                {
                    // index is the number of images before the first one that triggers the stop.
                    int index = images.TakeWhile(i => !settings.ShouldStopOnFoundImage.Invoke(i)).Count();
                    if (index != images.Count)
                    {
                        // Keep the triggering image itself (at position index) and drop
                        // everything after it, then signal all recursion to stop.
                        images.RemoveRange(index + 1, images.Count - index - 1);
                        stopAlg = true;
                    }
                }

                if (!settings.LazyDownload)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    // Eager mode: download all images of this page concurrently.
                    await Task.WhenAll(images.Select(i => i.GetImageAsync(cancellationToken)).ToArray());
                }

                if (settings.OnFoundImage != null)
                {
                    images.ForEach(i => settings.OnFoundImage.Invoke(i));
                }
            }

            // Follow links on this page unless a stop was signalled or the depth limit is reached.
            if (!stopAlg && settings.RecurseHyperlinks && depth < settings.HyperlinkRecursionDepth)
            {
                if (!gotDoc)
                {
                    if (cancellationToken.IsCancellationRequested)
                    {
                        return(null);
                    }

                    // Page was already explored earlier, so its document was not loaded above.
                    doc = await GetDocumnent(uri, cancellationToken);
                }

                if (doc != null)
                {
                    IEnumerable <HtmlATag> aTags = HtmlExtractor.ExtractATags(doc);
                    foreach (HtmlATag aTag in aTags)
                    {
                        if (cancellationToken.IsCancellationRequested)
                        {
                            return(null);
                        }

                        Uri             newUri     = uri.AddHtmlLink(aTag.Href);
                        List <WebImage> moreImages = await HyperlinkRecurse(method, newUri, settings, depth + 1, cancellationToken);

                        if (moreImages == null)
                        {
                            // The nested call was cancelled; propagate null rather than
                            // passing it to AddRange, which would throw.
                            return(null);
                        }

                        images.AddRange(moreImages);
                    }
                }
            }

            return(images);
        }