/// <summary>
        /// Filters the space model features.
        /// </summary>
        /// <param name="spaceModel">The space model.</param>
        /// <param name="selectedFeatures">The selected features.</param>
        /// <param name="log">The log.</param>
        /// <returns>Sum of the values returned by <c>FilterSelectedFeatures</c> for every document of the space model.</returns>
        public static Int32 FilterSpaceModelFeatures(this SpaceModel spaceModel, WeightDictionary selectedFeatures, ILogBuilder log)
        {
            // A progress message is written once more than a fifth of the documents
            // has been processed since the previous message.
            Int32 reportInterval  = spaceModel.documents.Count() / 5;
            Int32 sinceLastReport = 0;
            Int32 filteredTotal   = 0;

            // The tokens handed to the filters are whatever the model's term dictionary
            // reports as "other than" the keys of the selected features.
            List<String> selectedKeys  = selectedFeatures.GetKeys();
            List<String> termsToRemove = spaceModel.terms.GetTokensOtherThan(selectedKeys);

            for (int docIndex = 0; docIndex < spaceModel.documents.Count; docIndex++)
            {
                var document = spaceModel.documents[docIndex];
                filteredTotal += document.FilterSelectedFeatures(termsToRemove, false);

                if (sinceLastReport > reportInterval)
                {
                    Double progress = docIndex.GetRatio(spaceModel.documents.Count());
                    String message  = "Filter SelectedFeatures [" + progress.ToString("P2") + "]";
                    log.log(message);
                    sinceLastReport = 0;
                }

                sinceLastReport++;
            }

            // The same token list is applied to both model-level term dictionaries.
            spaceModel.terms_known_label.FilterTokens(termsToRemove, false);
            spaceModel.terms_unknown_label.FilterTokens(termsToRemove, false);

            return filteredTotal;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Adds the label if not already declared within the context
 /// </summary>
 /// <param name="text">The text.</param>
 /// <param name="context">The context.</param>
 public void AddLabel(String text, SpaceModel context)
 {
     // Nothing to do when a label with this name is already declared in the context.
     Boolean alreadyDeclared = context.labels.Any(existing => existing.name == text);
     if (alreadyDeclared)
     {
         return;
     }

     context.labels.Add(new SpaceLabel { name = text });
 }
Exemplo n.º 3
0
        /// <summary>
        /// Gets the label instances for given list of label names
        /// </summary>
        /// <param name="labelIds">The label ids.</param>
        /// <param name="context">The context.</param>
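        /// <returns>The labels of <paramref name="context"/> whose name matches one of <paramref name="labelIds"/>; names without a match are skipped.</returns>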
        public List <SpaceLabel> GetLabels(IEnumerable <String> labelIds, SpaceModel context)
        {
            List<SpaceLabel> found = new List<SpaceLabel>();

            foreach (String requestedName in labelIds)
            {
                // First label in the context carrying the requested name, if any.
                var match = context.labels.FirstOrDefault(candidate => candidate.name == requestedName);

                // Names that are not declared in the context are skipped.
                if (match == null)
                {
                    continue;
                }

                found.Add(match);
            }

            return found;
        }
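        /// <summary>
        /// Merges the term dictionaries of the model into a new <see cref="TokenDictionary"/>.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="labeled">if set to <c>true</c>, <c>terms_known_label</c> of the model is merged into the result.</param>
        /// <param name="unlabeled">if set to <c>true</c>, <c>terms_unknown_label</c> of the model is merged into the result.</param>
        /// <returns>A newly created dictionary; nothing is merged into it when both flags are <c>false</c>.</returns>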
        public static TokenDictionary GetTerms(this SpaceModel model, Boolean labeled, Boolean unlabeled)
        {
            // Always a fresh dictionary, so the model's own dictionaries are only read as merge sources.
            var merged = new TokenDictionary();

            // Known-label terms are merged first, then unknown-label terms.
            if (labeled)
            {
                merged.MergeDictionary(model.terms_known_label);
            }

            if (unlabeled)
            {
                merged.MergeDictionary(model.terms_unknown_label);
            }

            return merged;
        }
        /// <summary>
        /// Sets the label.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="label">The label.</param>
        /// <param name="space">The space.</param>
        public static void SetLabel(this SpaceDocumentModel model, SpaceLabel label, SpaceModel space)
        {
            // The document itself is linked first, with weight 1.0.
            space.LabelToDocumentLinks.Add(label, model, 1.0);

            // Breadth-first walk over all descendants using a single work list:
            // entries before the cursor are done, entries after it are still pending.
            List<SpaceDocumentModel> pending = model.Children.ToList();

            for (int cursor = 0; cursor < pending.Count; cursor++)
            {
                SpaceDocumentModel current = pending[cursor];

                // Queue the children of the current document before linking it,
                // so they are visited after everything already in the list.
                pending.AddRange(current.Children);
                space.LabelToDocumentLinks.Add(label, current, 1.0);
            }
        }
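        /// <summary>
        /// Collects the tokens of the model's term dictionaries into a single list.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="labeled">if set to <c>true</c>, the tokens of <c>terms_known_label</c> are added to the result.</param>
        /// <param name="unlabeled">if set to <c>true</c>, the tokens of <c>terms_unknown_label</c> are added to the result.</param>
        /// <returns>A newly created list of tokens; nothing is added to it when both flags are <c>false</c>.</returns>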
        public static List <String> GetTokens(this SpaceModel model, Boolean labeled, Boolean unlabeled)
        {
            var collected = new List<String>();

            // Known-label tokens are added first, then unknown-label tokens.
            if (labeled)
            {
                var knownTokens = model.terms_known_label.GetTokens();
                collected.AddRange(knownTokens, true);
            }

            if (unlabeled)
            {
                var unknownTokens = model.terms_unknown_label.GetTokens();
                collected.AddRange(unknownTokens, true);
            }

            return collected;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates a copy of this space model: the term dictionaries, documents, categories, topics and labels
        /// are cloned, and the label-to-document and label-to-category links are re-created between the
        /// cloned instances, which are matched to the originals by name.
        /// </summary>
        /// <returns>The newly created <see cref="SpaceModel"/>.</returns>
        public SpaceModel Clone()
        {
            SpaceModel copy = new SpaceModel
            {
                terms               = terms.Clone(),
                terms_known_label   = terms_known_label.Clone(),
                terms_unknown_label = terms_unknown_label.Clone(),
                documents           = documents.Clone <SpaceDocumentModel>(false),
                categories          = categories.Clone <SpaceCategoryModel>(false),
                topics              = topics.CloneTerm <SpaceTopic>(),
                labels              = labels.CloneTerm <SpaceLabel>()
            };

            // Re-create label -> document links between the cloned label and the cloned document.
            foreach (var sourceLabel in labels)
            {
                var documentLinks = LabelToDocumentLinks.GetAllLinked(sourceLabel);
                var copiedLabel   = copy.labels.First(candidate => candidate.name == sourceLabel.name);

                foreach (var link in documentLinks)
                {
                    SpaceDocumentModel copiedDocument = copy.documents.First(candidate => candidate.name == link.name);

                    copy.LabelToDocumentLinks.Add(copiedLabel, copiedDocument, link.weight);
                }
            }

            // Re-create label -> category links the same way.
            foreach (var sourceLabel in labels)
            {
                var categoryLinks = LabelToCategoryLinks.GetAllLinked(sourceLabel);
                var copiedLabel   = copy.labels.First(candidate => candidate.name == sourceLabel.name);

                foreach (var link in categoryLinks)
                {
                    SpaceCategoryModel copiedCategory = copy.categories.First(candidate => candidate.name == link.name);

                    copy.LabelToCategoryLinks.Add(copiedLabel, copiedCategory, link.weight);
                }
            }

            return copy;
        }
        /// <summary>
        /// Gets the documents of the model whose <c>labels</c> collection contains the given label name.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="labelName">Name of the label.</param>
        /// <returns>A new list with the matching documents, in the order they appear in the model.</returns>
        public static List <SpaceDocumentModel> GetDocumentsOfLabel(this SpaceModel model, String labelName)
        {
            var matching = from document in model.documents
                           where document.labels.Contains(labelName)
                           select document;

            return matching.ToList();
        }
        /// <summary>
        /// Gets the documents by label.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="includeUnlabeled">if set to <c>true</c> [include unlabeled].</param>
        /// <returns></returns>
        public static Dictionary <String, List <SpaceDocumentModel> > GetDocumentsByLabel(this SpaceModel model, Boolean includeUnlabeled = false)
        {
            var labelNames = model.labels.Select(label => label.name).ToList();

            // Unless requested otherwise, the UNKNOWN label gets no entry in the result.
            if (includeUnlabeled == false)
            {
                labelNames.Remove(SpaceLabel.UNKNOWN);
            }

            // One entry per label name, holding the documents whose labels contain that name.
            return labelNames.ToDictionary(
                labelName => labelName,
                labelName => model.documents.Where(document => document.labels.Contains(labelName)).ToList());
        }
Exemplo n.º 10
0
        /// <summary>
        /// Constructs a document model
        /// </summary>
        /// <param name="text">The text.</param>
        /// <param name="name">The name.</param>
        /// <param name="context">The context.</param>
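        /// <param name="stemmContext">The stemming context used to stem the tokens.</param>
        /// <param name="tokenizer">The tokenizer.</param>
        /// <param name="isKnownDocument">if set to <c>true</c>, the stemmed tokens are added to <c>terms_known_label</c> of the context; otherwise to <c>terms_unknown_label</c>.</param>
        /// <param name="metrics">Optional metrics; when not <c>null</c>, its token counters are incremented.</param>
        /// <returns>The constructed document model.</returns>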
        public SpaceDocumentModel ConstructDocument(string text, String name, SpaceModel context, StemmingContext stemmContext, ITokenizer tokenizer, Boolean isKnownDocument, ContentMetrics metrics = null)
        {
            var tokens = tokenizer.Tokenize(text);

            if (metrics != null)
            {
                metrics.TokensDoc += tokens.Length;                 // all tokens of the document
            }

            // Dictionary of the raw (unstemmed) tokens; its frequencies are carried over to the stems below.
            TokenDictionary tokenDictionary = new TokenDictionary(tokens);

            if (metrics != null)
            {
                metrics.UniqueTokensDoc += tokenDictionary.Count;   // unique tokens
            }

            // Fold the raw tokens into their stems, counting each stem with the frequency of its raw token.
            TokenDictionary stemmDictionary = new TokenDictionary();

            foreach (String rawToken in tokenDictionary.GetTokens())
            {
                String stem = stemmContext.Stem(rawToken);
                stemmDictionary.CountToken(stem, tokenDictionary.GetTokenFrequency(rawToken));
            }

            if (metrics != null)
            {
                metrics.StemmedTokensDoc += stemmDictionary.Count;  // stemmed tokens
            }

            SpaceDocumentModel document = new SpaceDocumentModel();

            document.name   = name;
            document.terms  = stemmDictionary;
            document.Length = tokens.Length;

            // Read the setting once, so that allocating the word index and filling it
            // can never disagree if the setting changes while the document is built.
            Boolean maintainWordIndex = spaceSettings.DoMaintainWordIndex;

            if (maintainWordIndex)
            {
                document.Words = new int[document.Length];
            }

            for (int i = 0; i < tokens.Length; i++)
            {
                String stem = stemmContext.Stem(tokens[i]);

                // Every token occurrence is registered in the context dictionary
                // that corresponds to the document kind.
                if (isKnownDocument)
                {
                    context.terms_known_label.AddToken(stem);
                }
                else
                {
                    context.terms_unknown_label.AddToken(stem);
                }

                if (maintainWordIndex)
                {
                    // The word index keeps, per token position, the ID the stem has in context.terms.
                    document.Words[i] = context.terms.GetTokenID(stem);
                }
            }

            // NOTE: this method neither merges the document terms into context.terms
            // nor adds the document to context.documents.
            return document;
        }