/// <summary>
        /// Tokenizes <paramref name="text"/> with <c>TextTokenizer.Tokenize</c> and
        /// unions the resulting tokens into <paramref name="target"/>.
        /// </summary>
        /// <param name="target">Set that receives the tokens.</param>
        /// <param name="text">Plain text to tokenize; null or empty text is ignored.</param>
        private static void AddTermsFromPlainText(ISet<string> target, string text)
        {
            // Only non-empty text is tokenized; otherwise the target is left untouched.
            if (!string.IsNullOrEmpty(text))
            {
                target.UnionWith(TextTokenizer.Tokenize(text));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tokenizes the text of <paramref name="expression"/> and records every
        /// resulting term on <paramref name="result"/>: in <c>ExcludedTerms</c> when
        /// the expression is negated, in <c>IncludedTerms</c> otherwise.
        /// </summary>
        /// <param name="result">Filter whose term collections are appended to.</param>
        /// <param name="expression">Text query supplying the text and the negation flag.</param>
        private static void Apply(IssueFilter result, BoundTextQuery expression)
        {
            foreach (var token in TextTokenizer.Tokenize(expression.Text))
            {
                if (!expression.IsNegated)
                {
                    result.IncludedTerms.Add(token);
                    continue;
                }

                result.ExcludedTerms.Add(token);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily reads the file at <paramref name="fileName"/>, tokenizes its content
        /// with a <c>TextTokenizer</c> and yields the tokens one by one.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <param name="ignoreCase">
        /// When <c>true</c>, every token is lower-cased using the invariant culture
        /// before being yielded; when <c>false</c>, tokens are yielded unchanged.
        /// </param>
        /// <returns>
        /// The tokens of the file, or an empty sequence when reading the file fails
        /// with an <see cref="IOException"/>.
        /// </returns>
        /// <remarks>
        /// This is an iterator method, so the file is not read until the returned
        /// sequence is enumerated.
        /// </remarks>
        private static IEnumerable<string> GetFileTokens(string fileName, bool ignoreCase)
        {
            string text = null;
            try
            {
                text = File.ReadAllText(fileName);
            }
            catch (IOException ex)
            {
                // Best effort: an unreadable file is logged and contributes no tokens.
                Debug.WriteLine(ex.Message + ex.StackTrace);
            }

            if (text == null)
            {
                yield break;
            }

            var tokenizer = new TextTokenizer(text);

            foreach (var token in tokenizer.Tokenize())
            {
                // Invariant-culture lower-casing keeps the output independent of the
                // current thread culture (e.g. Turkish 'I' -> dotless 'ı' with ToLower()).
                yield return ignoreCase ? token.ToLowerInvariant() : token;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Rebuilds <c>TernaryIndex</c> from the current cluster parameters: every
        /// file in <c>DocFiles</c> is read from the docs path of <c>LanguageId</c>,
        /// tokenized, and fed to <c>TernaryIndex.ReadSequence</c>; afterwards the
        /// index's base words are added to its similarity index.
        /// </summary>
        /// <remarks>
        /// Files that fail to load with an <see cref="IOException"/> are logged and
        /// skipped. Any other exception is reported to the user in a message box.
        /// NOTE(review): <c>ReadSequence</c> is invoked concurrently from
        /// <c>Parallel.ForEach</c> — confirm it is safe for concurrent use.
        /// </remarks>
        private void Cluster()
        {
            try
            {
                TernaryIndex = new TernaryIndex(ClusterVectorSize, ClusterVectorDensity);

                Parallel.ForEach(DocFiles, docFile =>
                {
                    string content = null;
                    try
                    {
                        var docPath = Path.Combine(CommonFiles.DocsPath(LanguageId), docFile);
                        content = File.ReadAllText(docPath);
                    }
                    catch (IOException ex)
                    {
                        Debug.WriteLine(ex.Message + ex.StackTrace);
                    }

                    // A null content means the read failed above; skip this document.
                    if (content == null)
                    {
                        return;
                    }

                    var docTokens = new TextTokenizer(content).Tokenize();
                    TernaryIndex.ReadSequence(docTokens, ClusterPreWindow, ClusterPostWindow);
                });

                var baseWords = TernaryIndex.BaseWords.ToArray();
                TernaryIndex.SimilarityIndex.AddRange(baseWords);

                //TernaryIndex.SimilarityIndex.BuildIndex(TernaryIndex.Contexts.Cast<ITernaryVector>().ToArray());
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message + ex.StackTrace);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Tokenizes <paramref name="text"/> with a new <c>TextTokenizer</c> (built
        /// on a new <c>TextHelper</c>) and returns the tokens as a list.
        /// </summary>
        /// <param name="text">Text to tokenize.</param>
        /// <returns>A new list containing the tokens produced by the tokenizer.</returns>
        private static List<IToken> Tokenize(string text)
        {
            // Typed as the interface so the call dispatches through ITextTokenizer.
            ITextTokenizer textTokenizer = new TextTokenizer(new TextHelper());
            var tokens = textTokenizer.Tokenize(text);

            return tokens.ToList();
        }