Example #1
0
 /// <summary>
 /// Pushes the operand that represents a single query term onto <paramref name="opStack"/>.
 /// </summary>
 /// <param name="term">The query term to resolve.</param>
 /// <param name="index">Index accessor used to look up the record for <paramref name="term"/>.</param>
 /// <param name="opStack">Operand stack that receives the result.</param>
 /// <remarks>
 /// A term rejected by <c>FullTextIndexer.isValuableToken</c> is pushed as a <c>StopwordTerm</c>
 /// marker and recorded once in <c>Stopwords</c>. Any other term is looked up in the index and the
 /// lookup result is pushed as-is, so <c>null</c> is pushed when the index returns no record.
 /// </remarks>
 private static void PushTermOnStack(string term, TermIndexAccessor index, Stack opStack)
 {
     if (!FullTextIndexer.isValuableToken(term))
     {
         opStack.Push(new StopwordTerm());

         // Remember each skipped term only once.
         bool alreadyRecorded = Stopwords.IndexOf(term) != -1;
         if (!alreadyRecorded)
         {
             Stopwords.Add(term);
         }
         return;
     }

     TermIndexRecord found = index.GetRecord(term);
     if (found != null)
     {
         // The term's position in Lexemes is handed to the record as its ID;
         // a term seen for the first time is appended and takes the last position.
         int lexemePosition = Lexemes.IndexOf(term);
         if (lexemePosition == -1)
         {
             Lexemes.Add(term);
             lexemePosition = Lexemes.Count - 1;
         }

         // The position is narrowed to ushort before being passed on.
         found.PopulateRecordID((ushort)lexemePosition);
     }

     opStack.Push(found);
 }
 /// <summary>
 /// Writes this instance to <paramref name="writer"/> as a single JSON object.
 /// </summary>
 /// <param name="writer">The JSON writer that receives the object.</param>
 /// <remarks>
 /// "stopwords", "stopwordsList", "ignoreCase" and "removeTrailing" are written only when the
 /// corresponding member is set (and, for "stopwords", non-empty); "@odata.type" and "name" are
 /// always written.
 /// </remarks>
 void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
 {
     writer.WriteStartObject();

     bool hasStopwords = Stopwords != null && Stopwords.Any();
     if (hasStopwords)
     {
         writer.WriteStartArray("stopwords");
         foreach (var word in Stopwords)
         {
             writer.WriteStringValue(word);
         }
         writer.WriteEndArray();
     }

     if (StopwordsList != null)
     {
         writer.WriteString("stopwordsList", StopwordsList.Value.ToSerialString());
     }

     if (IgnoreCase != null)
     {
         writer.WriteBoolean("ignoreCase", IgnoreCase.Value);
     }

     if (RemoveTrailingStopWords != null)
     {
         writer.WriteBoolean("removeTrailing", RemoveTrailingStopWords.Value);
     }

     writer.WriteString("@odata.type", ODataType);
     writer.WriteString("name", Name);

     writer.WriteEndObject();
 }
Example #3
0
        /// <summary>
        /// Turns a document into a list of stemmed terms.
        /// </summary>
        /// <param name="doc">The raw document text.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="doc"/> is null, empty or whitespace; otherwise the stems of
        /// every token that is not in <c>Stopwords</c> and whose normalized form is not blank,
        /// in token order.
        /// </returns>
        public virtual List <string> Preprocess(string doc)
        {
            if (string.IsNullOrWhiteSpace(doc))
            {
                return null;
            }

            var terms = new List <string>();

            foreach (var rawToken in Tokenizer.Tokenize(doc))
            {
                // The stopword check runs on the raw token, before normalization.
                if (!Stopwords.Contains(rawToken))
                {
                    var cleaned = Normalizer.Normalize(rawToken);
                    if (!string.IsNullOrWhiteSpace(cleaned))
                    {
                        terms.Add(Stemmer.Stem(cleaned));
                    }
                }
            }

            return terms;
        }
Example #4
0
 /// <summary>
 /// Writes this instance to <paramref name="writer"/> as a single JSON object.
 /// </summary>
 /// <param name="writer">The JSON writer that receives the object.</param>
 /// <remarks>
 /// "lowercase", "pattern", "flags" and "stopwords" are written only when the corresponding
 /// member is set (and, for "stopwords", non-empty); "@odata.type" and "name" are always written.
 /// </remarks>
 void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
 {
     writer.WriteStartObject();

     if (LowerCaseTerms != null)
     {
         writer.WriteBoolean("lowercase", LowerCaseTerms.Value);
     }

     if (Pattern != null)
     {
         writer.WriteString("pattern", Pattern);
     }

     if (FlagsInternal != null)
     {
         writer.WriteString("flags", FlagsInternal);
     }

     bool hasStopwords = Stopwords != null && Stopwords.Any();
     if (hasStopwords)
     {
         writer.WriteStartArray("stopwords");
         foreach (var word in Stopwords)
         {
             writer.WriteStringValue(word);
         }
         writer.WriteEndArray();
     }

     writer.WriteString("@odata.type", ODataType);
     writer.WriteString("name", Name);

     writer.WriteEndObject();
 }
Example #5
0
        /// <summary>
        /// Click handler that refills <c>Top_10NoStops</c> with up to ten terms computed from the
        /// input text after removing the stopwords checked in <c>Stopwords</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// Shows an error message box and leaves the list empty when no stopword is checked.
        /// </remarks>
        private void Top_10_NoStopsButton_Click_1(object sender, EventArgs e)
        {
            Top_10NoStops.Clear();

            string inputText = this.InputTextBox.Text;

            if (Stopwords.CheckedItems.Count == 0)
            {
                MessageBox.Show("No Stopwords selected!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Collect the text of every checked entry, in list order.
            List <string> checkedStopwords = Enumerable.Range(0, Stopwords.Items.Count)
                                             .Where(index => Stopwords.GetItemChecked(index))
                                             .Select(index => (string)Stopwords.Items[index])
                                             .ToList();

            var filteredDocs = Program.GetDocs_NoStopwords(inputText, checkedStopwords);
            var scoredDocs   = Program.TF_IDF(filteredDocs);

            // NOTE(review): OrderBy sorts ascending, so this keeps the ten LOWEST values;
            // confirm that is what "top 10" is meant to be.
            var selectedTerms = scoredDocs
                                .SelectMany(doc => doc)
                                .OrderBy(pair => pair.Value)
                                .Select(pair => pair.Key)
                                .Distinct()
                                .Take(10)
                                .ToList();

            foreach (var term in selectedTerms)
            {
                Top_10NoStops.Items.Add(term);
            }
        }
Example #6
0
        /// <summary>
        /// Persists a new stopword and answers with a "created" result pointing at the
        /// "GetStopwordById" route.
        /// </summary>
        /// <param name="stopword">The stopword read from the request body.</param>
        /// <returns>The created-at-route result carrying the stored stopword.</returns>
        /// <remarks>
        /// When the incoming stopword has no Id, a new <c>ObjectId</c> string is assigned before it
        /// is stored.
        /// </remarks>
        public async Task <ActionResult <Stopwords> > CreateStopword([FromBody] Stopwords stopword)
        {
            bool needsGeneratedId = string.IsNullOrEmpty(stopword.Id);
            if (needsGeneratedId)
            {
                stopword.Id = ObjectId.GenerateNewId().ToString();
            }

            await _stopwordsRepository.CreateStopword(stopword);

            var routeValues = new { id = stopword.Id };
            return CreatedAtRoute("GetStopwordById", routeValues, stopword);
        }
        /// <summary>
        /// Splits <c>input_text</c> on <c>chars_to_ignore</c>, drops stopwords, and appends the
        /// singularized lower-case form of every remaining token to <c>filtered_tokens</c>.
        /// </summary>
        /// <remarks>
        /// Tokens are lower-cased with the invariant culture so that stopword matching does not depend
        /// on the current UI culture (for example the Turkish dotless i). The stopword list is matched
        /// with ordinal, case-sensitive equality against the lower-cased token.
        /// </remarks>
        private void process_tokens()
        {
            IPluralize pluralizer = new Pluralizer();

            // A set gives constant-time membership tests instead of scanning the array for every token.
            var stopwords = new System.Collections.Generic.HashSet <string>(Stopwords.getStopwords());

            foreach (string token in input_text.Split(chars_to_ignore, StringSplitOptions.RemoveEmptyEntries))
            {
                // Lower-case once and reuse the result for both the lookup and the stored token.
                string lowered = token.ToLowerInvariant();

                if (!stopwords.Contains(lowered))
                {
                    filtered_tokens.Add(pluralizer.Singularize(lowered));
                }
            }
        }
Example #8
0
        /// <summary>
        /// Evaluates a query given in postfix form against the term index.
        /// </summary>
        /// <param name="postfixForm">The query in postfix form.</param>
        /// <param name="termIndex">Accessor for the term index the query is run against.</param>
        /// <param name="appendIdMappings">
        /// When <c>false</c>, <c>Lexemes</c> and <c>Stopwords</c> are cleared before evaluation;
        /// when <c>true</c> they are left as they are.
        /// </param>
        /// <returns>
        /// The resulting entries sorted with <c>CompareByTfIdf</c>, or <c>null</c> when the query
        /// produces no entries, reduces to a single stopword, or fails to evaluate.
        /// </returns>
        /// <remarks>
        /// <c>Error</c> is reset to <c>ErrorStatus.NoError</c> on entry and set to
        /// <c>ErrorStatus.IllegalQuerySyntax</c> when any exception is raised during evaluation;
        /// the exception itself is traced and not rethrown.
        /// </remarks>
        public static Entry[] ProcessQuery(QueryPostfixForm postfixForm, TermIndexAccessor termIndex, bool appendIdMappings)
        {
            Entry[] matches  = null;
            Stack   operands = new Stack();

            Error = ErrorStatus.NoError;
            MappedInstances.Clear();

            if (!appendIdMappings)
            {
                Lexemes.Clear();
                Stopwords.Clear();
            }

            try
            {
                IteratePostfixExpression(postfixForm, termIndex, operands);

                // After evaluation exactly one operand must be left on the stack.
                // It may be null when no document corresponds to the query.
                if (operands.Count != 1)
                {
                    throw new ApplicationException("QueryParser -- Illegal query statement found");
                }

                // A lone stopword marker means there is nothing to extract.
                bool onlyStopwordLeft = operands.Peek() is StopwordTerm;
                if (!onlyStopwordLeft)
                {
                    matches = ExtractOperandFromStack(operands);
                    if (matches != null)
                    {
                        Array.Sort(matches, new CompareByTfIdf());
                    }
                }
            }
            catch (Exception exc)
            {
                // An exception here is taken to mean the expression was built with syntactic
                // errors: report it through Error and return no result.
                Trace.WriteLine("QueryProcessor -- exception [" + exc.Message + "] occured.");
                Error   = ErrorStatus.IllegalQuerySyntax;
                matches = null;
            }

            operands.Clear();
            return matches;
        }
 /// <summary>
 /// Writes this instance to <paramref name="writer"/> as a single JSON object.
 /// </summary>
 /// <param name="writer">The JSON writer that receives the object.</param>
 /// <remarks>
 /// "stopwords" is written only when the collection is set and non-empty;
 /// "@odata.type" and "name" are always written.
 /// </remarks>
 void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
 {
     writer.WriteStartObject();

     bool hasStopwords = Stopwords != null && Stopwords.Any();
     if (hasStopwords)
     {
         writer.WriteStartArray("stopwords");
         foreach (var word in Stopwords)
         {
             writer.WriteStringValue(word);
         }
         writer.WriteEndArray();
     }

     writer.WriteString("@odata.type", ODataType);
     writer.WriteString("name", Name);

     writer.WriteEndObject();
 }
Example #10
0
        /// <summary>
        /// Grades the submitted answer against the question's expected answer and records the outcome.
        /// </summary>
        /// <remarks>
        /// Both texts have their stopwords removed, then similarity is computed as one minus the
        /// <c>NormalizedLevenshtein</c> distance. A similarity of 0.8 or more calls
        /// <c>UpdateBuzzCorrect</c>; anything else calls <c>UpdateBuzzIncorrect</c>.
        /// <c>UpdateAnswered</c> is awaited in both cases.
        /// </remarks>
        /// <returns>A task that completes once <c>UpdateAnswered</c> has finished.</returns>
        public async Task Check()
        {
            string given    = Stopwords.RemoveStopwords(AnsweredText);
            string expected = Stopwords.RemoveStopwords(Question.Answer);

            var metric   = new NormalizedLevenshtein();
            var distance = metric.Distance(given, expected);
            var score    = 1 - distance;

            bool closeEnough = score >= 0.8;
            if (closeEnough)
            {
                UpdateBuzzCorrect();
            }
            else
            {
                UpdateBuzzIncorrect();
            }

            await UpdateAnswered();
        }
        /// <summary>
        /// Imports the artifacts, oracle, relationships and stopwords of the currently selected
        /// dataset and stores them in the workspace.
        /// </summary>
        /// <remarks>
        /// Reads "ListOfDatasets" and the "CurrentDataset" index from the workspace. The source and
        /// target artifact descriptors are split on '#', and the first two parts of each are passed
        /// to <c>Artifacts.ImportDirectory</c>. Results are stored under "SourceArtifacts",
        /// "TargetArtifacts", "Oracle", "Relationships" and "Stopwords".
        /// </remarks>
        public override void Compute()
        {
            var           datasets     = (List <CSMR13DataSet>)Workspace.Load("ListOfDatasets");
            int           currentIndex = (int)Workspace.Load("CurrentDataset");
            CSMR13DataSet dataset      = datasets[currentIndex];

            // Artifacts: both descriptors are split before anything is stored.
            string[] sourceParts = dataset.SourceArtifacts.Split('#');
            string[] targetParts = dataset.TargetArtifacts.Split('#');

            var sourceArtifacts = Artifacts.ImportDirectory(sourceParts[0], sourceParts[1]);
            Workspace.Store("SourceArtifacts", sourceArtifacts);

            var targetArtifacts = Artifacts.ImportDirectory(targetParts[0], targetParts[1]);
            Workspace.Store("TargetArtifacts", targetArtifacts);

            // Oracle and relationships are both read through Oracle.Import.
            Workspace.Store("Oracle", Oracle.Import(dataset.Oracle));
            Workspace.Store("Relationships", Oracle.Import(dataset.Relationships));

            // Stopwords
            Workspace.Store("Stopwords", Stopwords.Import(dataset.Stopwords));
        }
Example #12
0
 /// <summary>Click handler that clears the current selection in <c>Stopwords</c>.</summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void button1_Click(object sender, EventArgs e) => Stopwords.ClearSelected();
Example #13
0
 /// <summary>Selection-changed handler that clears the current selection in <c>Stopwords</c>.</summary>
 /// <param name="sender">Event source (unused).</param>
 /// <param name="e">Event data (unused).</param>
 private void comboClearStops_SelectedIndexChanged(object sender, EventArgs e) => Stopwords.ClearSelected();
Example #14
0
        /// <summary>
        /// Refills <c>Stem_NoStops</c> when the selection in <c>comboNoStops</c> changes.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <remarks>
        /// Only the "Stemmed Text_Eng" and "Stemmed Text_Pt" selections are handled; any other
        /// selection just clears the list. For a handled selection the checked entries of
        /// <c>Stopwords</c> are removed from the input text, the result is stemmed with the English or
        /// Portuguese stemmer, scored with <c>Program.TF_IDF</c>, and up to ten distinct terms are
        /// listed. An error message box is shown when no stopword is checked.
        /// </remarks>
        private void comboNoStops_SelectedIndexChanged(object sender, EventArgs e)
        {
            Stem_NoStops.Clear();

            // SelectedItem is typed as object, so comparing it to a string literal with ==
            // is a reference comparison (CS0252). Compare the string values explicitly instead.
            string selection    = comboNoStops.SelectedItem as string;
            bool   isEnglish    = string.Equals(selection, "Stemmed Text_Eng", StringComparison.Ordinal);
            bool   isPortuguese = string.Equals(selection, "Stemmed Text_Pt", StringComparison.Ordinal);

            if (!isEnglish && !isPortuguese)
            {
                return;
            }

            string raw_input = this.InputTextBox.Text;

            if (Stopwords.CheckedItems.Count == 0)
            {
                MessageBox.Show("No Stopwords selected!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Collect the text of every checked stopword entry, in list order.
            List <string> stopwords = new List <string>();
            for (int i = 0; i < Stopwords.Items.Count; i++)
            {
                if (Stopwords.GetItemChecked(i))
                {
                    stopwords.Add((string)Stopwords.Items[i]);
                }
            }

            var stopwords_docs = Program.GetDocs_NoStopwords(raw_input, stopwords);

            // The two languages differ only in the stemmer. Each branch keeps its own locals so
            // nothing is assumed about the stemmers sharing a return type.
            // NOTE(review): OrderBy sorts ascending, so Take(10) keeps the ten LOWEST values;
            // confirm that is what "top 10" is meant to be.
            if (isEnglish)
            {
                var stopStem_docs    = Program.EnglishStemming(stopwords_docs);
                var stoplist_docStem = Program.TF_IDF(stopStem_docs);

                var top10_StemEngNoStops = stoplist_docStem.SelectMany(x => x).OrderBy(x => x.Value).Select(x => x.Key).Distinct().Take(10).ToList();

                foreach (var term in top10_StemEngNoStops)
                {
                    Stem_NoStops.Items.Add(term);
                }
            }
            else
            {
                var stopStem_docs    = Program.PortugueseStemming(stopwords_docs);
                var stoplist_docStem = Program.TF_IDF(stopStem_docs);

                var top10_StemPtNoStops = stoplist_docStem.SelectMany(x => x).OrderBy(x => x.Value).Select(x => x.Key).Distinct().Take(10).ToList();

                foreach (var term in top10_StemPtNoStops)
                {
                    Stem_NoStops.Items.Add(term);
                }
            }
        }
Example #15
0
 /// <summary>
 /// Updates a stopword through the repository and returns the repository's result in an OK response.
 /// </summary>
 /// <param name="stopword">The stopword read from the request body.</param>
 /// <returns>An OK result wrapping whatever <c>UpdateStopwords</c> returned.</returns>
 public async Task <IActionResult> UpdateStopword([FromBody] Stopwords stopword)
 {
     var updated = await _stopwordsRepository.UpdateStopwords(stopword);
     return Ok(updated);
 }