/// <summary>
/// Pushes the operand that represents a single query term onto the evaluation stack.
/// </summary>
/// <remarks>
/// A term rejected by <c>FullTextIndexer.isValuableToken</c> is pushed as a
/// <c>StopwordTerm</c> marker and remembered once in <c>Stopwords</c>. Any other term is
/// looked up in the index; a record that is found is tagged with the term's position in
/// <c>Lexemes</c> (the term is appended when absent). When no record exists,
/// <c>null</c> is pushed.
/// </remarks>
/// <param name="term">Query term to resolve.</param>
/// <param name="index">Accessor used to fetch the term's index record.</param>
/// <param name="opStack">Operand stack that receives the result.</param>
private static void PushTermOnStack(string term, TermIndexAccessor index, Stack opStack)
{
    if (!FullTextIndexer.isValuableToken(term))
    {
        opStack.Push(new StopwordTerm());

        bool alreadyRecorded = Stopwords.IndexOf(term) != -1;
        if (!alreadyRecorded)
        {
            Stopwords.Add(term);
        }
        return;
    }

    TermIndexRecord found = index.GetRecord(term);
    if (found != null)
    {
        int position = Lexemes.IndexOf(term);
        if (position == -1)
        {
            // Newly seen lexeme: it lands at the end of the list.
            Lexemes.Add(term);
            position = Lexemes.Count - 1;
        }
        found.PopulateRecordID((ushort)position);
    }

    // Pushed even when null so the stack keeps one operand per term.
    opStack.Push(found);
}
/// <summary>
/// Writes this object as a JSON object: the optional "stopwords", "stopwordsList",
/// "ignoreCase" and "removeTrailing" properties when set, followed by the always-present
/// "@odata.type" and "name" properties.
/// </summary>
/// <param name="writer">Writer that receives the JSON output.</param>
void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
{
    writer.WriteStartObject();

    // The stopword array is omitted entirely when it is null or empty.
    if (Stopwords != null && Stopwords.Any())
    {
        writer.WriteStartArray("stopwords");
        foreach (var word in Stopwords)
        {
            writer.WriteStringValue(word);
        }
        writer.WriteEndArray();
    }

    if (StopwordsList != null)
    {
        writer.WriteString("stopwordsList", StopwordsList.Value.ToSerialString());
    }

    if (IgnoreCase != null)
    {
        writer.WriteBoolean("ignoreCase", IgnoreCase.Value);
    }

    if (RemoveTrailingStopWords != null)
    {
        writer.WriteBoolean("removeTrailing", RemoveTrailingStopWords.Value);
    }

    writer.WriteString("@odata.type", ODataType);
    writer.WriteString("name", Name);

    writer.WriteEndObject();
}
/// <summary>
/// Turns a document into a list of stems: tokenizes it, skips tokens contained in
/// <c>Stopwords</c>, normalizes the rest, skips tokens whose normalized form is blank,
/// and stems what remains.
/// </summary>
/// <param name="doc">Raw document text.</param>
/// <returns>
/// The stems in token order, or <c>null</c> when <paramref name="doc"/> is null, empty
/// or whitespace.
/// </returns>
public virtual List<string> Preprocess(string doc)
{
    if (string.IsNullOrWhiteSpace(doc))
    {
        return null;
    }

    var stems = new List<string>();
    foreach (var token in Tokenizer.Tokenize(doc))
    {
        // Stopword filtering happens on the raw token, before normalization.
        if (!Stopwords.Contains(token))
        {
            var normalized = Normalizer.Normalize(token);
            if (!string.IsNullOrWhiteSpace(normalized))
            {
                stems.Add(Stemmer.Stem(normalized));
            }
        }
    }

    return stems;
}
/// <summary>
/// Writes this object as a JSON object: the optional "lowercase", "pattern", "flags"
/// and "stopwords" properties when set, followed by the always-present "@odata.type"
/// and "name" properties.
/// </summary>
/// <param name="writer">Writer that receives the JSON output.</param>
void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
{
    writer.WriteStartObject();

    if (LowerCaseTerms != null)
    {
        writer.WriteBoolean("lowercase", LowerCaseTerms.Value);
    }

    if (Pattern != null)
    {
        writer.WriteString("pattern", Pattern);
    }

    if (FlagsInternal != null)
    {
        writer.WriteString("flags", FlagsInternal);
    }

    // The stopword array is omitted entirely when it is null or empty.
    if (Stopwords != null && Stopwords.Any())
    {
        writer.WriteStartArray("stopwords");
        foreach (var word in Stopwords)
        {
            writer.WriteStringValue(word);
        }
        writer.WriteEndArray();
    }

    writer.WriteString("@odata.type", ODataType);
    writer.WriteString("name", Name);

    writer.WriteEndObject();
}
/// <summary>
/// Click handler: clears <c>Top_10NoStops</c>, then fills it with up to ten distinct keys
/// computed from the input text after removing the stopwords checked in
/// <c>Stopwords</c>. Shows an error box and does nothing else when no stopword is checked.
/// </summary>
private void Top_10_NoStopsButton_Click_1(object sender, EventArgs e)
{
    Top_10NoStops.Clear();
    string inputText = this.InputTextBox.Text;
    List<string> chosenStopwords = new List<string>();

    if (Stopwords.CheckedItems.Count == 0)
    {
        MessageBox.Show("No Stopwords selected!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Gather the text of every checked entry.
    for (int idx = 0; idx < Stopwords.Items.Count; idx++)
    {
        if (Stopwords.GetItemChecked(idx))
        {
            chosenStopwords.Add((string)Stopwords.Items[idx]);
        }
    }

    var filteredDocs = Program.GetDocs_NoStopwords(inputText, chosenStopwords);
    var scoredDocs = Program.TF_IDF(filteredDocs);

    // NOTE(review): OrderBy is ascending, so these are the ten lowest-valued keys;
    // confirm whether a descending order was intended for a "top 10".
    var flattened = scoredDocs.SelectMany(x => x);
    var orderedKeys = flattened.OrderBy(x => x.Value).Select(x => x.Key);
    var firstTen = orderedKeys.Distinct().Take(10).ToList();

    foreach (var key in firstTen)
    {
        Top_10NoStops.Items.Add(key);
    }
}
/// <summary>
/// Stores the posted stopword, generating a new <c>ObjectId</c> string for its
/// <c>Id</c> when none was supplied, and answers with a "created" result that points at
/// the "GetStopwordById" route.
/// </summary>
/// <param name="stopword">Stopword read from the request body.</param>
/// <returns>A created-at-route result carrying the stored stopword.</returns>
public async Task<ActionResult<Stopwords>> CreateStopword([FromBody] Stopwords stopword)
{
    bool idMissing = string.IsNullOrEmpty(stopword.Id);
    if (idMissing)
    {
        stopword.Id = ObjectId.GenerateNewId().ToString();
    }

    await _stopwordsRepository.CreateStopword(stopword);

    var routeValues = new { id = stopword.Id };
    return CreatedAtRoute("GetStopwordById", routeValues, stopword);
}
/// <summary>
/// Splits <c>input_text</c> on <c>chars_to_ignore</c>, lower-cases each token, drops the
/// tokens that appear in the stopword list, and appends the singularized form of every
/// remaining token to <c>filtered_tokens</c>.
/// </summary>
private void process_tokens()
{
    IPluralize pluralizer = new Pluralizer();

    // A hash set gives constant-time membership checks; the original scanned the whole
    // stopword array once per token. Fully qualified so no extra using directive is needed.
    var stopwords = new System.Collections.Generic.HashSet<string>(Stopwords.getStopwords());

    foreach (string token in input_text.Split(chars_to_ignore, StringSplitOptions.RemoveEmptyEntries))
    {
        // Lower-case once and reuse it for both the lookup and the stored value.
        string lowered = token.ToLower();
        if (!stopwords.Contains(lowered))
        {
            filtered_tokens.Add(pluralizer.Singularize(lowered));
        }
    }
}
/// <summary>
/// Evaluates a query given in postfix form against the term index.
/// </summary>
/// <remarks>
/// Resets <c>Error</c> and <c>MappedInstances</c> on entry; <c>Lexemes</c> and
/// <c>Stopwords</c> are also reset unless <paramref name="appendIdMappings"/> is true.
/// Any exception raised during evaluation is traced, <c>Error</c> is set to
/// <c>ErrorStatus.IllegalQuerySyntax</c>, and <c>null</c> is returned.
/// </remarks>
/// <param name="postfixForm">Query in postfix notation.</param>
/// <param name="termIndex">Accessor to the term index.</param>
/// <param name="appendIdMappings">
/// When true, previously collected lexemes and stopwords are kept.
/// </param>
/// <returns>
/// Matching entries sorted with <c>CompareByTfIdf</c>, or <c>null</c> when nothing
/// matched, when the query reduced to a stopword marker, or when evaluation failed.
/// </returns>
public static Entry[] ProcessQuery(QueryPostfixForm postfixForm, TermIndexAccessor termIndex, bool appendIdMappings)
{
    Stack opStack = new Stack();
    Entry[] matches = null;

    Error = ErrorStatus.NoError;
    MappedInstances.Clear();
    if (!appendIdMappings)
    {
        Lexemes.Clear();
        Stopwords.Clear();
    }

    try
    {
        IteratePostfixExpression(postfixForm, termIndex, opStack);

        // A well-formed query leaves exactly one operand behind; it may be null
        // when no document corresponds to the query.
        if (opStack.Count != 1)
        {
            throw new ApplicationException("QueryParser -- Illegal query statement found");
        }

        bool onlyStopwordLeft = opStack.Peek() is StopwordTerm;
        if (!onlyStopwordLeft)
        {
            matches = ExtractOperandFromStack(opStack);
            if (matches != null)
            {
                Array.Sort(matches, new CompareByTfIdf());
            }
        }
    }
    catch (Exception failure)
    {
        // Raised when the expression was built with syntactic errors:
        // report it through Error and return no result.
        Trace.WriteLine("QueryProcessor -- exception [" + failure.Message + "] occured.");
        Error = ErrorStatus.IllegalQuerySyntax;
        matches = null;
    }

    opStack.Clear();
    return matches;
}
/// <summary>
/// Writes this object as a JSON object: the optional "stopwords" array when it has
/// entries, followed by the always-present "@odata.type" and "name" properties.
/// </summary>
/// <param name="writer">Writer that receives the JSON output.</param>
void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
{
    writer.WriteStartObject();

    // The stopword array is omitted entirely when it is null or empty.
    if (Stopwords != null && Stopwords.Any())
    {
        writer.WriteStartArray("stopwords");
        foreach (var word in Stopwords)
        {
            writer.WriteStringValue(word);
        }
        writer.WriteEndArray();
    }

    writer.WriteString("@odata.type", ODataType);
    writer.WriteString("name", Name);

    writer.WriteEndObject();
}
/// <summary>
/// Compares the given answer with the question's answer after stopword removal, marks
/// the buzz as correct when their normalized Levenshtein similarity is at least 0.8 and
/// as incorrect otherwise, then awaits <c>UpdateAnswered</c>.
/// </summary>
public async Task Check()
{
    const double acceptThreshold = 0.8;

    string givenAnswer = Stopwords.RemoveStopwords(AnsweredText);
    string expectedAnswer = Stopwords.RemoveStopwords(Question.Answer);

    // Similarity is the complement of the normalized distance.
    var metric = new NormalizedLevenshtein();
    var similarity = 1 - metric.Distance(givenAnswer, expectedAnswer);

    bool closeEnough = similarity >= acceptThreshold;
    if (closeEnough)
    {
        UpdateBuzzCorrect();
    }
    else
    {
        UpdateBuzzIncorrect();
    }

    await UpdateAnswered();
}
/// <summary>
/// Loads the dataset selected in the workspace and stores its imported source
/// artifacts, target artifacts, oracle, relationships and stopwords back into the
/// workspace under "SourceArtifacts", "TargetArtifacts", "Oracle", "Relationships" and
/// "Stopwords".
/// </summary>
public override void Compute()
{
    var datasets = (List<CSMR13DataSet>)Workspace.Load("ListOfDatasets");
    var selectedIndex = (int)Workspace.Load("CurrentDataset");
    CSMR13DataSet dataset = datasets[selectedIndex];

    // Artifact descriptors are two '#'-separated fields, passed on as the two
    // arguments of Artifacts.ImportDirectory.
    char[] separator = new char[] { '#' };
    string[] sourceParts = dataset.SourceArtifacts.Split(separator);
    string[] targetParts = dataset.TargetArtifacts.Split(separator);

    var sourceArtifacts = Artifacts.ImportDirectory(sourceParts[0], sourceParts[1]);
    Workspace.Store("SourceArtifacts", sourceArtifacts);

    var targetArtifacts = Artifacts.ImportDirectory(targetParts[0], targetParts[1]);
    Workspace.Store("TargetArtifacts", targetArtifacts);

    // Oracle and relationships are both read through Oracle.Import.
    var oracle = Oracle.Import(dataset.Oracle);
    Workspace.Store("Oracle", oracle);

    var relationships = Oracle.Import(dataset.Relationships);
    Workspace.Store("Relationships", relationships);

    var stopwords = Stopwords.Import(dataset.Stopwords);
    Workspace.Store("Stopwords", stopwords);
}
/// <summary>
/// Click handler: calls <c>ClearSelected</c> on the <c>Stopwords</c> control.
/// </summary>
private void button1_Click(object sender, EventArgs e) => Stopwords.ClearSelected();
/// <summary>
/// Selection-changed handler: calls <c>ClearSelected</c> on the <c>Stopwords</c> control.
/// </summary>
// NOTE(review): other handlers read the control's checked items; confirm that clearing
// the selection (rather than the check marks) is what is wanted here.
private void comboClearStops_SelectedIndexChanged(object sender, EventArgs e) => Stopwords.ClearSelected();
/// <summary>
/// Selection-changed handler: clears <c>Stem_NoStops</c> and, when the selected entry is
/// "Stemmed Text_Eng" or "Stemmed Text_Pt", fills it with up to ten distinct keys
/// computed from the input text after stopword removal and stemming (English or
/// Portuguese respectively). Shows an error box and adds nothing when no stopword is
/// checked; any other selection only clears the list.
/// </summary>
private void comboNoStops_SelectedIndexChanged(object sender, EventArgs e)
{
    Stem_NoStops.Clear();

    // SelectedItem is typed as object, so '==' against a string literal compares
    // references (compiler warning CS0252) and only succeeds when the item happens to be
    // the very same string instance. Compare the text by value instead.
    string selectedMode = comboNoStops.SelectedItem as string;
    bool english = string.Equals(selectedMode, "Stemmed Text_Eng", StringComparison.Ordinal);
    bool portuguese = string.Equals(selectedMode, "Stemmed Text_Pt", StringComparison.Ordinal);
    if (!english && !portuguese)
    {
        return;
    }

    if (Stopwords.CheckedItems.Count == 0)
    {
        MessageBox.Show("No Stopwords selected!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Both modes share the same input and stopword selection.
    string raw_input = this.InputTextBox.Text;
    List<string> stopwords = new List<string>();
    for (int i = 0; i < Stopwords.Items.Count; i++)
    {
        if (Stopwords.GetItemChecked(i))
        {
            stopwords.Add((string)Stopwords.Items[i]);
        }
    }

    var stopwords_docs = Program.GetDocs_NoStopwords(raw_input, stopwords);

    // NOTE(review): OrderBy is ascending, so the ten lowest-valued keys are listed;
    // confirm whether a descending order was intended.
    // The branches stay separate so each stemmer's result type is inferred on its own.
    if (english)
    {
        var stemmedDocs = Program.EnglishStemming(stopwords_docs);
        var scoredDocs = Program.TF_IDF(stemmedDocs);
        var firstTen = scoredDocs.SelectMany(x => x).OrderBy(x => x.Value).Select(x => x.Key).Distinct().Take(10).ToList();
        foreach (var key in firstTen)
        {
            Stem_NoStops.Items.Add(key);
        }
    }
    else
    {
        var stemmedDocs = Program.PortugueseStemming(stopwords_docs);
        var scoredDocs = Program.TF_IDF(stemmedDocs);
        var firstTen = scoredDocs.SelectMany(x => x).OrderBy(x => x.Value).Select(x => x.Key).Distinct().Take(10).ToList();
        foreach (var key in firstTen)
        {
            Stem_NoStops.Items.Add(key);
        }
    }
}
/// <summary>
/// Passes the posted stopword to the repository's <c>UpdateStopwords</c> and returns its
/// result wrapped in an OK response.
/// </summary>
/// <param name="stopword">Stopword read from the request body.</param>
/// <returns>An OK result carrying whatever the repository returned.</returns>
public async Task<IActionResult> UpdateStopword([FromBody] Stopwords stopword)
{
    var updateResult = await _stopwordsRepository.UpdateStopwords(stopword);
    return Ok(updateResult);
}