/// <summary>
/// Finds the first term stored in this table that reports a match with the supplied term.
/// </summary>
/// <param name="term">The term to look for.</param>
/// <param name="termOnNotFound">When <c>true</c>, the supplied <paramref name="term"/> is returned if no stored term matches.</param>
/// <returns>
/// The first stored term whose <c>isMatch</c> accepts <paramref name="term"/>; otherwise
/// <paramref name="term"/> itself or <c>null</c>, depending on <paramref name="termOnNotFound"/>.
/// </returns>
public IWeightTableTerm GetMatchTerm(IWeightTableTerm term, bool termOnNotFound = false)
{
    // Linear scan: the match is decided by each stored term, not by the dictionary key.
    foreach (var entry in terms)
    {
        var candidate = entry.Value;
        if (candidate.isMatch(term))
        {
            return candidate;
        }
    }

    return termOnNotFound ? term : null;
}
/// <summary>
/// Classifies how the supplied term relates to the terms held by this table.
/// </summary>
/// <param name="term">The term (needle) to classify against this table (host).</param>
/// <returns>
/// <c>hostTermName_and_needleTermName</c> when the term's nominal form is a key of the table;
/// <c>hostTermInstance_and_needleTermName</c> when any stored term reports a match;
/// <c>hostTermName_and_needleTermInstance</c> when one of the term's forms is a key of the table;
/// otherwise <c>none</c>. The checks are performed in that order.
/// </returns>
public weightTableMatchResultEnum isMatchBySemantics(IWeightTableTerm term)
{
    if (terms.ContainsKey(term.nominalForm))
    {
        return weightTableMatchResultEnum.hostTermName_and_needleTermName;
    }

    foreach (var hostTerm in this)
    {
        if (hostTerm.isMatch(term))
        {
            // NOTE: not a complete implementation - this case might also be hostTermInstance_and_needleTermInstance
            return weightTableMatchResultEnum.hostTermInstance_and_needleTermName;
        }
    }

    foreach (string form in term.GetAllForms())
    {
        if (terms.ContainsKey(form))
        {
            return weightTableMatchResultEnum.hostTermName_and_needleTermInstance;
        }
    }

    return weightTableMatchResultEnum.none;
}
/// <summary>
/// Tells whether the <paramref name="other"/> term matches this one, i.e. whether the two
/// terms share at least one form (matching terms have their frequencies summed).
/// </summary>
/// <param name="other">The term to compare with.</param>
/// <returns>
/// <c>true</c> if any form of <paramref name="other"/> is among the forms of this term; otherwise, <c>false</c>.
/// </returns>
public bool isMatch(IWeightTableTerm other)
{
    // Own forms are collected first, then the other term's forms - same order as before.
    return GetAllForms().ContainsAny(other.GetAllForms());
}
/// <summary>
/// Gets the IDF (inverse document frequency) for the term.
/// </summary>
/// <param name="term">The term.</param>
/// <returns>The value reported by <c>parent</c> when a parent is set; otherwise <c>1</c>.</returns>
public virtual double GetIDF(IWeightTableTerm term)
{
    if (parent == null)
    {
        // No parent to ask: fall back to the neutral factor.
        return 1;
    }

    return parent.GetIDF(term);
}
/// <summary>
/// Untyped <c>Add</c> entry point, provided for serialization.
/// </summary>
/// <param name="term">The object to add; anything that is not an <see cref="IWeightTableTerm"/> is ignored.</param>
public void Add(object term)
{
    var typedTerm = term as IWeightTableTerm;
    if (typedTerm == null)
    {
        return;
    }

    // -1 asks the typed overload to use its default frequency points.
    Add(typedTerm, -1);
}
/// <summary>
/// Removes the term registered under the given name from both the term map and the
/// absolute-frequency map, raising the change notification in between.
/// </summary>
/// <param name="name">Key of the term to remove.</param>
/// <returns><c>true</c> if an entry was removed from the frequency map; otherwise, <c>false</c>.</returns>
public bool RemoveTerm(string name)
{
    IWeightTableTerm droppedTerm;
    terms.TryRemove(name, out droppedTerm);

    // The notification is raised regardless of whether anything was actually removed.
    InvokeChanged();

    int droppedFrequency;
    bool frequencyRemoved = termsAFreq.TryRemove(name, out droppedFrequency);
    return frequencyRemoved;
}
/// <summary>
/// Gets the cumulative weight of the stored term that matches the supplied one.
/// </summary>
/// <param name="term">The term to look up.</param>
/// <returns>The <c>weight</c> of the matching term, or <c>0</c> when no term matches.</returns>
public virtual double GetWeight(IWeightTableTerm term)
{
    IWeightTableTerm matched = GetMatchTerm(term);
    if (matched != null)
    {
        return matched.weight;
    }

    return 0;
}
/// <summary>
/// Gets the number of tables recorded in <c>counter</c> for the aggregate-document term
/// that matches the supplied term.
/// </summary>
/// <param name="term">The term to look up in <c>AggregateDocument</c>.</param>
/// <returns>The count of table entries for the matching term, or <c>0</c> when no term matches.</returns>
public double GetBDFreq(IWeightTableTerm term)
{
    var aggregated = AggregateDocument.GetMatchTerm(term);
    if (aggregated != null)
    {
        // One entry per table that voted for the term.
        return Enumerable.Count<KeyValuePair<IWeightTable, int>>(counter[aggregated]);
    }

    return 0;
}
/// <summary>
/// Collects every <see cref="IWeightTable"/> registered for the supplied term.
/// </summary>
/// <param name="term">The term used to index this collection.</param>
/// <returns>A new list holding the keys of the entry indexed by <paramref name="term"/>.</returns>
public List<IWeightTable> GetTablesWithTerm(IWeightTableTerm term)
{
    var tables = new List<IWeightTable>();
    var registered = this[term].Keys;

    foreach (IWeightTable table in registered)
    {
        tables.Add(table);
    }

    return tables;
}
/// <summary>
/// Gets the weight of the entry stored under the term's nominal form.
/// </summary>
/// <param name="term">The term whose <c>nominalForm</c> is used for the lookup.</param>
/// <returns>The stored entry's <c>weight</c>, or <c>TERMNOTFOUND_WEIGHT</c> when no entry exists.</returns>
public double GetWeight(IWeightTableTerm term)
{
    var stored = GetIfExists(term.nominalForm);
    if (stored != null)
    {
        return stored.weight;
    }

    return TERMNOTFOUND_WEIGHT;
}
/// <summary>
/// Gets the normalized frequency of the specified term: its absolute frequency divided by
/// the maximum absolute frequency in the table.
/// </summary>
/// <param name="term">The term to get frequency for.</param>
/// <returns>
/// The ratio of the term's absolute frequency to <c>max</c>, or <c>0</c> when <c>max</c> is not
/// positive (no usable maximum is available).
/// </returns>
public virtual double GetNFreq(IWeightTableTerm term)
{
    int abs = GetAFreq(term);

    // Refresh the cached maximum before reading it, if the table changed.
    if (HasChanges)
    {
        updateMaxValues();
    }

    double maximum = (double)max;

    // Without this guard a zero maximum produces NaN/Infinity and a negative maximum
    // (e.g. a seed value left in place for an empty table) produces a negative ratio.
    if (maximum <= 0)
    {
        return 0;
    }

    return ((double)abs) / maximum;
}
/// <summary>
/// Gets the absolute frequency recorded for the stored term that matches the supplied one.
/// </summary>
/// <param name="term">The term to get frequency for.</param>
/// <returns>The frequency stored under the matching term's name, or <c>0</c> when no term matches.</returns>
public virtual int GetAFreq(IWeightTableTerm term)
{
    IWeightTableTerm matched = GetMatchTerm(term);
    if (matched == null)
    {
        return 0;
    }

    return termsAFreq[matched.name];
}
/// <summary>
/// Determines whether any term enumerated by this instance matches the specified term.
/// </summary>
/// <param name="term">The term to test.</param>
/// <returns>
/// <c>true</c> if at least one contained term reports a match; otherwise, <c>false</c>.
/// </returns>
public bool isMatch(IWeightTableTerm term)
{
    bool found = false;

    foreach (var candidate in this)
    {
        if (candidate.isMatch(term))
        {
            // First hit is enough; stop scanning.
            found = true;
            break;
        }
    }

    return found;
}
/// <summary>
/// Gets the inverse document frequency for the term, computed as the natural logarithm of
/// the number of documents divided by the value <c>GetBDFreq</c> returns for the term's name.
/// </summary>
/// <param name="term">The term; only its <c>name</c> is used.</param>
/// <returns>The logarithmic ratio, or <c>0</c> when <c>GetBDFreq</c> returns <c>0</c>.</returns>
public double GetIDF(IWeightTableTerm term)
{
    double documentFrequency = GetBDFreq((string)term.name);

    // A term seen in no document gets IDF 0 instead of dividing by zero.
    if (documentFrequency == 0)
    {
        return 0;
    }

    return Math.Log(documents.Count() / documentFrequency);
}
/// <summary>
/// Gets the absolute frequency across all documents (summary).
/// </summary>
/// <param name="term">The term used to index this collection.</param>
/// <returns>The sum of the <c>Value</c> of every entry indexed by <paramref name="term"/>.</returns>
public int GetAFreq(IWeightTableTerm term)
{
    int total = 0;

    foreach (var vote in this[term])
    {
        total += vote.Value;
    }

    return total;
}
/// <summary>
/// Gets the stored term registered under the name of the supplied term.
/// </summary>
/// <param name="term">The term whose <c>name</c> is used as the lookup key.</param>
/// <param name="termOnNotFound">When <c>true</c>, the supplied <paramref name="term"/> is returned if the name is not registered.</param>
/// <returns>
/// The stored term when the name is registered; otherwise <paramref name="term"/> itself or
/// <c>null</c>, depending on <paramref name="termOnNotFound"/>.
/// </returns>
public IWeightTableTerm GetMatchTermByName(IWeightTableTerm term, bool termOnNotFound = false)
{
    // Single lookup: a separate ContainsKey + indexer pair costs two lookups and can throw
    // if the entry is removed between the two calls.
    IWeightTableTerm stored;
    if (terms.TryGetValue(term.name, out stored))
    {
        return stored;
    }

    return termOnNotFound ? term : null;
}
/// <summary>
/// Determines whether the <paramref name="other"/> term matches this one.
/// </summary>
/// <param name="other">The term to compare with.</param>
/// <returns>
/// <c>true</c> if <c>lemmaForm</c> equals the other term's name (invariant culture, ignoring case),
/// or if <c>otherForms</c> contains any of the forms returned by <c>other.GetAllForms(true)</c>;
/// otherwise, <c>false</c>.
/// </returns>
public bool isMatch(IWeightTableTerm other)
{
    // The form comparison is only evaluated when the lemma comparison fails.
    return lemmaForm.Equals(other.name, StringComparison.InvariantCultureIgnoreCase)
        || otherForms.ContainsAny(other.GetAllForms(true));
}
/// <summary>
/// Compiles the given term string and adds it to, or updates it in, this collection.
/// </summary>
/// <param name="term">The term string to compile.</param>
/// <param name="AFreqPoints">Frequency points passed on to <c>CompileTerm</c>.</param>
/// <returns>The compiled term when <c>AddOrUpdate</c> reports success; otherwise <c>null</c>.</returns>
public IWeightTableTerm Add(string term, int AFreqPoints = -1)
{
    IWeightTableTerm compiled = CompileTerm(term, AFreqPoints);
    return AddOrUpdate(compiled) ? compiled : null;
}
/// <summary>
/// Gets the absolute frequency for the term.
/// </summary>
/// <param name="term">The term to get frequency for.</param>
/// <returns>
/// The term's own <c>freqAbs</c> when it is already a <c>weightTableTermCompiled</c>; otherwise the
/// <c>freqAbs</c> of the entry stored under its nominal form, or <c>TERMNOTFOUND_FREQ</c> when no
/// such entry exists.
/// </returns>
public int GetAFreq(IWeightTableTerm term)
{
    // A compiled term carries its own frequency; no lookup is needed.
    var compiled = term as weightTableTermCompiled;
    if (compiled != null)
    {
        return compiled.freqAbs;
    }

    var stored = GetIfExists(term.nominalForm);
    if (stored != null)
    {
        return stored.freqAbs;
    }

    return TERMNOTFOUND_FREQ;
}
/// <summary>
/// Builds a new <c>weightTableTermCompiled</c> snapshot for the given term, filling in its
/// name, inflections, document frequency, IDF, absolute/normalized frequency, TF-IDF and weights.
/// </summary>
/// <param name="term">The source term; its <c>nominalForm</c>, forms and <c>name</c> are read.</param>
/// <returns>The newly created compiled term.</returns>
protected weightTableTermCompiled GetCompiledTerm(IWeightTableTerm term)
{
    weightTableTermCompiled cterm = new weightTableTermCompiled();
    cterm.termName = term.nominalForm;
    // Forms are flattened into a single CSV line.
    cterm.termInflections = term.GetAllForms(false).toCsvInLine();
    cterm.df = GetBDFreq((string)term.name);
    cterm.idf = GetIDF((string)term.name);
    // Indexer access: throws if the term's name has no frequency entry.
    cterm.freqAbs = termsAFreq[term.name];
    // NOTE(review): no guard against max being 0 here - confirm callers refresh max values first.
    cterm.freqNorm = ((double)cterm.freqAbs / (double)max);
    cterm.tf_idf = cterm.idf * cterm.freqNorm;
    // NOTE(review): weight is read from the freshly created cterm, not from term - confirm
    // whether weightTableTermCompiled.weight is computed or whether term.weight was intended.
    cterm.cw = cterm.weight;
    cterm.ncw = cterm.weight / maxWeight;
    return(cterm);
}
/// <summary>
/// Untyped <c>Add</c> entry point: registers weight tables in <c>documents</c> under their name.
/// </summary>
/// <param name="item">
/// The object to add. An <see cref="IWeightTable"/> is stored under its <c>name</c>;
/// any other object, including an <see cref="IWeightTableTerm"/>, is currently ignored.
/// </param>
public void Add(object item)
{
    var table = item as IWeightTable;
    if (table != null)
    {
        documents.Add(table.name, table);
    }

    // Items that are only IWeightTableTerm fall through: no handling is implemented for them.
}
/// <summary>
/// Adds a term given as a string: reuses the term already registered under that name, or
/// creates and defines a new <c>TWeightTableTerm</c> for it.
/// </summary>
/// <param name="term_str">The term string, used as both name and definition of a newly created term.</param>
/// <param name="AFreqPoints">Frequency points forwarded to the term-level <c>Add</c>; -1 lets that overload choose its default.</param>
/// <returns>The existing term when one is registered under the name; otherwise the result of adding the new term.</returns>
public IWeightTableTerm Add(string term_str, int AFreqPoints = -1)
{
    IWeightTableTerm existing = GetMatchTermByName(term_str);
    if (existing != null)
    {
        Add(existing, AFreqPoints);
        return existing;
    }

    // Nothing registered under this name yet: build a fresh term from the string.
    TWeightTableTerm created = new TWeightTableTerm();
    created.name = term_str;
    created.Define(term_str, term_str);
    return Add(created, AFreqPoints);
}
/// <summary>
/// Registers a term coming from a table (non-semantic matching): the term is added to
/// <c>AggregateDocument</c> and a vote for the table is recorded in <c>counter</c>.
/// </summary>
/// <param name="table">The table the term comes from.</param>
/// <param name="term">The term.</param>
/// <param name="callTableLevelAdd">When <c>true</c>, the term is also added to <paramref name="table"/> itself; usually not wanted.</param>
public void Add(IWeightTable table, IWeightTableTerm term, bool callTableLevelAdd = false)
{
    if (callTableLevelAdd)
    {
        table.Add(term);
    }

    // The aggregate document does not vote for itself.
    if (table != AggregateDocument)
    {
        var aggregated = AggregateDocument.Add(term);
        counter.AddVote(table, aggregated);
    }
}
/// <summary>
/// Determines whether the <paramref name="other"/> term matches this one.
/// </summary>
/// <param name="other">The term to compare with.</param>
/// <returns>
/// For a <c>termSpark</c> that is not a <c>termSparkArm</c>, the result of that spark's own
/// <c>isMatch</c> called with this instance; for every other term, <c>true</c> when the two
/// names are equal ignoring case.
/// </returns>
public bool isMatch(IWeightTableTerm other)
{
    // Arms are compared by name, so only a plain spark gets to decide the match itself.
    if (!(other is termSparkArm))
    {
        var spark = other as termSpark;
        if (spark != null)
        {
            return spark.isMatch(this);
        }
    }

    // Ordinal, case-insensitive comparison: independent of the current culture and free of
    // the temporary lower-cased copies that a ToLower()-based comparison allocates.
    return string.Equals(other.name, name, StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Recomputes the cached frequency sum, maximum frequency, weight sum and maximum weight
/// from the current terms, then calls <c>Accept()</c>.
/// </summary>
/// <remarks>
/// The recomputation is unconditional; callers are expected to check for pending changes
/// before calling. With no terms, the maximum frequency is left at <c>int.MinValue</c>.
/// </remarks>
public void updateMaxValues()
{
    _sum = 0;
    _max = int.MinValue;
    maxWeight = 0;
    _sumWeights = 0;

    foreach (var pair in terms.toList())
    {
        IWeightTableTerm t = pair.Value;

        // Read the frequency once so the sum and the maximum are based on the same value.
        int frequency = termsAFreq[t.name];

        _sum = _sum + frequency;
        _max = Math.Max(_max, frequency);
        maxWeight = Math.Max(maxWeight, t.weight);
        _sumWeights = _sumWeights + t.weight;
    }

    Accept();
}
/// <summary>
/// Compiles the given term and adds it to, or updates it in, this collection.
/// </summary>
/// <param name="term">The term to compile.</param>
/// <param name="AFreqPoints">When greater than -1, overrides the compiled term's <c>AFreqPoints</c>.</param>
/// <returns>The compiled term when <c>AddOrUpdate</c> reports success; otherwise <c>null</c>.</returns>
public IWeightTableTerm Add(IWeightTableTerm term, int AFreqPoints = -1)
{
    weightTableTermCompiled compiled = CompileTerm(term);

    bool overrideFrequency = AFreqPoints > -1;
    if (overrideFrequency)
    {
        compiled.AFreqPoints = AFreqPoints;
    }

    return AddOrUpdate(compiled) ? compiled : null;
}
/// <summary>
/// Adds the specified term, or updates the stored term that matches it.
/// </summary>
/// <param name="term">The term to add; <c>null</c> is ignored.</param>
/// <param name="AFreqPoints">
/// Frequency points to register. With -1, a new term is registered with its own
/// <c>AFreqPoints</c>, while an existing term is incremented by 1.
/// </param>
/// <returns>
/// The stored term that now represents <paramref name="term"/> (the matched one, or
/// <paramref name="term"/> itself when it was new); <c>null</c> when <paramref name="term"/> is <c>null</c>.
/// </returns>
public virtual IWeightTableTerm Add(IWeightTableTerm term, int AFreqPoints = -1)
{
    if (term == null)
    {
        return null;
    }

    IWeightTableTerm result;
    IWeightTableTerm existing = GetMatchTerm(term);

    if (existing != null)
    {
        result = existing;

        // Existing terms are only accumulated when repeated adds are allowed.
        if (!termSingleAddAllowed)
        {
            existing.weight += term.weight;

            int increment = (AFreqPoints == -1) ? 1 : AFreqPoints;
            termsAFreq[existing.name] = termsAFreq[existing.name] + increment;
        }
    }
    else
    {
        // First occurrence: register the term, then its starting frequency.
        terms.TryAdd(term.name, term);

        int initial = (AFreqPoints == -1) ? term.AFreqPoints : AFreqPoints;
        termsAFreq.TryAdd(term.name, initial);

        result = term;
    }

    // The parent always receives the supplied term, not the matched one.
    if (parent != null)
    {
        parent.Add(this, term, false);
    }

    InvokeChanged();
    return result;
}
/// <summary>
/// Converts a term into a <c>weightTableTermCompiled</c>.
/// </summary>
/// <param name="term">The term to convert.</param>
/// <returns>
/// The same instance when <paramref name="term"/> is already compiled; otherwise a new compiled
/// term carrying the source term's frequency points, weight, name and forms (as a CSV line).
/// </returns>
private weightTableTermCompiled CompileTerm(IWeightTableTerm term)
{
    var alreadyCompiled = term as weightTableTermCompiled;
    if (alreadyCompiled != null)
    {
        return alreadyCompiled;
    }

    // Members are assigned in the same order as they are listed here.
    return new weightTableTermCompiled
    {
        freqAbs = term.AFreqPoints,
        weight = term.weight,
        termName = term.name,
        termInflections = term.GetAllForms(false).toCsvInLine()
    };
}
/// <summary>
/// Gets the normalized cumulative weight for the term registered under the given name.
/// </summary>
/// <param name="term">The term name.</param>
/// <returns>
/// The term's weight divided by <c>maxWeight</c>; <c>0</c> when the term is not found, or when
/// either the term's weight or <c>maxWeight</c> is zero.
/// </returns>
public virtual double GetNWeight(string term)
{
    IWeightTableTerm matched = GetMatchTermByName(term);
    if (matched == null)
    {
        return 0;
    }

    // Make sure maxWeight reflects the latest changes before it is used.
    if (HasChanges)
    {
        updateMaxValues();
    }

    if (matched.weight == 0 || maxWeight == 0)
    {
        return 0;
    }

    return matched.weight / maxWeight;
}
/// <summary>
/// Classifies how the supplied term (needle) relates to the entries of this collection (host).
/// </summary>
/// <param name="term">The term to classify.</param>
/// <returns>
/// The first of the following that applies:
/// <c>hostTermName_and_needleTermName</c> - the term's nominal form is a key;
/// <c>hostTermInstance_and_needleTermName</c> - <c>GetIfExists(nominalForm, true)</c> finds an entry;
/// <c>hostTermName_and_needleTermInstance</c> - one of the term's forms is a key;
/// <c>hostTermInstance_and_needleTermInstance</c> - <c>GetIfExists(form, true)</c> finds an entry for one of the term's forms;
/// otherwise <c>none</c>.
/// </returns>
public weightTableMatchResultEnum isMatchBySemantics(IWeightTableTerm term)
{
    if (ContainsKey(term.nominalForm))
    {
        return weightTableMatchResultEnum.hostTermName_and_needleTermName;
    }

    var cterm = GetIfExists(term.nominalForm, true);
    if (cterm != null)
    {
        return weightTableMatchResultEnum.hostTermInstance_and_needleTermName;
    }

    var allForms = term.GetAllForms();

    foreach (string form in allForms)
    {
        if (ContainsKey(form))
        {
            return weightTableMatchResultEnum.hostTermName_and_needleTermInstance;
        }
    }

    foreach (string form in allForms)
    {
        cterm = GetIfExists(form, true);
        if (cterm != null)
        {
            // Here a needle form matched through the extended host lookup, so both sides are
            // instances; reporting needleTermName would make this case indistinguishable
            // from the nominal-form lookup above.
            return weightTableMatchResultEnum.hostTermInstance_and_needleTermInstance;
        }
    }

    return weightTableMatchResultEnum.none;
}