/// <summary>
/// Creates a <see cref="termExploreModel"/> from an existing lemma: the lemma's name becomes the input form,
/// the lemma itself becomes the model's lemma item, and every instance of the lemma is copied into
/// the model's instance collection.
/// </summary>
/// <param name="lemmaSource">The lemma whose name, gram set and instances populate the model.</param>
public termExploreModel(ITermLemma lemmaSource)
{
    inputForm = lemmaSource.name;
    modelSource = termExploreModelSource.fromLemma;

    // The lemma itself is wrapped as an explore item built from its name and gram set.
    termExploreItem lemmaItem = new termExploreItem(lemmaSource.name, lemmaSource.gramSet);
    lemma = lemmaItem;

    // Each instance of the lemma gets its own explore item, in source order.
    foreach (TermInstance lemmaInstance in lemmaSource.instances)
    {
        termExploreItem instanceItem = new termExploreItem(lemmaInstance.name, lemmaInstance.gramSet);
        instances.Add(instanceItem);
    }
}
/// <summary>
/// Queries <c>termDiscoveryResolver</c> with the item's input form and, when the resolver returns any votes,
/// builds a <c>gramFlags</c> value from the first vote of each grammatical category (in the order returned by
/// <c>getSorted()</c>) and adds it to the item's gram set.
/// </summary>
/// <param name="item">The item whose <c>inputForm</c> is resolved and whose <c>gramSet</c> receives the result.</param>
/// <param name="loger">Optional log builder; when <c>null</c> nothing is logged.</param>
/// <param name="debug">Not read by this method.</param>
/// <returns>
/// <c>true</c> only when the resolver returned no votes and the input form is at least four characters long;
/// <c>false</c> in every other case, including when a gram entry was added to the item.
/// </returns>
public bool discoverGram(termExploreItem item, ILogBuilder loger, bool debug = true)
{
    // Upper bound on the number of votes written to the log.
    const int maxLoggedVotes = 20;

    if (loger != null)
    {
        loger.AppendLine("Item:" + item.inputForm);
    }

    instanceCountCollection<object> res = termDiscoveryResolver.resolveQuery(item.inputForm);
    res.reCalculate();

    if (res.Count <= 0)
    {
        // Nothing was resolved: only forms of four or more characters are reported as failed.
        return item.inputForm.Length >= 4;
    }

    List<object> sorted = res.getSorted();

    // When the item already has a part-of-speech type, drop the pos_type votes,
    // so no part-of-speech type is set on the new flags below.
    if (item.gramSet.getPosType() != pos_type.none)
    {
        sorted.RemoveAll(x => x is pos_type);
    }

    gramFlags gf = new gramFlags();
    if (sorted.Any(x => x is pos_type))
    {
        gf.Set((pos_type)sorted.First(x => x is pos_type));
    }

    // Keep only the votes whose runtime type is listed for the selected part-of-speech type.
    // NOTE(review): when the pos_type votes were dropped above, gf.type is whatever a fresh gramFlags
    // holds, so the lookup uses that default rather than the item's existing type - confirm this is intended.
    // NOTE(review): presumably posTypeVsPattern has an entry for every pos_type value - verify.
    var allowedTypes = posConverter.posTypeVsPattern[gf.type];
    sorted.RemoveAll(x => !allowedTypes.Contains(x.GetType()));

    if (loger != null)
    {
        loger.AppendLine("Votes:");

        // Log at most maxLoggedVotes entries. The bound must be the smaller of the two values:
        // using the larger one indexes past the end of the list whenever fewer votes remain.
        int loggedCount = Math.Min(sorted.Count, maxLoggedVotes);
        for (int i = 0; i < loggedCount; i++)
        {
            loger.Append(sorted[i].ToString() + "; ");
        }
    }

    // For each remaining category, take the first vote of that type.
    if (sorted.Any(x => x is pos_gender))
    {
        gf.Set((pos_gender)sorted.First(x => x is pos_gender));
    }

    if (sorted.Any(x => x is pos_gramaticalCase))
    {
        gf.Set((pos_gramaticalCase)sorted.First(x => x is pos_gramaticalCase));
    }

    if (sorted.Any(x => x is pos_verbform))
    {
        gf.Set((pos_verbform)sorted.First(x => x is pos_verbform));
    }

    if (sorted.Any(x => x is pos_number))
    {
        gf.Set((pos_number)sorted.First(x => x is pos_number));
    }

    if (sorted.Any(x => x is pos_degree))
    {
        gf.Set((pos_degree)sorted.First(x => x is pos_degree));
    }

    if (sorted.Any(x => x is pos_person))
    {
        gf.Set((pos_person)sorted.First(x => x is pos_person));
    }

    if (loger != null)
    {
        loger.AppendLine("Final gram:" + gf.ToString());
    }

    item.gramSet.Add(gf);

    return false;
}
/// <summary>
/// Intended to explore a term with the Hunspell engine and build a <see cref="termExploreModel"/> from its suggestions.
/// The implementation is currently disabled: the method always throws.
/// </summary>
/// <param name="item">The item whose input form would be explored.</param>
/// <param name="log">The log builder the result would be written to.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public static termExploreModel exploreWithHunspell(this termExploreItem item, ILogBuilder log)
{
    throw new NotImplementedException();

    // Disabled legacy implementation, kept as a reference for re-implementation.
    //
    //termExploreModel output = new termExploreModel();
    //List<string> suggest = imbLanguageFrameworkManager.serbian.basic.hunspellEngine.Suggest(item.inputForm);
    //List<string> sug2 = new List<string>();
    //suggest.ForEach(x=>sug2.Add(x.Replace("\\-", "-")));
    //suggest = sug2;
    //if (Enumerable.Any(suggest))
    //{
    //    int min_l = Enumerable.Min(suggest, x => x.Length);
    //    List<string> possibleTerm = new List<string>();
    //    int tocut = min_l - item.inputForm.Length;
    //    string start = item.inputForm;
    //    if (tocut != 0) start = start.substring(tocut);
    //    string rootComposite = "";
    //    int rootCompositeSplit = 0;
    //    foreach (string sug in suggest)
    //    {
    //        if (!sug.Contains(" "))
    //        {
    //            if (sug.Contains("-"))
    //            {
    //                int rcSplit = sug.IndexOf("-");
    //                if (rcSplit > rootCompositeSplit)
    //                {
    //                    rootCompositeSplit = rcSplit;
    //                    rootComposite = sug.Substring(0, rootCompositeSplit).Trim(Enumerable.ToArray("-"));
    //                }
    //            }
    //            else
    //            {
    //                if (sug.StartsWith(start))
    //                {
    //                    possibleTerm.Add(sug);
    //                }
    //            }
    //        }
    //    }
    //    if (tocut == 0)
    //    {
    //        if (possibleTerm.Count == 0)
    //        {
    //            possibleTerm.AddRange(suggest);
    //        }
    //    }
    //    possibleTerm.Add(item.inputForm);
    //    if (rootCompositeSplit == 0)
    //    {
    //        rootComposite = possibleTerm.MinItem(x => x.Length);
    //    }
    //    suggest = possibleTerm.Clone();
    //    possibleTerm.Clear();
    //    string lemmaForm = "";
    //    foreach (string sug in suggest)
    //    {
    //        if (sug.Contains(rootComposite, StringComparison.CurrentCultureIgnoreCase))
    //        {
    //            possibleTerm.Add(sug);
    //            if (lemmaForm.isNullOrEmpty())
    //            {
    //                lemmaForm = sug;
    //            }
    //            if (sug.Length < lemmaForm.Length)
    //            {
    //                lemmaForm = sug;
    //            }
    //        }
    //    }
    //    output.lemmaForm = lemmaForm;
    //    output.rootWord = rootComposite;
    //    output.inputForm = item.inputForm;
    //    foreach (string sug in possibleTerm)
    //    {
    //        output.instances.Add(sug);
    //        //log.log(sug);
    //    }
    //} else
    //{
    //    output.lemmaForm = item.inputForm;
    //    output.rootWord = item.inputForm;
    //    output.inputForm = item.inputForm;
    //}
    ////log.log("Input term: " + item.inputForm);
    ////log.log("Root: " + output.rootWord);
    ////log.log("Lemma: " + output.lemmaForm);
    ////log.log("Instances: ");
    //output.ToString(log);
    //return(output);
}