/// <summary>
/// Returns the rank table. The current implementation is a stub and always returns <c>null</c>.
/// </summary>
/// <returns>Always <c>null</c>.</returns>
public industryLemmaRankTable GetTable()
{
    return null;
}
/// <summary>
/// Fills <paramref name="output"/> with primary and secondary industry lemma terms derived from the chunks
/// in <paramref name="chunkTable"/>.
/// </summary>
/// <remarks>
/// <para>
/// Only chunks whose <c>documentSetFrequency</c> is greater than 1 are considered. Each such chunk's
/// <c>nominalForm</c> is split on <c>textMapBase.SEPARATOR</c>, and lemmas of two characters or fewer are dropped.
/// </para>
/// <para>
/// A lemma whose counter value exceeds 1 is marked <c>primary</c> and weighted by
/// <c>settings.PrimaryTermFactor</c>. Every lemma that shared a chunk with a primary lemma, and that has not yet
/// been given a term type, is marked <c>secondary</c> and weighted by <c>settings.SecondaryTermFactor</c>.
/// Base weights are read from <paramref name="termTable"/>.
/// </para>
/// <para>
/// No argument is validated here; a <c>null</c> argument, or (presumably) a lemma missing from
/// <paramref name="termTable"/>, will surface as a runtime exception - TODO confirm how the
/// <c>termTable</c> indexer treats unknown keys.
/// </para>
/// <para>
/// A third "reserve" term pass that was previously sketched here in commented-out form is not performed.
/// </para>
/// </remarks>
/// <param name="chunkTable">Table of chunks; its list supplies the nominal forms that are split into lemmas.</param>
/// <param name="termTable">Table of terms; supplies the base weight for each primary and secondary lemma.</param>
/// <param name="output">Rank table that receives the primary and secondary terms.</param>
/// <returns>The same <paramref name="output"/> instance, after it has been updated.</returns>
public industryLemmaRankTable process(webLemmaTermTable chunkTable, webLemmaTermTable termTable, industryLemmaRankTable output)
{
    List<webLemmaTerm> allChunks = chunkTable.GetList();

    // Deferred query; it is enumerated exactly once by the loop below.
    var docSetFreq = allChunks.Where(x => x.documentSetFrequency > 1);

    // Counter of lemma instances across the selected chunks.
    instanceCountCollection<String> termCounter = new instanceCountCollection<string>();

    // For each lemma: the set of other lemmas seen in the same chunk.
    aceDictionarySet<String, String> dict = new aceDictionarySet<string, string>();

    foreach (webLemmaTerm chunk in docSetFreq)
    {
        var lemmas = chunk.nominalForm.SplitSmart(textMapBase.SEPARATOR, "", true, true);
        lemmas = lemmas.Where(x => x.Length > 2).ToList();

        termCounter.AddInstanceRange(lemmas);

        // Register every ordered pair of distinct lemmas in this chunk as co-occurring.
        foreach (String lm in lemmas)
        {
            foreach (String lmi in lemmas)
            {
                if (lmi != lm)
                {
                    dict[lm].AddUnique(lmi);
                }
            }
        }
    }

    foreach (var term in termCounter)
    {
        if (termCounter[term] > 1)
        {
            // Primary assignment is unconditional: it overrides a type given earlier in this loop
            // (e.g. when the same lemma was first reached as a secondary of another primary).
            industryLemmaTerm lemma = output.GetOrCreate(term);
            lemma.termType = industryLemmaTermType.primary;
            lemma.weight = settings.PrimaryTermFactor * termTable[lemma.name].weight;
            lemma.nominalForm = term;
            output.AddOrUpdate(lemma);

            if (dict.ContainsKey(lemma.nominalForm))
            {
                foreach (String secLemmas in dict[lemma.nominalForm])
                {
                    industryLemmaTerm lemmaSec = output.GetOrCreate(secLemmas);

                    // Only terms that have no type yet become secondary, so existing primaries are never demoted.
                    if (lemmaSec.termType == industryLemmaTermType.none)
                    {
                        lemmaSec.termType = industryLemmaTermType.secondary;
                        lemmaSec.weight = settings.SecondaryTermFactor * termTable[lemmaSec.name].weight;
                        lemmaSec.nominalForm = secLemmas;
                        output.AddOrUpdate(lemmaSec);
                    }
                }
            }
        }
    }

    return output;
}