/// <summary>
/// Builds the entity-entity graph in <c>EkwEgg</c> and writes its adjacency lists to
/// <c>reportPath + ".EkwEGg.csv"</c>.
/// </summary>
/// <remarks>
/// Every entity in <c>EkwDict</c> whose <c>Occur</c> exceeds <paramref name="entityOccurThreshold"/>
/// gets a vertex. For each of its keywords found in <c>kwLexicon</c> with fewer than
/// <paramref name="kwLimitThreshold"/> entities, every entity listed under that keyword is offered
/// as a neighbor. No filter on the neighbor being the entity itself is applied here.
/// Vertices are inserted with <c>Add</c>, so running this on a graph that already holds one of the
/// qualifying entities throws.
/// Each report row is <c>entity,neighborCount,nbor1;nbor2;...;</c>.
/// </remarks>
/// <param name="reportPath">Prefix of the report file; ".EkwEGg.csv" is appended.</param>
/// <param name="kwLimitThreshold">Keywords whose lexicon entry lists this many entities or more are not used as linkers.</param>
/// <param name="entityOccurThreshold">Entities with <c>Occur</c> less than or equal to this value get no vertex.</param>
/// <param name="polarityThresh">Score magnitude used to classify an edge; see <c>classifyEdgePolarity</c>.</param>
/// <param name="PoleInvariant">When true, edges classified as <c>polarity.opn</c> are added as well.</param>
public void genEEgraph(string reportPath, int kwLimitThreshold = 350, int entityOccurThreshold = 2, double polarityThresh = 0.45, bool PoleInvariant = false)
{
    if (EkwEgg == null)
    {
        EkwEgg = new Dictionary<string, EEVertex>();
    }

    // UtcNow keeps the elapsed time immune to local-time (DST) adjustments.
    DateTime startTime = DateTime.UtcNow;

    foreach (KeyValuePair<string, EkwO> ekwo in EkwDict)
    {
        string E = ekwo.Key;
        EkwO ekwp = ekwo.Value;
        if (ekwp.Occur <= entityOccurThreshold)
        {
            continue;
        }

        double pScore = ekwp.PScore;
        EEVertex vertex = new EEVertex(E);
        EkwEgg.Add(E, vertex);

        foreach (string keyword in ekwp.kw.Keys)
        {
            // A keyword links entities only if the lexicon knows it and it is not too widespread.
            kwEo linker;
            if (!kwLexicon.TryGetValue(keyword, out linker) || linker.E.Count >= kwLimitThreshold)
            {
                continue;
            }

            foreach (string nborE in linker.E.Keys)
            {
                polarity pole = classifyEdgePolarity(EkwDict[nborE].PScore, pScore, polarityThresh);
                if (pole != polarity.opn || PoleInvariant)
                {
                    vertex.AddNeighbor(nborE, keyword, pole);
                }
            }
        }
    }

    TimeSpan ts = DateTime.UtcNow - startTime;
    Console.WriteLine("EkwEG time to generate EE graph: " + ts);

    // 'using' guarantees the report is flushed and the handle released, even if a write throws.
    using (StreamWriter fs = new StreamWriter(reportPath + ".EkwEGg.csv", false))
    {
        foreach (KeyValuePair<string, EEVertex> evert in EkwEgg)
        {
            fs.Write(evert.Key + "," + evert.Value.Neighbors.Count + ",");
            foreach (string nbor in evert.Value.Neighbors.Keys)
            {
                fs.Write(nbor + ";");
            }
            fs.WriteLine();
        }
    }
}

/// <summary>
/// Classifies the edge between two entities from their polarity scores.
/// </summary>
/// <remarks>
/// Rules, checked in order:
/// both scores at or above the threshold give <c>pos</c>; both at or below the negated threshold give <c>neg</c>;
/// both positive give <c>pos</c> if their sum exceeds twice the threshold, otherwise <c>obj</c>;
/// both negative give <c>neg</c> if their sum is below minus twice the threshold, otherwise <c>obj</c>;
/// either score exactly zero gives <c>obj</c>; anything else (opposite signs, NaN) gives <c>opn</c>.
/// </remarks>
private static polarity classifyEdgePolarity(double nborPScore, double pScore, double polarityThresh)
{
    if (nborPScore >= polarityThresh && pScore >= polarityThresh)
    {
        return polarity.pos;
    }
    if (nborPScore <= -polarityThresh && pScore <= -polarityThresh)
    {
        return polarity.neg;
    }
    if (nborPScore > 0 && pScore > 0)
    {
        return nborPScore + pScore > 2 * polarityThresh ? polarity.pos : polarity.obj;
    }
    if (nborPScore < 0 && pScore < 0)
    {
        // Mirror of the positive case. The previous test (sum > 2 * threshold) could never
        // hold for two negative scores, which made the 'neg' outcome unreachable here.
        return nborPScore + pScore < -2 * polarityThresh ? polarity.neg : polarity.obj;
    }
    if (nborPScore == 0 || pScore == 0)
    {
        return polarity.obj;
    }
    return polarity.opn;
}
/// <summary>
/// Reads every <c>TEkwP</c> element of the XML file at <paramref name="TEkwPpath"/> and accumulates
/// the entity-to-keyword map (<c>EkwDict</c>), the keyword-to-entity map (<c>kwEDict</c>) and the
/// keyword lexicon (<c>kwLexicon</c>), then writes two CSV reports.
/// </summary>
/// <remarks>
/// Each <c>TEkwP</c> element supplies a comma-separated <c>E</c> child (entities), a comma-separated
/// <c>kw</c> child (keywords) and a <c>pScore</c> attribute. A missing child is treated as empty text;
/// a missing or unparsable <c>pScore</c> counts as 0.
/// Reports (only rows with an occurrence count above 1 are written):
/// <c>reportPath + ".kw.csv"</c> holds <c>keyword,occur,ln(occur),meanPScore,entityCount</c>;
/// <c>reportPath + ".E.csv"</c> holds <c>entity,occur,ln(occur),pScore,keywordCount</c>.
/// </remarks>
/// <param name="TEkwPpath">Path of the XML document to load.</param>
/// <param name="reportPath">Prefix of the two report files.</param>
public void genEkwBigraph(string TEkwPpath, string reportPath = "a")
{
    // Argument handed to EkwO.freshen for every entity.
    // NOTE(review): presumably a relative-frequency cutoff for dropping rare keywords - confirm against EkwO.freshen.
    const double kwFreshenArgument = 0.35;

    DateTime startTime = DateTime.UtcNow;

    XmlDocument xDoc = new XmlDocument();
    xDoc.Load(TEkwPpath);
    xDoc.Normalize();
    XmlNode root = xDoc.DocumentElement;

    int repeatedEntityCount = 0;

    // Writers are opened before the expensive processing so a bad reportPath fails fast;
    // 'using' releases them even when processing throws.
    using (StreamWriter fse = new StreamWriter(reportPath + ".E.csv", false))
    using (StreamWriter fsk = new StreamWriter(reportPath + ".kw.csv", false))
    {
        XmlNodeList TEkwPNodeList = root.SelectNodes("//TEkwP");
        Console.WriteLine("E-kw");
        Console.WriteLine("echo: building E-kw bigraph with E-kw matrix and kw-E matrix + kwLexicon");

        int tekwp_i = 0;
        foreach (XmlNode tekwpNode in TEkwPNodeList)
        {
            if ((tekwp_i++) % 100 == 0)
            {
                Console.Write("\r" + tekwp_i);
            }

            List<string> entities = splitNonEmptyTokens(childInnerTextOrEmpty(tekwpNode, "E"));
            List<string> keywords = splitNonEmptyTokens(childInnerTextOrEmpty(tekwpNode, "kw"));

            XmlAttribute pScoreAttr = tekwpNode.Attributes == null ? null : tekwpNode.Attributes["pScore"];
            // TryParse leaves 0 when the text is null or not a number.
            // NOTE(review): parsing uses the current culture; if the XML always uses '.' as the
            // decimal separator, CultureInfo.InvariantCulture should be passed - confirm with the producer.
            double pScore;
            double.TryParse(pScoreAttr == null ? null : pScoreAttr.Value, out pScore);

            // E-kw matrix: every entity of the record is credited with every keyword of the record.
            foreach (string entity in entities)
            {
                EkwO entityEntry;
                if (!EkwDict.TryGetValue(entity, out entityEntry))
                {
                    entityEntry = new EkwO(entity);
                    EkwDict.Add(entity, entityEntry);
                }
                entityEntry.Occur++;
                entityEntry.PScore += pScore;
                foreach (string keyw in keywords)
                {
                    entityEntry.AddKW(keyw, pScore);
                }
            }

            // kw-E matrix: the transposed view, keyed by keyword.
            foreach (string keyword in keywords)
            {
                kwEo keywordEntry;
                if (!kwEDict.TryGetValue(keyword, out keywordEntry))
                {
                    keywordEntry = new kwEo(keyword);
                    kwEDict.Add(keyword, keywordEntry);
                }
                keywordEntry.Occur++;
                keywordEntry.PScore += pScore;
                foreach (string en in entities)
                {
                    keywordEntry.AddEntity(en, pScore);
                }
            }
        }
        Console.WriteLine("\r" + tekwp_i);

        // Freshen each entity's keyword set, counting keywords before and after.
        int kwCount_uf = 0, kwCount_f = 0;
        foreach (EkwO entityEntry in EkwDict.Values)
        {
            kwCount_uf += entityEntry.kw.Count;
            entityEntry.freshen(kwFreshenArgument);
            kwCount_f += entityEntry.kw.Count;
        }

        // Keyword lexicon: built only from the keywords that survived freshen.
        foreach (KeyValuePair<string, EkwO> ekwo in EkwDict)
        {
            EkwO this_Ekw = ekwo.Value;
            foreach (string kwl in this_Ekw.kw.Keys)
            {
                if (string.IsNullOrEmpty(kwl))
                {
                    continue;
                }

                kwEo lexEntry;
                if (!kwLexicon.TryGetValue(kwl, out lexEntry))
                {
                    lexEntry = new kwEo(kwl);
                    kwLexicon.Add(kwl, lexEntry);
                }
                lexEntry.AddEntity(this_Ekw.E, this_Ekw.PScore);
            }
        }

        // Occurrence count and mean score come from the unfiltered kw-E matrix.
        foreach (KeyValuePair<string, kwEo> lex in kwLexicon)
        {
            string kwl = lex.Key;
            kwEo lexEntry = lex.Value;
            kwEo totals = kwEDict[kwl];

            int occur = lexEntry.Occur = totals.Occur;
            lexEntry.PScore = totals.PScore / occur;
            if (occur > 1)
            {
                fsk.WriteLine(kwl + "," + occur + "," + Math.Log(occur) + "," + lexEntry.PScore + "," + lexEntry.E.Count);
            }
        }
        Console.WriteLine(kwCount_f + " kw of " + kwCount_uf);
        Console.WriteLine(kwLexicon.Count + " kw in lexicon");

        // Entity report.
        foreach (KeyValuePair<string, EkwO> e_ekwo in EkwDict)
        {
            int occur = e_ekwo.Value.Occur;
            if (occur > 1)
            {
                fse.WriteLine(e_ekwo.Key + "," + occur + "," + Math.Log(occur) + "," + e_ekwo.Value.PScore + "," + e_ekwo.Value.kw.Count);
                repeatedEntityCount++;
            }
        }
    }

    Console.WriteLine(repeatedEntityCount + " of " + EkwDict.Count + " E occured more than once");
    TimeSpan ts = DateTime.UtcNow - startTime;
    Console.WriteLine("echo time to calculate: " + ts);
}

/// <summary>
/// Returns the inner text of the first child matching <paramref name="childName"/>, or an empty
/// string when <paramref name="parent"/> has no such child.
/// </summary>
private static string childInnerTextOrEmpty(XmlNode parent, string childName)
{
    XmlNode child = parent.SelectSingleNode(childName);
    return child == null ? "" : child.InnerText;
}

/// <summary>
/// Splits a comma-separated string and returns its non-empty tokens in their original order
/// (duplicates are kept).
/// </summary>
private static List<string> splitNonEmptyTokens(string commaSeparated)
{
    List<string> tokens = new List<string>();
    foreach (string token in commaSeparated.Split(','))
    {
        if (!string.IsNullOrEmpty(token))
        {
            tokens.Add(token);
        }
    }
    return tokens;
}