/// <summary>
/// Loads documents from a text file that holds one JSON-serialized document per line.
/// </summary>
/// <remarks>
/// Replaces <c>documentList</c> with a fresh dictionary. Every parsed line is copied into a
/// <c>TFIDF_Document</c> and stored under a zero-based sequential key; the same value is
/// assigned as the document's <c>NewsID</c>. Blank lines and lines that deserialize to
/// <c>null</c> are skipped and do not consume an index.
/// </remarks>
/// <param name="loc">Path of the file to read; also remembered in <c>this.loc</c>.</param>
public void Load(String loc)
{
    this.loc = loc;
    documentList = new Dictionary<string, Document>();

    // Single counter: the dictionary key and NewsID were always the same value.
    int index = 0;

    // 'using' disposes the reader even when reading or deserialization throws.
    using (StreamReader reader = new StreamReader(loc))
    {
        String nextLine;
        while ((nextLine = reader.ReadLine()) != null)
        {
            // An empty line would otherwise lead to a null dereference below.
            if (string.IsNullOrWhiteSpace(nextLine))
            {
                continue;
            }

            Document newsData = JsonConvert.DeserializeObject<Document>(nextLine);
            if (newsData == null)
            {
                continue;
            }

            string id = index.ToString();
            TFIDF_Document tiarticle = new TFIDF_Document();
            tiarticle.Title = newsData.Title;
            tiarticle.Author = newsData.Author;
            tiarticle.Time = newsData.Time;
            tiarticle.Tag = newsData.Tag;
            tiarticle.Content = newsData.Content;
            tiarticle.NewsID = id;

            documentList.Add(id, tiarticle);
            index++;
        }
    }
}
/// <summary>
/// Rebuilds <c>mergedList</c> and <c>tokenOringinalFormList</c> from the tokens of every
/// entry in <c>docs</c>, then keeps only the <c>BUFFER_SIZE</c> highest-ranked entries
/// in <c>mergedList</c>.
/// </summary>
/// <remarks>
/// For each distinct <c>ProcessedContent</c> a <c>Node</c> accumulates the total ranking and
/// records the ranking per document index. <c>tokenOringinalFormList</c> is not trimmed:
/// it keeps an entry for every distinct token seen, including those dropped from
/// <c>mergedList</c>.
/// </remarks>
private void MergeList()
{
    mergedList.Clear();
    tokenOringinalFormList.Clear();

    int docCount = docs.Length;
    for (int i = 0; i < docCount; i++)
    {
        // NOTE(review): 'as' yields null when docs[i] is not a TFIDF_Document, which would
        // throw on .Tokens below - confirm docs only ever holds TFIDF_Document instances.
        TFIDF_Document tfidDoc = docs[i] as TFIDF_Document;

        foreach (Token t in tfidDoc.Tokens)
        {
            string key = t.ProcessedContent;

            // One lookup instead of ContainsKey followed by two indexer reads.
            Node existing;
            if (mergedList.TryGetValue(key, out existing))
            {
                // The total is accumulated; the per-document slot is overwritten.
                existing.ranking += t.Ranking;
                existing.rankingList[i] = t.Ranking;
                continue;
            }

            // First occurrence of this token across all documents.
            Node node = new Node();
            node.rankingList = new double[docCount];
            node.userFactor = new double[4];
            node.ranking = t.Ranking;
            node.rankingList[i] = t.Ranking;
            mergedList.Add(key, node);

            string rootWord = Stemmer.GetRootForm(t.OringinalContent);
            tokenOringinalFormList.Add(key, rootWord);
        }
    }

    // Copy the top entries out before clearing: the ordered query is evaluated lazily
    // against the dictionary that is about to be emptied.
    var tokenWeight = mergedList.OrderByDescending(pair => pair.Value.ranking).Take(BUFFER_SIZE);
    Dictionary<string, Node> tempList = new Dictionary<string, Node>();
    foreach (KeyValuePair<string, Node> pair in tokenWeight)
    {
        tempList.Add(pair.Key, pair.Value);
    }

    mergedList.Clear();
    mergedList = tempList;
}
/// <summary>
/// Reads a file containing one JSON-serialized document per line and fills
/// <c>documentList</c> with the result.
/// </summary>
/// <remarks>
/// <c>documentList</c> is replaced by a new dictionary on every call. Each parsed line is
/// copied into a <c>TFIDF_Document</c> whose <c>NewsID</c> and dictionary key are the same
/// zero-based sequence number. Blank lines and lines that deserialize to <c>null</c> are
/// skipped without consuming a sequence number.
/// </remarks>
/// <param name="loc">Path of the file to read; also stored in <c>this.loc</c>.</param>
public void Load(String loc)
{
    this.loc = loc;
    documentList = new Dictionary<string, Document>();

    // The key and NewsID always advanced together, so one counter is enough.
    int index = 0;

    // Dispose the reader on every exit path, including exceptions.
    using (StreamReader reader = new StreamReader(loc))
    {
        String nextLine;
        while ((nextLine = reader.ReadLine()) != null)
        {
            // Guard against empty lines, which would cause a null dereference below.
            if (string.IsNullOrWhiteSpace(nextLine))
            {
                continue;
            }

            Document newsData = JsonConvert.DeserializeObject<Document>(nextLine);
            if (newsData == null)
            {
                continue;
            }

            string id = index.ToString();
            TFIDF_Document tiarticle = new TFIDF_Document();
            tiarticle.Title = newsData.Title;
            tiarticle.Author = newsData.Author;
            tiarticle.Time = newsData.Time;
            tiarticle.Tag = newsData.Tag;
            tiarticle.Content = newsData.Content;
            tiarticle.NewsID = id;

            documentList.Add(id, tiarticle);
            index++;
        }
    }
}