/// <summary>
/// Builds a header map from the given documents: for every document, its title is
/// pushed together with a location constructed from the document's id.
/// </summary>
/// <param name="docs">The documents to walk.</param>
/// <returns>The header map holding one pushed entry per visited document.</returns>
public static header_map step1(doc_reader docs)
{
    header_map headers = new header_map();

    foreach (doc_reader.doc entry in docs)
    {
        var heading = entry.title;
        var target = new doc_reader.doc.location(entry.id);
        headers.push(heading, target);
        // TODO: synonyms
    }

    return headers;
}
/// <summary>
/// Walks the documents and pushes, for each one, its title paired with a location
/// created from the document id into a fresh header map.
/// </summary>
/// <param name="docs">The documents to walk.</param>
/// <returns>The header map that received one push per document.</returns>
public static header_map step1(doc_reader docs)
{
    header_map result = new header_map();

    foreach (doc_reader.doc current in docs)
    {
        var title = current.title;
        var whole_doc = new doc_reader.doc.location(current.id);
        result.push(title, whole_doc);
        // TODO: synonyms
    }

    return result;
}
/// <summary>
/// Matches every header in <paramref name="links"/> against every document in
/// <paramref name="docs"/> and merges the per-pair results into a single link map.
/// </summary>
/// <remarks>
/// Each header key is used directly as a regular-expression pattern. The
/// <see cref="Regex"/> built for a key is cached on first use and reused for the
/// remaining documents, so a pattern is parsed once rather than once per document.
/// The cache is filled lazily, so the order of calls and the point at which an
/// invalid pattern throws are the same as when a new Regex was built per pair.
/// NOTE(review): keys are not escaped; if they are plain titles rather than
/// patterns, regex metacharacters in a title will change what is matched - confirm.
/// </remarks>
/// <param name="docs">The documents to scan.</param>
/// <param name="links">Header entries; the key is the pattern, the value the link destination.</param>
/// <returns>The union of the link maps produced for every (document, header) pair.</returns>
public static link_map step2(doc_reader docs, header_map links)
{
    link_map ret = new link_map();
    Dictionary<string, Regex> compiled = new Dictionary<string, Regex>();

    // The double loop could be run on multiple threads.
    foreach (doc_reader.doc doc in docs)
    {
        foreach (KeyValuePair<string, doc_reader.doc.location> link in links)
        {
            Regex law_regex;
            if (!compiled.TryGetValue(link.Key, out law_regex))
            {
                law_regex = new Regex(link.Key);
                compiled.Add(link.Key, law_regex);
            }

            link_map temp = step2_each(doc, link, law_regex);
            ret.union(temp);
        }
    }

    return ret;
}
/// <summary>
/// Runs one header pattern over one document's text and records a link for every
/// match that <c>step2_flip</c> accepts.
/// </summary>
/// <param name="doc">The document whose <c>text</c> is searched.</param>
/// <param name="link">The header entry; its value is pushed as the link destination.</param>
/// <param name="law_regex">The regular expression to run against the document text.</param>
/// <returns>
/// A link map with one entry per accepted match, going from the match location
/// (document id, match index, match length) to <c>link.Value</c>.
/// </returns>
public static link_map step2_each(doc_reader.doc doc, KeyValuePair<string, doc_reader.doc.location> link, Regex law_regex)
{
    link_map found = new link_map();

    foreach (Match hit in law_regex.Matches(doc.text))
    {
        doc_reader.doc.location source = new doc_reader.doc.location(doc.id, hit.Index, hit.Length);

        if (!step2_flip(doc, source, hit))
        {
            continue;
        }

        found.push(source, link.Value);
        // TODO: push <XXX> chapter XX
    }

    return found;
}
/// <summary>
/// Runs every header in <paramref name="links"/> against every document in
/// <paramref name="docs"/> and unions the per-pair results into one link map.
/// </summary>
/// <remarks>
/// Each header key is used directly as a regular-expression pattern. The
/// <see cref="Regex"/> for a key is created the first time the key is seen and
/// reused for later documents, instead of being rebuilt for every
/// (document, header) pair. Because the cache is filled lazily, the order of calls
/// and the moment an invalid pattern throws are unchanged.
/// NOTE(review): keys are not escaped; if they are plain titles rather than
/// patterns, regex metacharacters in a title will change what is matched - confirm.
/// </remarks>
/// <param name="docs">The documents to scan.</param>
/// <param name="links">Header entries; the key is the pattern, the value the link destination.</param>
/// <returns>The union of the link maps produced for every (document, header) pair.</returns>
public static link_map step2(doc_reader docs, header_map links)
{
    link_map ret = new link_map();
    Dictionary<string, Regex> regex_by_key = new Dictionary<string, Regex>();

    // The double loop could be run on multiple threads.
    foreach (doc_reader.doc doc in docs)
    {
        foreach (KeyValuePair<string, doc_reader.doc.location> link in links)
        {
            Regex law_regex;
            if (!regex_by_key.TryGetValue(link.Key, out law_regex))
            {
                law_regex = new Regex(link.Key);
                regex_by_key.Add(link.Key, law_regex);
            }

            link_map temp = step2_each(doc, link, law_regex);
            ret.union(temp);
        }
    }

    return ret;
}
/// <summary>
/// Decides whether a match should be kept as a link. Filtering is not implemented
/// yet, so every match is currently accepted and none of the arguments are read.
/// </summary>
/// <param name="doc">The document in which the match was found.</param>
/// <param name="match_at">The location of the match.</param>
/// <param name="_match">The regular-expression match itself.</param>
/// <returns>Always <c>true</c>.</returns>
public static bool step2_flip(doc_reader.doc doc, doc_reader.doc.location match_at, Match _match)
{
    // TODO: filtering
    return true;
}
/// <summary>
/// Adds a link from one location to another by calling <c>Add</c> on <c>links</c>.
/// </summary>
/// <param name="from">The location the link starts at; passed to <c>Add</c> as the key.</param>
/// <param name="dest">The location the link points to; passed to <c>Add</c> as the value.</param>
public void push(doc_reader.doc.location from, doc_reader.doc.location dest)
{
    // Nothing is caught here: whatever links.Add throws reaches the caller.
    links.Add(from, dest);
}
/// <summary>
/// Adds a named destination to <c>links</c>. If <c>Add</c> throws an
/// <see cref="ArgumentException"/>, the exception message followed by a space and
/// the name is written to the console and the entry is skipped.
/// </summary>
/// <param name="name">The name to add; passed to <c>Add</c> as the key.</param>
/// <param name="dest">The destination location; passed to <c>Add</c> as the value.</param>
public void push(string name, doc_reader.doc.location dest)
{
    try
    {
        links.Add(name, dest);
    }
    catch (ArgumentException ex)
    {
        // Best effort: report the rejected entry and carry on.
        string report = ex.Message + " " + name;
        Console.WriteLine(report);
    }
}