/// <summary>
/// Fetches the documents referenced by <paramref name="sourceUrl"/> within the given time
/// window from a <c>DataService</c> and writes each newly retrieved document as one
/// tab-separated line (<c>label TAB text</c>) to the corpus file. The key of every document
/// written is also recorded in the cache file and in <c>mCache</c>, and documents whose key
/// is already in <c>mCache</c> are skipped.
/// </summary>
/// <param name="sourceUrl">Source identifier passed to <c>DataService.GetDocRefs</c>.</param>
/// <param name="lbl">Label written in front of each document; must not contain a tab character.</param>
/// <param name="corpusFileOut">Path of the corpus output file (written as UTF-8).</param>
/// <param name="cacheFileOut">Path of the file that receives the cache keys of written documents.</param>
/// <param name="timeStart">Start of the time window, passed to <c>DataService.GetDocRefs</c>.</param>
/// <param name="timeEnd">End of the time window, passed to <c>DataService.GetDocRefs</c>.</param>
/// <remarks>
/// Failures while retrieving or writing a single document are printed to the console and the
/// loop continues with the next reference. Failures outside that per-document step (opening
/// the files, fetching the reference list) propagate to the caller; the output files are
/// closed in either case.
/// </remarks>
static void RetrieveDocuments(string sourceUrl, string lbl, string corpusFileOut, string cacheFileOut, string timeStart, string timeEnd)
{
    // The label becomes a tab-separated field in both output files.
    Debug.Assert(!lbl.Contains("\t"));

    // 'using' guarantees both files are closed even when an exception escapes this method.
    // NOTE(review): the result of Utils.VerifyFileNameOpen is used as the 'append' flag -- confirm its semantics.
    using (StreamWriter corpus = new StreamWriter(corpusFileOut, /*append=*/Utils.VerifyFileNameOpen(corpusFileOut), Encoding.UTF8))
    using (StreamWriter cache = new StreamWriter(cacheFileOut, /*append=*/Utils.VerifyFileNameOpen(cacheFileOut)))
    {
        DataService service = new DataService();
        Console.WriteLine("Retrieving document references ...");
        string[][] docRefs = service.GetDocRefs(sourceUrl, timeStart, timeEnd);
        int i = 0;
        foreach (string[] row in docRefs)
        {
            // Each reference row holds: [0] time, [1] corpus id, [2] document id.
            string time = row[0];
            string corpusId = row[1];
            string docId = row[2];
            i++;
            string cacheKey = corpusId + "\t" + docId + "\t" + lbl;
            if (mCache.Contains(cacheKey))
            {
                Console.WriteLine("*** Document found in cache.");
                continue;
            }
            Console.WriteLine("Retrieving document # {0} / {1} ...", i, docRefs.Length);
            try
            {
                string txt = service.GetDoc(corpusId, docId, "txt", false/*ignored*/, /*changesOnly=*/false, time);
                // Ordinal comparison: "*** " is a fixed marker, not linguistic text.
                if (txt.StartsWith("*** ", StringComparison.Ordinal))
                {
                    // error message from the service
                    Console.WriteLine(txt);
                }
                else
                {
                    // Keep the document on a single line and free of tabs so the
                    // "label TAB text" line format stays unambiguous.
                    txt = Utils.ToOneLine(txt, /*compact=*/true).Replace('\t', ' ');
                    corpus.WriteLine(lbl + "\t" + txt);
                    corpus.Flush();
                    cache.WriteLine(cacheKey);
                    cache.Flush();
                    mCache.AddRange(GenerateCacheKeys(corpusId, docId, lbl));
                }
            }
            catch (Exception e)
            {
                // Best effort: report the failure and continue with the next document.
                Console.WriteLine(e.Message);
                Console.WriteLine(e.StackTrace);
            }
        }
    }
}