/// <summary>
        /// Loads this matrix from a dense-struct text file. The first line read is handed to
        /// ColSerialNumbersFromHeader to obtain the column keys; every following line is parsed by
        /// CreateRowKeyAndStructList into a row key and its store list.
        /// </summary>
        /// <param name="denseStructFileName">Path of the file to read (opened with FileUtils.OpenTextStripComments).</param>
        /// <param name="parallelOptions">Currently unused: the parallel version of the read was disabled.</param>
        internal void GetInstanceInternal(string denseStructFileName, ParallelOptions parallelOptions)
        {
            using (TextReader reader = FileUtils.OpenTextStripComments(denseStructFileName))
            {
                //!!!similar code in mergedense method
                ColSerialNumbers = ColSerialNumbersFromHeader(reader.ReadLine(), denseStructFileName);

                var progress = new CounterWithMessages("Reading " + denseStructFileName + " {0}", 1000, null, true);

                // Every line is paired with its index so the original row order can be
                // recovered below even if lines were ever processed out of order.
                var indexedLines = FileUtils.ReadEachIndexedLine(reader); //!!!05/18/2009 parallel processing disabled

                RowKeyToStoreList = new Dictionary<string, List<TStore>>(progress.Index + 1);
                _indexOfRowKey = new Dictionary<string, int>();
                foreach (var lineAndIndex in indexedLines)
                {
                    var parsedRow = CreateRowKeyAndStructList(lineAndIndex.Key, denseStructFileName, progress);
                    RowKeyToStoreList.Add(parsedRow.Key, parsedRow.Value);
                    _indexOfRowKey.Add(parsedRow.Key, lineAndIndex.Value);
                }

                // Row keys listed in the order of their line index.
                _rowKeys = _indexOfRowKey
                    .OrderBy(keyAndIndex => keyAndIndex.Value)
                    .Select(keyAndIndex => keyAndIndex.Key)
                    .ToList();
            }
        }
Пример #2
0
        /// <summary>
        /// Streams the tab-delimited SNP file as (snp, cid, allele pair) triples.
        /// Each line must have exactly four fields: snp, cid, a two-character value made of
        /// A, C, T or G, and a confidence number.
        /// </summary>
        /// <param name="cidExcludeList">Lines whose cid is in this set are skipped.</param>
        /// <param name="snpExcludeSet">Lines whose snp is in this set are skipped.</param>
        /// <returns>
        /// One triple per line that is not excluded and whose confidence is at least MissingThreshold.
        /// The sequence is lazy: the file is read as the caller enumerates.
        /// </returns>
        private IEnumerable <RowKeyColKeyValue <string, string, UOPair <char> > > TripleEnumerable(HashSet <string> cidExcludeList, HashSet <string> snpExcludeSet)
        {
            CounterWithMessages counterWithMessages = new CounterWithMessages(SnpFile, compressionRatio: GZ ? .9 : 0);

            using (TextReader textReader =
#if !SILVERLIGHT
                       GZ ? SnpFile.UnGZip()  :
#endif
                       SnpFile.OpenText())
            {
                string line;
                while (null != (line = textReader.ReadLine()))
                {
                    counterWithMessages.Increment();
                    string[] field = line.Split('\t');
                    // Build the message lazily so the concatenation is not paid for on every valid line.
                    Helper.CheckCondition(field.Length == 4, () => "Expect lines of snp file to have four fields. " + line);
                    string snp        = field[0];
                    string cid        = field[1];
                    string value      = field[2];
                    // The file is machine-readable data, so parse with the invariant culture;
                    // otherwise "0.95" is misread under cultures that use ',' as the decimal separator.
                    double confidence = double.Parse(field[3], System.Globalization.CultureInfo.InvariantCulture);
                    Helper.CheckCondition(value.Length == 2 && value.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACT or G. " + value);
                    if (cidExcludeList.Contains(cid) || confidence < MissingThreshold || snpExcludeSet.Contains(snp))
                    {
                        continue; //not break;
                    }

                    yield return(RowKeyColKeyValue.Create(snp, cid, UOPair.Create(value[0], value[1])));
                }
            }

            // Only reached when the caller enumerates the sequence to the end.
            counterWithMessages.Finished();
        }
        /// <summary>
        /// Copies the values of <paramref name="inputMatrix"/> into this matrix, one row per parallel task.
        /// When the input is a SelectRowsAndColsView over a DenseStructMatrix with the same type arguments,
        /// the stores are copied directly from the parent's row lists; otherwise every (row, col) cell is
        /// looked up with TryGetValue and assigned through this matrix's indexer.
        /// </summary>
        /// <param name="inputMatrix">The matrix to copy values from.</param>
        /// <param name="parallelOptions">Options passed to Parallel.ForEach.</param>
        internal void InternalGetInstance(Matrix<string, string, TValue> inputMatrix, ParallelOptions parallelOptions)
        {
            var view = inputMatrix as SelectRowsAndColsView<string, string, TValue>;
            var denseParent = (null == view) ? null : view.ParentMatrix as DenseStructMatrix<TStore, TValue>;

            if (null == denseParent)
            {
                // General case: go cell by cell through the public matrix interface.
                var progress = new CounterWithMessages("Creating new DenseStructMatrix, working on row #{0} of {1}", 1000, RowCount);
                Parallel.ForEach(RowKeys, parallelOptions, rowKey =>
                {
                    progress.Increment();
                    foreach (string colKey in ColKeys)
                    {
                        TValue cell;
                        if (inputMatrix.TryGetValue(rowKey, colKey, out cell))
                        {
                            this[rowKey, colKey] = cell;
                        }
                    }
                });
                return;
            }

            // Optimized case: append the selected parent columns straight onto each row's store list.
            Parallel.ForEach(RowKeys, parallelOptions, rowKey =>
            {
                List<TStore> sourceStores = denseParent.RowKeyToStoreList[rowKey];
                List<TStore> targetStores = RowKeyToStoreList[rowKey];
                foreach (int parentColIndex in view.IndexOfParentColKey)
                {
                    targetStores.Add(sourceStores[parentColIndex]);
                }
            });
        }
Пример #4
0
        //!!!similar to GetInstanceFromDenseStructFileNameInternal


        /// <summary>
        /// Initializes this instance from a file in RowKeys format.
        /// As read here, the file's first line is "rowKey" followed by the tab-delimited column keys,
        /// its second line is the name of the dense struct file (resolved relative to the directory of
        /// the rowKeys file), and every remaining line is a row key, a tab, and that row's file position.
        /// </summary>
        /// <param name="rowKeysStructFileName">The rowKeys file</param>
        /// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation. Not used by this method's own body.</param>
        /// <param name="fileAccess">A FileAccess value that specifies the operations that can be performed on the file. Defaults to 'Read'</param>
        /// <param name="fileShare">A FileShare value specifying the type of access other threads have to the file. Defaults to 'Read'</param>
        /// <exception cref="MatrixFormatException">
        /// The file is empty, its first value is not 'rowKey', the dense struct file name line is missing,
        /// or a row line does not have exactly two tab-delimited columns.
        /// </exception>
        protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
        {
            lock (this)
            {
                string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
                Helper.CheckCondition(null != firstLineOrNull, "Surprised by empty file. " + rowKeysStructFileName);
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader), "Comments are not supported in RowKeysAnsi and related files");


                RowKeyToFilePosition = new Dictionary <string, long>();
                FileAccess           = fileAccess;
                FileShare            = fileShare;


                using (TextReader textReader = File.OpenText(rowKeysStructFileName))
                {
                    string colKeysLineOrNull = textReader.ReadLine();
                    // Check for end-of-file BEFORE the line is split; ReadLine returns null on an empty file.
                    if (null == colKeysLineOrNull)
                    {
                        throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
                    }

                    string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                    if (!varAndColKeys[0].Equals("rowKey"))
                    {
                        throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
                    }
                    ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                    _rowKeys         = new List <string>();


                    //!!!not really thread-safe
                    string denseStructFileNameInFile = textReader.ReadLine();
                    // A file that ends after the header would otherwise pass null to Path.Combine.
                    if (null == denseStructFileNameInFile)
                    {
                        throw new MatrixFormatException("Expect second line to be the name of the dense struct file. " + rowKeysStructFileName);
                    }
                    DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

                    CounterWithMessages counterWithMessages = new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null);

                    string line = null;
                    while (null != (line = textReader.ReadLine()))
                    {
                        counterWithMessages.Increment();
                        string[] rowKeyAndPosition = line.Split('\t');
                        if (rowKeyAndPosition.Length != 2)
                        {
                            throw new MatrixFormatException("Expect rows to have two columns");
                        }
                        string rowKey   = rowKeyAndPosition[0];
                        long   position = long.Parse(rowKeyAndPosition[1]);
                        _rowKeys.Add(rowKey);
                        RowKeyToFilePosition.Add(rowKey, position);
                    }
                }
                Console.WriteLine("all lines read from " + rowKeysStructFileName);

                // Map each row key to its position in the order the rows were listed in the file.
                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
                ValueTester(rowKeysStructFileName);
            }
        }
Пример #5
0
        /// <summary>
        /// Builds a kernel matrix over the column keys (cids) of the input, working in column batches.
        /// The input is row-normalized, its column keys are split into chunks, JustKernel is computed for
        /// every pair of chunks in the upper triangle (the lower triangle is filled with the transpose),
        /// the pieces are merged, and the result is normalized in place.
        /// </summary>
        /// <param name="unnormalizedInput">The input matrix before row normalization.</param>
        /// <param name="cidInBatchCountOrNull">
        /// The chunk size used to divide the column keys. When null, the call is forwarded to
        /// ToKernel(unnormalizedInput).
        /// </param>
        /// <returns>The merged, normalized kernel; its row keys and column keys both equal the input's column keys.</returns>
        public Matrix<string, string, double> ToKernel(Matrix<string, string, double> unnormalizedInput, int? cidInBatchCountOrNull = null)
        {
            if (!cidInBatchCountOrNull.HasValue)
            {
                // NOTE(review): presumably binds to a one-parameter overload declared elsewhere — confirm.
                return ToKernel(unnormalizedInput);
            }

            var input = RowNormalizer.Normalize(unnormalizedInput);

            var cidListList = SpecialFunctions.DivideListIntoEqualChunksFromChunkSize<string>(input.ColKeys, cidInBatchCountOrNull.Value);
            int batchCount = cidListList.Count;

            Console.WriteLine("cids divided into {0} batches of about {1}", batchCount, cidInBatchCountOrNull);
            Helper.CheckCondition(cidListList.Sum(batch => batch.Count) == input.ColCount, "real assert");

            // One unit of work per unordered pair of batches, diagonal included.
            int pairCount = (batchCount * batchCount + batchCount) / 2;
            var progress = new CounterWithMessages("kernel combintations ", 1, pairCount);

            var kernelPieces2D = new Matrix<string, string, double>[batchCount, batchCount];

            for (int i = 0; i < batchCount; ++i)
            {
                Console.WriteLine("Loading batch {0}, size {1}x{2}", i, cidListList[i].Count, unnormalizedInput.RowCount);
                var matrixI = input.SelectColsView(cidListList[i]).ToShoMatrix(verbose: true);

                // Only j >= i is computed; the mirrored piece is stored as a transposed copy.
                Parallel.For(i, batchCount, ParallelOptionsScope.Current, j =>
                {
                    Console.WriteLine("Loading batch {0}, size {1}x{2}", j, cidListList[j].Count, unnormalizedInput.RowCount);

                    if (i != j)
                    {
                        var matrixJ = input.SelectColsView(cidListList[j]).ToShoMatrix(verbose: true);
                        ShoMatrix offDiagonal = JustKernel(matrixI, matrixJ);
                        kernelPieces2D[i, j] = offDiagonal;
                        kernelPieces2D[j, i] = offDiagonal.TransposeView().ToShoMatrix();
                    }
                    else
                    {
                        ShoMatrix diagonal = JustKernel(matrixI);
                        kernelPieces2D[i, i] = diagonal;
                    }
                    progress.Increment();
                });
            }
            progress.Finished();

            var output = MatrixExtensions.MergeRowsAndColsView(kernelPieces2D);

            Helper.CheckCondition(output.RowKeys.SequenceEqual(output.ColKeys) && output.ColKeys.SequenceEqual(unnormalizedInput.ColKeys), "Assert: MergeRows isn't working as expected");

            KernelNormalizeInPlace(ref output);
            return output;
        }
Пример #6
0
        /// <summary>
        /// Creates a new matrix with the same row and column keys as <paramref name="gMatrix"/> in which
        /// every non-missing value has its row mean subtracted and, unless <paramref name="onlyMeanCenter"/>
        /// is set, is divided by a per-row standard deviation derived from a smoothed frequency.
        /// </summary>
        /// <typeparam name="TMatrix">The concrete matrix type produced by the factory.</typeparam>
        /// <param name="maxValue">Multiplier applied to the row's non-missing count in the smoothed-frequency denominator.</param>
        /// <param name="gMatrix">The input matrix.</param>
        /// <param name="zeroMatrixFractory">Factory for the output matrix; the result must read 0 at position (0, 0).</param>
        /// <param name="parallelOptions">Options for the per-row Parallel.ForEach over the row's values.</param>
        /// <param name="onlyMeanCenter">When true, values are mean-centered but not divided by the standard deviation.</param>
        /// <returns>The populated output matrix.</returns>
        public static TMatrix StandardizeGToCreateX<TMatrix>(int maxValue, Matrix<string, string, double> gMatrix,
                                                             MatrixFactoryDelegate<TMatrix, string, string, double> zeroMatrixFractory,
                                                             ParallelOptions parallelOptions, bool onlyMeanCenter = false) where TMatrix : Matrix<string, string, double>
        {
            Console.WriteLine("StandardizeGToCreateX");
            TMatrix xMatrix = zeroMatrixFractory(gMatrix.RowKeys, gMatrix.ColKeys, double.NaN);  //Inits to 0

            Helper.CheckCondition(xMatrix.GetValueOrMissing(0, 0) == 0, "xMatrix must start init'ed to zeros, but even at xMatrix[0,0] it is not 0");

            var progress = new CounterWithMessages("StandardizeGToCreateX: row #{0}", 100, gMatrix.RowCount);

            // Rows are handled sequentially; the values inside each row are handled in parallel.
            foreach (string rowKey in gMatrix.RowKeys)
            {
                progress.Increment();

                //The paper divides by (2+2*Count) because its values are 0,1,2. Because ours are 0,1, we divide by (2+Count)
                var rowView = gMatrix.SelectRowsView(rowKey);
                List<double> presentValues = rowView.Values.ToList();
                double presentCount = (double)presentValues.Count;
                double rowSum = presentValues.Sum();
                double rowMean = rowSum / presentCount;
                double piSmoothedCount = (1.0 + rowSum) / (2.0 + maxValue * presentCount);
                double stdDevPi = Math.Sqrt(piSmoothedCount * (1 - piSmoothedCount));
                Helper.CheckCondition(!double.IsNaN(stdDevPi), "stdDevPi is NaN outside loop, likely because data input was not in 0/1/2 format as expected");

                Parallel.ForEach(rowView.RowKeyColKeyValues, parallelOptions, triple =>
                {
                    double centered = triple.Value - rowMean;
                    xMatrix[rowKey, triple.ColKey] = onlyMeanCenter ? centered : centered / stdDevPi;
                });
            }
            Console.WriteLine();
            return xMatrix;
        }
        /// <summary>
        /// Resets the accumulated state and then, for every phased enumeration of
        /// <paramref name="pidAndHlaSet"/>, combines each pair of first/second HLA lists produced by
        /// <paramref name="linkdis"/>, log-summing the pair's probability into PhaseToLogProb,
        /// UnphaseToLogProb and LogTotal.
        /// </summary>
        /// <param name="pidAndHlaSet">The case whose phased enumerations are expanded.</param>
        /// <param name="linkdis">Supplies the HLA-list probabilities and the combination limit.</param>
        /// <exception cref="CombinationLimitException">
        /// The product of the two candidate counts for a phase exceeds linkdis.CombinationLimit.
        /// </exception>
        internal void Prenormalize(PidAndHlaSet pidAndHlaSet, Linkdis linkdis)
        {
            PhaseToLogProb       = new Dictionary<UOPair<LinkedList1<HlaMsr1>>, double>();
            UnphaseToLogProb     = new Dictionary<LinkedList1<UOPair<HlaMsr1>>, double>();
            LogTotal             = double.NegativeInfinity;
            BadHlaMsr1NameOrNull = null;
            UsedLowerResModel    = false;

            try
            {
                foreach (var abstractPhase in pidAndHlaSet.GetPhasedEnumeration())
                {
                    var firstCandidates  = linkdis.CreateHlaListToProb(abstractPhase.First);
                    var secondCandidates = linkdis.CreateHlaListToProb(abstractPhase.Second);
                    if (firstCandidates.Count * secondCandidates.Count > linkdis.CombinationLimit)
                    {
                        throw new CombinationLimitException("The combinationLimit was exceeded. " + linkdis.CombinationLimit.ToString());
                    }

                    CounterWithMessages groundPhaseCounter = CounterWithMessages.GetInstance("\t\tground phase index = {0}", 1000, null);
                    foreach (var first in firstCandidates)
                    {
                        foreach (var second in secondCandidates)
                        {
                            groundPhaseCounter.Increment();

                            var groundedPhase    = UOPair<LinkedList1<HlaMsr1>>.GetInstance(first.Key, second.Key);
                            var groundedUnphased = MakeUnphased(groundedPhase);

                            // Each candidate's value is a pair: Key is its probability, Value is a flag
                            // that is OR-ed into UsedLowerResModel.
                            UsedLowerResModel |= first.Value.Value || second.Value.Value;
                            double logProb = Math.Log(first.Value.Key * second.Value.Key);

                            LogSum(PhaseToLogProb, groundedPhase, logProb);
                            LogSum(UnphaseToLogProb, groundedUnphased, logProb);
                            LogTotal = SpecialFunctions.LogSum(LogTotal, logProb);
                        }
                    }
                }
            }
            catch (HlaNotInModelException notInModel)
            {
                // An HLA missing from the model replaces the normal result with a "no answer" answer.
                CreateNoAnswerAnswer(pidAndHlaSet, notInModel);
            }
        }
Пример #8
0
        /// <summary>
        /// Computes one LinearTransform per row of <paramref name="inputMatrix"/>, in parallel, and
        /// returns them as a single-column matrix keyed by the input's row keys (the column key is "").
        /// </summary>
        /// <param name="inputMatrix">The matrix whose rows are passed to CreateLinearTransform.</param>
        /// <returns>A matrix with one row per input row and one column holding that row's transform.</returns>
        public virtual Matrix<string, string, LinearTransform> LinearTransformMatrix(Matrix<string, string, double> inputMatrix)
        {
            var progress = new CounterWithMessages("RowNormalizing ", null, inputMatrix.RowCount, quiet: !Verbose);
            var transforms = DenseMatrix<string, string, LinearTransform>.CreateDefaultInstance(inputMatrix.RowKeys, new[] { "" }, null);

            Parallel.ForEach(inputMatrix.AppendIndex(), ParallelOptionsScope.Current, rowAndIndex =>
            {
                progress.Increment();
                // Item1 is the row's values, Item2 is the row's index in the input matrix.
                var rowIndex = rowAndIndex.Item2;
                var rowKey = inputMatrix.RowKeys[rowIndex];
                transforms[rowIndex, 0] = CreateLinearTransform(rowAndIndex.Item1, rowKey);
            });
            progress.Finished();
            return transforms;
        }
Пример #9
0
        /// <summary>
        /// Builds a dictionary from each column key (cid) of <paramref name="matrix"/> to the list of
        /// doubles that CreateDoubleList produces for that column from the matrix's transpose view.
        /// The columns are processed with a parallel query.
        /// </summary>
        /// <param name="matrix">The matrix whose columns are extracted.</param>
        /// <returns>A dictionary with one entry per column key.</returns>
        private static Dictionary<string, List<double>> CreateCidToDoubleList(Matrix<string, string, double> matrix)
        {
            // Progress messages are suppressed when there are fewer than 10 columns.
            var progress = new CounterWithMessages("loading cid columns ", null, matrix.ColCount, quiet: matrix.ColCount < 10);
            var transposedRows = (IList<IList<double>>)matrix.TransposeView();

            var cidToDoubleList = matrix.ColKeys
                .AsParallel().WithParallelOptionsScope()
                .Select(colKey => Tuple.Create(
                    colKey,
                    CreateDoubleList(matrix.IndexOfColKey[colKey], transposedRows, progress)))
                .ToDictionary();

            progress.Finished();
            return cidToDoubleList;
        }
Пример #10
0
        /// <summary>
        /// Returns a view of <paramref name="predictorIn"/> restricted to the rows for which IsGood
        /// returns true when given that row's view and <paramref name="target"/>. Rows are tested with a
        /// parallel query; the surviving rows keep the order they have in the input.
        /// </summary>
        /// <typeparam name="T">The value type of the matrices.</typeparam>
        /// <param name="predictorIn">The matrix whose rows are filtered.</param>
        /// <param name="target">Passed to IsGood along with each candidate row.</param>
        /// <returns>A rows view over <paramref name="predictorIn"/> containing only the accepted rows.</returns>
        virtual public Matrix<string, string, T> Filter<T>(Matrix<string, string, T> predictorIn, Matrix<string, string, T> target)
        {
            var progress = new CounterWithMessages("rowFilter " + ToString(), null, predictorIn.RowCount, quiet: !Verbose);

            // AlwaysTrue is evaluated first for every row; presumably it only ticks the progress counter — confirm.
            var acceptedRowKeys = predictorIn.RowKeys
                .AsParallel().WithParallelOptionsScope()
                .Where(rowKey => AlwaysTrue(progress) && IsGood(predictorIn.SelectRowsView(rowKey), target))
                .ToHashSet();

            progress.Finished();

            // Intersecting with the original key sequence restores the input's row order.
            var keptInInputOrder = predictorIn.RowKeys.Intersect(acceptedRowKeys);
            return predictorIn.SelectRowsView(keptInInputOrder);
        }
        /// <summary>
        /// Writes the matrix to textWriter.
        /// The first line is "var" TAB and then the tab-delimited col keys.
        /// Next is one line per row key. Each line is the row key TAB and then all the row's values with no delimiters.
        /// Delimiters are not needed because each value is represented with a fixed number of characters.
        /// Values may include the fixed-number-of-characters version of the special Missing value.
        /// </summary>
        /// <param name="textWriter">The textWriter to write to.</param>
        /// <param name="parallelOptions">
        /// Options for controlling any parallelism. A MaxDegreeOfParallelism of -1 (the ParallelOptions
        /// default, meaning "no limit") leaves the degree of parallelism of the query unconstrained.
        /// </param>
        public void Write(TextWriter textWriter, ParallelOptions parallelOptions)
        {
            textWriter.WriteLine(Helper.CreateTabString("var", ColSerialNumbers.ItemList.StringJoin("\t")));

            // Rows are formatted in parallel, but AsOrdered keeps the output in RowKeys order.
            var rowKeyQuery = RowKeys.AsParallel().AsOrdered();

            // WithDegreeOfParallelism rejects values below 1, so the -1 "no limit" sentinel of
            // ParallelOptions must not be forwarded to it.
            if (parallelOptions.MaxDegreeOfParallelism != -1)
            {
                rowKeyQuery = rowKeyQuery.WithDegreeOfParallelism(parallelOptions.MaxDegreeOfParallelism);
            }

            var valueStringQuery =
                from rowKey in rowKeyQuery
                let colIndexToVal = FullLengthStoreList(rowKey)
                let valString = StoreListToString(colIndexToVal)
                select rowKey + "\t" + valString;

            CounterWithMessages counterWithMessages = new CounterWithMessages("Writing denseStructMatrix {0} of {1}", 1000, RowCount);

            foreach (var rowKeyAndValString in valueStringQuery)
            {
                counterWithMessages.Increment();
                textWriter.WriteLine(rowKeyAndValString);
            }
        }
Пример #12
0
        /// <summary>
        /// Streams a tab-delimited SNP table as (snp, cid, allele pair) triples. The header line has a
        /// blank first column followed by the snp names; every later line has a cid followed by one
        /// value per snp. A value is "-" (skipped), a single letter from ACTG (both alleles equal), or
        /// two such letters separated by '/'.
        /// </summary>
        /// <returns>One triple per non-"-" cell, produced lazily as the file is read.</returns>
        private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
        {
            var progress = new CounterWithMessages("Reading " + SnpFile.Name, messageIntervalOrNull: 1000);

            using (TextReader reader = SnpFile.OpenText())
            {
                string headerLine = reader.ReadLine();
                Helper.CheckCondition(headerLine != null, "Expect file to contain a first line");
                string[] snpNames = headerLine.Split('\t');
                Helper.CheckCondition(snpNames.Length > 0 && snpNames[0] == "", "Expect first column of first line to be blank");

                for (string line = reader.ReadLine(); null != line; line = reader.ReadLine())
                {
                    progress.Increment();
                    string[] cells = line.Split('\t');
                    Helper.CheckCondition(cells.Length == snpNames.Length, "Expect all lines to have the same # of columns");
                    string cid = cells[0];

                    // Column 0 holds the cid, so the snp values start at column 1.
                    for (int column = 1; column < snpNames.Length; ++column)
                    {
                        string cell = cells[column];
                        if (cell == "-")
                        {
                            continue; // not break;
                        }

                        UOPair<char> genotype;
                        if (cell.Length != 1)
                        {
                            Helper.CheckCondition(cell.Length == 3 && cell[1] == '/' && "ACTG".Contains(cell[0]) && "ACTG".Contains(cell[2]), () => "Expect longer values in snp file be of the form 'a/b' where a & b are ACT or G");
                            genotype = UOPair.Create(cell[0], cell[2]);
                        }
                        else
                        {
                            char allele = cell[0];
                            Helper.CheckCondition("ACTG".Contains(allele), () => "Expect values in snp file to be ACT or G. " + cell);
                            genotype = UOPair.Create(allele, allele);
                        }

                        yield return RowKeyColKeyValue.Create(snpNames[column], cid, genotype);
                    }
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Streams a tab-delimited SNP file as (snp, cid, allele pair) triples. Every line must have
        /// three fields: cid, snp, and a two-character value. The value "00" is skipped; any other value
        /// must consist of two letters from ACTG.
        /// </summary>
        /// <returns>One triple per line whose value is not "00", produced lazily as the file is read.</returns>
        private IEnumerable<RowKeyColKeyValue<string, string, UOPair<char>>> TripleEnumerable()
        {
            // A disabled earlier version estimated the total line count from the file length and the
            // length of the first line; the counter is now built from the file itself.
            var progress = new CounterWithMessages(SnpFile);

            using (TextReader reader = SnpFile.OpenText())
            {
                for (string line = reader.ReadLine(); null != line; line = reader.ReadLine())
                {
                    progress.Increment();
                    string[] parts = line.Split('\t');
                    Helper.CheckCondition(parts.Length == 3, "Expect lines of snp file to have three fields. " + line);
                    string cid      = parts[0];
                    string snp      = parts[1];
                    string genotype = parts[2];

                    // "00" is skipped before validation, so it never reaches the ACTG check.
                    if (genotype != "00")
                    {
                        Helper.CheckCondition(genotype.Length == 2 && genotype.All(c => "ACTG".Contains(c)), () => "Expect values in snp file to be a pair of ACTG or 00. " + genotype);

                        yield return RowKeyColKeyValue.Create(snp, cid, UOPair.Create(genotype[0], genotype[1]));
                    }
                }
                progress.Finished();
            }
        }
Пример #14
0
        /// <summary>
        /// Writes a matrix with char values in DenseAnsi format to a textWriter. Does not need to convert to DenseAnsi format.
        /// If the matrix already is a DenseAnsi instance, the call is forwarded to that instance's own writer.
        /// Otherwise a header line ("var" TAB col keys) is written, followed by one line per row key in row-key order.
        /// </summary>
        /// <param name="matrix">The matrix to write</param>
        /// <param name="textWriter">The stream to write to</param>
        /// <param name="parallelOptions">
        /// A ParallelOptions instance that configures the multithreaded behavior of this operation.
        /// A MaxDegreeOfParallelism of -1 (the ParallelOptions default, meaning "no limit") leaves the
        /// degree of parallelism of the query unconstrained.
        /// </param>
        /// <param name="verbose">Forwarded to the DenseAnsi writer; its negation is passed as the last argument of the progress counter.</param>
        public static void WriteDenseAnsi(this Matrix <string, string, char> matrix, TextWriter textWriter, ParallelOptions parallelOptions, bool verbose = false)
        {
            DenseAnsi denseAnsi = matrix as DenseAnsi;

            if (null != denseAnsi)
            {
                denseAnsi.WriteDenseAnsi <char>(textWriter, parallelOptions, verbose);
                return;
            }

            var counterWithMessages = new CounterWithMessages("writeDenseAnsi {0} {1} ", 1000, matrix.RowCount, !verbose);

            // Lines are created in parallel, but AsOrdered keeps them in RowKeys order.
            var rowKeyQuery = matrix.RowKeys.AsParallel().AsOrdered();

            // WithDegreeOfParallelism rejects values below 1, so the -1 "no limit" sentinel of
            // ParallelOptions must not be forwarded to it.
            if (parallelOptions.MaxDegreeOfParallelism != -1)
            {
                rowKeyQuery = rowKeyQuery.WithDegreeOfParallelism(parallelOptions.MaxDegreeOfParallelism);
            }

            var lineQuery =
                from rowKey in rowKeyQuery
                select CreateLine(matrix, rowKey, counterWithMessages);

            textWriter.WriteLine("var\t{0}", matrix.ColKeys.StringJoin("\t"));
            foreach (string line in lineQuery)
            {
                textWriter.WriteLine(line);
            }
        }
Пример #15
0
 //!!!seems too specific
 /// <summary>
 /// Returns the contents of files in PaddedDouble format as a sequence of string arrays in sparse file format.
 /// The work is delegated to the DenseStructMatrix&lt;double, double&gt; implementation, using this
 /// class's store conversions and missing value; no PaddedDouble instance is created here.
 /// </summary>
 /// <param name="filePattern">Pattern naming the files in PaddedDouble format.</param>
 /// <param name="zeroIsOK">Tells whether it is OK if no files match parts of the file pattern.</param>
 /// <param name="fileMessageOrNull">A string containing '{0}' to write as each file is opened, or null.</param>
 /// <param name="counterWithMessages">Used to send status messages.</param>
 /// <returns>A sequence of string arrays. Each string array has three values: the var, the cid, and the val.</returns>
 public static IEnumerable<string[]> EachSparseLine(string filePattern,
                                                    bool zeroIsOK, string fileMessageOrNull, CounterWithMessages counterWithMessages)
 {
     IEnumerable<string[]> sparseLines = DenseStructMatrix<double, double>.EachSparseLine(
         filePattern, StringToStoreList, StoreToSparseVal,
         zeroIsOK, fileMessageOrNull, StaticStoreMissingValue, counterWithMessages);

     return sparseLines;
 }
        static void Main(string[] args)
        {
            //HlaMsr1Factory.UnitTest();

            try
            {
                ArgCollection argCollection = ArgCollection.GetInstance(args);

                string ethnicityName = argCollection.ExtractOptional <string>("ethnicity", "").ToLowerInvariant();
                SpecialFunctions.CheckCondition(Linkdis.EthnicityNameLowerList().Contains(ethnicityName), string.Format("'-ethnicity ETHNICITY' is required, where ETHNICITY is " + Linkdis.EthnicityNameMixedList().StringJoin(", ")));
                int  outputLineLimit  = argCollection.ExtractOptional <int>("outputLineLimit", 100000);
                int  combinationLimit = argCollection.ExtractOptional <int>("combinationLimit", 10000);
                bool isSparse         = argCollection.ExtractOptionalFlag("sparse");

                argCollection.CheckNoMoreOptions(3);

                string inputFileName          = argCollection.ExtractNext <string>("inputFile");
                string phasedOutputFileName   = argCollection.ExtractNext <string>("phasedOutputFile");
                string unphasedOutputFileName = argCollection.ExtractNext <string>("unphasedOutputFile");
                argCollection.CheckThatEmpty();

                Linkdis linkdis = Linkdis.GetInstance(ethnicityName, combinationLimit);

                string versionName = string.Format("MSCompBio HLA Completion v. {0}", GetVersionString());


                CounterWithMessages pidCounter = CounterWithMessages.GetInstance("Pid index = {0}", 1, null);

                int outputLineIndex = -1;
                using (TextWriter phasedTextWriter = File.CreateText(phasedOutputFileName),
                       unphasedTextWriter = File.CreateText(unphasedOutputFileName))
                {
                    phasedTextWriter.WriteLine(versionName + "\n");
                    unphasedTextWriter.WriteLine(versionName + "\n");

                    phasedTextWriter.WriteLine("pid" + "\t" + PhasedExpansion.Header);
                    unphasedTextWriter.WriteLine("pid" + "\t" + UnphasedExpansion.Header);
                    outputLineIndex += 6;

                    HashSet <string> warningSet = new HashSet <string>();
                    using (TextReader textReader = File.OpenText(inputFileName))
                    {
                        foreach (PidAndHlaSet pidAndHlaSet in isSparse ? PidAndHlaSet.GetEnumerationSparse(textReader) : PidAndHlaSet.GetEnumerationDense(textReader))
                        {
                            pidCounter.Increment();
                            warningSet.UnionWith(pidAndHlaSet.WarningSet);

                            ExpansionCollection expansionCollectionOrNull = linkdis.ExpandOrNullIfTooMany(pidAndHlaSet);

                            if (null == expansionCollectionOrNull)
                            {
                                phasedTextWriter.WriteLine(pidAndHlaSet.Pid + "\t" + PhasedExpansion.TooManyCombinationsMessage());
                                unphasedTextWriter.WriteLine(pidAndHlaSet.Pid + "\t" + UnphasedExpansion.TooManyCombinationsMessage());
                                warningSet.Add(string.Format("Error: Too many combinations, case {0} skipped", pidAndHlaSet.Pid));
                                outputLineIndex += 2;
                                if (outputLineIndex > outputLineLimit)
                                {
                                    goto TOOMANYLINES;
                                }
                            }
                            else
                            {
                                foreach (PhasedExpansion phasedExpansion in expansionCollectionOrNull.Phased())
                                {
                                    string phasedLine = pidAndHlaSet.Pid + "\t" + phasedExpansion.ToString();
                                    phasedTextWriter.WriteLine(phasedLine);
                                    if (phasedExpansion.BadHlaNameOrNull != null)
                                    {
                                        warningSet.Add(phasedLine);
                                    }
                                    ++outputLineIndex;
                                    if (outputLineIndex > outputLineLimit)
                                    {
                                        goto TOOMANYLINES;
                                    }
                                }

                                foreach (UnphasedExpansion unphasedExpansion in expansionCollectionOrNull.Unphased())
                                {
                                    string unphasedLine = pidAndHlaSet.Pid + "\t" + unphasedExpansion.ToString();
                                    unphasedTextWriter.WriteLine(unphasedLine);
                                    if (unphasedExpansion.BadHlaNameOrNull != null)
                                    {
                                        warningSet.Add(unphasedLine);
                                    }

                                    ++outputLineIndex;
                                    if (outputLineIndex > outputLineLimit)
                                    {
                                        goto TOOMANYLINES;
                                    }
                                }
                            }
                        }
                    }

                    goto INANYCASE;
TOOMANYLINES:
                    string tooManyLinesMessage = string.Format("ERROR: The line limit of {0} was reached and output was ended early", outputLineLimit);
                    phasedTextWriter.WriteLine(tooManyLinesMessage);
                    unphasedTextWriter.WriteLine(tooManyLinesMessage);
                    warningSet.Add(tooManyLinesMessage);
INANYCASE:
                    Console.Error.WriteLine(warningSet.StringJoin("\n"));
                }
            }
            catch (Exception exception)
            {
                Console.WriteLine(exception.Message);
                if (exception.InnerException != null)
                {
                    Console.WriteLine(exception.InnerException.Message);
                }

                Console.Error.WriteLine(@"
 
USAGE 

HlaCompletion -ethnicity ETHNICITY [-outputLineLimit 100000] [-sparse] [-combinationLimit 10000] inputFile phaseFile unphaseFile 
where ETHNICITY is {0}
'outputLineLimit' limits the total lines of output. If it is reached, a warning message is written as the last line of the output.
'combinationLimit' limits the number of combinations of HLAs consider in one phase for one case.
        It is is reached, an error message is output for that case in place of results.
'-sparse' reads files in sparse format
 
", Linkdis.EthnicityNameMixedList().StringJoin(", "));

                System.Environment.Exit(-1);
            }
        }
        //!!!This seems too specific. Better would be one that returns RowKeyColKeyValue from a file and then that could be changed to string[] outside this class.
        /// <summary>
        /// Lazily enumerates every non-missing value found in the files matching <paramref name="filePattern"/>,
        /// one sparse triple at a time.
        /// </summary>
        /// <param name="filePattern">Pattern handed to FileUtils.GetFiles to select the input files.</param>
        /// <param name="staticStringiToStoreListDelegate">Converts the value portion of a data line into one store per column.</param>
        /// <param name="StoreToSparseValueDelegate">Converts a non-missing store into the string emitted as the value.</param>
        /// <param name="zeroIsOK">Passed through to FileUtils.GetFiles.</param>
        /// <param name="fileMessageOrNull">If not null, a format string written to the console with the file name as argument {0}.</param>
        /// <param name="storeMissingValue">The store that marks a missing value; cells equal to it are skipped.</param>
        /// <param name="counterWithMessages">Incremented once per data line (inside CreateRowKeyAndStructList).</param>
        /// <returns>One array of the form { rowKey, colKey, value } per non-missing cell.</returns>
        internal static IEnumerable <string[]> EachSparseLine(string filePattern,
                                                              StaticStringToStoreListDelegate staticStringiToStoreListDelegate,
                                                              Converter <TStore, string> StoreToSparseValueDelegate,
                                                              bool zeroIsOK, string fileMessageOrNull, TStore storeMissingValue, CounterWithMessages counterWithMessages)
        {
            foreach (string fileName in FileUtils.GetFiles(filePattern, zeroIsOK))
            {
                if (null != fileMessageOrNull)
                {
                    Console.WriteLine(fileMessageOrNull, fileName);
                }

                using (TextReader textReader = FileUtils.OpenTextStripComments(fileName))
                {
                    string header = textReader.ReadLine();
                    SerialNumbers <string> colSerialNumberCollection = ColSerialNumbersFromHeader(header, fileName);

                    string line;
                    while (null != (line = textReader.ReadLine()))
                    {
                        // Hand over the concrete file being read, not the search pattern: the pattern may match
                        // several files, so it cannot identify which file a malformed line came from.
                        var           rowKeyAndStructList = CreateRowKeyAndStructList(line, fileName, colSerialNumberCollection.Count, staticStringiToStoreListDelegate, counterWithMessages);
                        string        rowKey     = rowKeyAndStructList.Key;
                        List <TStore> structList = rowKeyAndStructList.Value;
                        for (int colIndex = 0; colIndex < colSerialNumberCollection.Count; ++colIndex)
                        {
                            TStore store = structList[colIndex];
                            if (!store.Equals(storeMissingValue)) //OK to use Equals because TStore can't be null
                            {
                                string   val         = StoreToSparseValueDelegate(store);
                                string[] stringArray = new string[] { rowKey, colSerialNumberCollection.GetItem(colIndex), val };
                                yield return(stringArray);
                            }
                        }
                    }
                }
            }
        }
Пример #18
0
 //Create a method just so we can have the side effect of counting with counterWithMessages
 /// <summary>
 /// Copies the inner list at <paramref name="colIndex"/> into a new list, ticking the counter as a side effect.
 /// </summary>
 /// <param name="colIndex">Index of the inner list to copy.</param>
 /// <param name="listOfLists">The lists to pick from.</param>
 /// <param name="counterWithMessages">Counter that is incremented once per call.</param>
 /// <returns>A new list holding the elements of the selected inner list.</returns>
 private static List <double> CreateDoubleList(int colIndex, IList <IList <double> > listOfLists, CounterWithMessages counterWithMessages)
 {
     // The counter is ticked before the lookup, so it advances even if the lookup throws.
     counterWithMessages.Increment();

     IList <double> selected = listOfLists[colIndex];
     List <double>  copy     = selected.ToList();
     return copy;
 }
        /// <summary>
        /// Parses one data line into its row key and its list of stores, incrementing
        /// <paramref name="counterWithMessages"/> once per call.
        /// </summary>
        /// <param name="line">The raw data line.</param>
        /// <param name="denseStructFileName">File name forwarded to SplitVarLine.</param>
        /// <param name="colCount">The number of stores the line must produce.</param>
        /// <param name="stringToStoreListDelegate">Converts the value portion of the line into a list of stores.</param>
        /// <param name="counterWithMessages">Counter that is incremented before any parsing happens.</param>
        /// <returns>The row key paired with its stores.</returns>
        /// <exception cref="MatrixFormatException">The delegate returned a list whose length is not <paramref name="colCount"/>.</exception>
        static private KeyValuePair <string, List <TStore> > CreateRowKeyAndStructList(string line, string denseStructFileName,
                                                                                       int colCount, StaticStringToStoreListDelegate stringToStoreListDelegate,
                                                                                       CounterWithMessages counterWithMessages)
        {
            counterWithMessages.Increment();

            string valuePortion;
            string key = SplitVarLine(line, denseStructFileName, colCount, out valuePortion);
            List <TStore> stores = stringToStoreListDelegate(valuePortion, colCount);

            // Happy path first: exactly one store per column.
            if (stores.Count == colCount)
            {
                return new KeyValuePair <string, List <TStore> >(key, stores);
            }

            throw new MatrixFormatException("Every data string should have a value per col. " + key);
        }
 //05/18/2009 seems slow to use the static methods
 /// <summary>
 /// Instance overload: parses <paramref name="line"/> using this instance's ColCount and StringToStoreList.
 /// </summary>
 /// <param name="line">The raw data line.</param>
 /// <param name="denseStructFileName">File name forwarded to the five-argument overload.</param>
 /// <param name="counterWithMessages">Counter forwarded to the five-argument overload.</param>
 /// <returns>The row key paired with its stores.</returns>
 private KeyValuePair <string, List <TStore> > CreateRowKeyAndStructList(string line, string denseStructFileName,
                                                                         CounterWithMessages counterWithMessages)
 {
     // The delegate also receives a column count; the instance parser ignores it.
     KeyValuePair <string, List <TStore> > rowKeyAndStores = CreateRowKeyAndStructList(
         line,
         denseStructFileName,
         ColCount,
         (valueText, unusedColCount) => StringToStoreList(valueText),
         counterWithMessages);

     return rowKeyAndStores;
 }
Пример #21
0
 /// <summary>
 /// Increments <paramref name="counterWithMessages"/> and reports true unconditionally.
 /// </summary>
 /// <param name="counterWithMessages">Counter that is incremented once per call.</param>
 /// <returns>Always true.</returns>
 public bool AlwaysTrue(CounterWithMessages counterWithMessages)
 {
     const bool answer = true;

     counterWithMessages.Increment();
     return answer;
 }
Пример #22
0
        /// <summary>
        /// Initializes this instance from a RowKeys file: a tab-delimited header line whose first value is "rowKey"
        /// followed by the column keys, a second line naming the data file (combined with the RowKeys file's
        /// directory), and then one "rowKey TAB position" line per row. After loading, a few values are read
        /// back from the data file as a smoke test.
        /// </summary>
        /// <param name="rowKeysStructFileName">Path of the RowKeys file.</param>
        /// <param name="parallelOptions">Not used; present so the signature matches other, similar methods.</param>
        /// <param name="fileAccess">Stored in the FileAccess property. Defaults to Read.</param>
        /// <param name="fileShare">Stored in the FileShare property. Defaults to Read.</param>
        /// <exception cref="MatrixFormatException">
        /// The file is empty, the header does not start with "rowKey", the line naming the data file is missing,
        /// or a row line does not have exactly two tab-separated columns.
        /// </exception>
        protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
        {
            // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
            lock (this)
            {
                string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
                Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName);
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);


                RowKeyToFilePosition = new Dictionary <string, long>();
                FileAccess           = fileAccess;
                FileShare            = fileShare;


                using (TextReader textReader = File.OpenText(rowKeysStructFileName))
                {
                    string colKeysLineOrNull = textReader.ReadLine();

                    // Test for an empty file BEFORE dereferencing the line; otherwise the Split below would
                    // throw a NullReferenceException and this check could never fire.
                    if (null == colKeysLineOrNull)
                    {
                        throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
                    }

                    string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                    if (!varAndColKeys[0].Equals("rowKey"))
                    {
                        throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
                    }
                    ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                    _rowKeys         = new List <string>();


                    //!!!not really thread-safe
                    string denseStructFileNameInFile = textReader.ReadLine();

                    // ReadLine returns null at end of file; Path.Combine would turn that into an
                    // ArgumentNullException, so report it as the format problem it is.
                    if (null == denseStructFileNameInFile)
                    {
                        throw new MatrixFormatException("Expect second row to name the data file. " + rowKeysStructFileName);
                    }
                    DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

                    CounterWithMessages counterWithMessages = new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null);

                    // Every remaining line maps one row key to its position in the data file.
                    string line = null;
                    while (null != (line = textReader.ReadLine()))
                    {
                        counterWithMessages.Increment();
                        string[] rowKeyAndPosition = line.Split('\t');
                        if (rowKeyAndPosition.Length != 2)
                        {
                            throw new MatrixFormatException("Expect rows to have two columns");
                        }
                        string rowKey   = rowKeyAndPosition[0];
                        long   position = long.Parse(rowKeyAndPosition[1], CultureInfo.CurrentCulture);
                        _rowKeys.Add(rowKey);
                        RowKeyToFilePosition.Add(rowKey, position);
                    }
                }
                Console.WriteLine("all lines read from file [{0}]", rowKeysStructFileName);

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
                Console.WriteLine("Dictionary created. Now testing values");


                //Test that can really read values from data file
                if (RowCount > 0 && ColCount > 0)
                {
                    // First cell.
                    GetValueOrMissing(0, 0);
                    //Console.WriteLine("GetValueOrMissing(0,0)={0} tested", value0);

                    int rowCount = RowCount;
                    //Console.WriteLine("rowCount is {0}", rowCount);
                    int colCount = ColCount;
                    //Console.WriteLine("colCount is {0}", colCount);
                    string rowKey = RowKeys[rowCount - 1];
                    //Console.WriteLine("rowKey is {0}", rowKey);
                    string colKey = ColKeys[colCount - 1];
                    //Console.WriteLine("colKey is {0}", colKey);
                    int colIndex = ColSerialNumbers.GetOld(colKey);
                    //Console.WriteLine("colIndex is {0}", colIndex);

                    byte[] byteArray = new byte[23]; //C# will init to 0's
                    //!!!kludge - try up to 10 times to get a good value
                    // Last cell (last row, last column), read as raw bytes straight from the stream.
                    for (int i = 0; i < 10; ++i)
                    {
                        ThreadLocalStream.Position = 0;
                        ThreadLocalStream.Position = RowKeyToFilePosition[rowKey] + colIndex * BytesPerValue;
                        //Console.WriteLine("ThreadLocalStream.Position is {0}", ThreadLocalStream.Position);
                        byteArray = new byte[BytesPerValue];
                        int bytesRead = ThreadLocalStream.Read(byteArray, 0, BytesPerValue);
                        //Console.WriteLine("byteArray[0] is {0}", (int)byteArray[0]);
                        //Console.WriteLine("bytesRead is {0}", bytesRead);

                        // A non-zero first byte is taken as a good read.
                        if ((int)byteArray[0] != 0)
                        {
                            break;
                        }

                        //Console.WriteLine("Read a 0 instead of a 32, going to sleep for 10 seconds");
                        Thread.Sleep(10000);

                        // NOTE(review): this length check only runs after a zero first byte and the sleep above;
                        // confirm whether it was meant to run immediately after the Read.
                        Helper.CheckCondition(bytesRead == BytesPerValue, "Expected to read all the bytes of a value");
                        //Console.WriteLine("expected bytes read");
                    }

                    //string asString = System.Text.Encoding.Default.GetString(byteArray);
                    //Console.WriteLine("bytes to string is {0}", asString);
                    //TValue valueLast = Parser.Parse<TValue>(asString);
                    //Console.WriteLine("value is {0}", valueLast);
                    //Helper.CheckCondition(!valueLast.Equals(MissingValue), "Should not be missing"); //OK to use Equals because double can't be null

                    // Middle cell.
                    GetValueOrMissing(RowCount / 2, ColCount / 2);
                    //Console.WriteLine("GetValueOrMissing({0}, {1})={2} tested", RowCount / 2, ColCount / 2, valueMiddle);
                }

                //Console.WriteLine("Values tested. Done");
            }
        }
Пример #23
0
        /// <summary>
        /// Renders one row of <paramref name="matrix"/> as a text line: the row key, a tab, and the row's
        /// stores formatted by DenseAnsi.StoreListToString.
        /// </summary>
        /// <param name="matrix">The matrix to read the row from.</param>
        /// <param name="rowKey">Key of the row to render; looked up in matrix.IndexOfRowKey.</param>
        /// <param name="counterWithMessages">Counter that is incremented once the line has been built.</param>
        /// <returns>The rendered line.</returns>
        private static string CreateLine(Matrix <string, string, char> matrix, string rowKey, CounterWithMessages counterWithMessages)
        {
            int         indexOfRow = matrix.IndexOfRowKey[rowKey];
            List <byte> stores     = new List <byte>(matrix.ColCount);

            int col = 0;
            while (col < matrix.ColCount)
            {
                char cell;
                if (!matrix.TryGetValue(indexOfRow, col, out cell))
                {
                    // No value in this cell: emit the missing-value marker.
                    stores.Add(DenseAnsi.StaticStoreMissingValue);
                }
                else
                {
                    stores.Add(DenseAnsi.StaticValueToStore(cell));
                }
                ++col;
            }

            Helper.CheckCondition(stores.Count == matrix.ColCount, "Assert");
            string textLine = rowKey + "\t" + DenseAnsi.StoreListToString(stores, matrix.ColCount);

            counterWithMessages.Increment();
            return textLine;
        }
Пример #24
0
        /// <summary>
        /// Initializes this instance by scanning a dense struct file: the header line (first value "var", then the
        /// column keys) is parsed, and then the file is walked row by row to record each row key and the file
        /// position at which that row's values start.
        /// </summary>
        /// <param name="denseStructFileName">Path of the dense struct file; stored in DenseStructFileName.</param>
        /// <param name="parallelOptions">Not used; present so the signature matches other, similar methods.</param>
        /// <param name="fileAccess">Access used to open the file and stored in the FileAccess property. Defaults to Read.</param>
        /// <param name="fileShare">Sharing mode used to open the file and stored in the FileShare property. Defaults to Read.</param>
        /// <exception cref="MatrixFormatException">
        /// The file is empty, the header does not start with "var", a row key appears more than once,
        /// or the computed end of a row lies beyond the end of the file.
        /// </exception>
        protected void GetInstanceFromDenseStructFileNameInternal(string denseStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read)
        {
            // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
            lock (this)
            {
                // Quick sanity pass with a throwaway reader: the file must have data and must not start with a comment.
                using (FileStream fileStream = File.Open(denseStructFileName, FileMode.Open, fileAccess, fileShare))
                {
                    using (TextReader textReader = new StreamReader(fileStream))
                    {
                        string firstLineOrNull = textReader.ReadLine();
                        Helper.CheckCondition(null != firstLineOrNull, Properties.Resource.ExpectedFileToHaveData, denseStructFileName);
                        Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, denseStructFileName);
                    }
                }

                RowKeyToFilePosition = new Dictionary <string, long>();

                DenseStructFileName = denseStructFileName;
                FileAccess          = fileAccess;
                FileShare           = fileShare;
                long position = 0;


                string colKeysLineOrNull = ThreadLocalTextReader.ReadLine();

                // Test for an empty file BEFORE dereferencing the line; otherwise the Length/Split below would
                // throw a NullReferenceException and this check could never fire.
                if (null == colKeysLineOrNull)
                {
                    throw new MatrixFormatException("Surprised by empty file. " + denseStructFileName);
                }

                position += colKeysLineOrNull.Length + 2; //!!!const assuming 2 char newlines
                string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                if (!varAndColKeys[0].Equals("var"))
                {
                    throw new MatrixFormatException("Expect first row's first value to be 'var'");
                }
                ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                _rowKeys         = new List <string>();
                CounterWithMessages counterWithMessages = new CounterWithMessages("Reading data file to find location of rows, #{0}", 10000, null);

                // Each pass: seek to the start of a row, read bytes up to the first tab to get the row key,
                // remember where the values start, then jump over the values to the next row.
                while (true)
                {
                    counterWithMessages.Increment();
                    ThreadLocalStream.Position = position;
                    StringBuilder sb = new StringBuilder();
                    while (true)
                    {
                        int i = ThreadLocalStream.ReadByte();
                        if (-1 == i)
                        {
                            // End of stream: no more rows.
                            goto END;
                        }
                        if ('\t' == (char)i)
                        {
                            break; // real break, not continue
                        }
                        sb.Append((char)i);
                    }

                    string rowKey = sb.ToString();
                    if (RowKeyToFilePosition.ContainsKey(rowKey))
                    {
                        throw new MatrixFormatException(string.Format(CultureInfo.InvariantCulture, "The rowkey {0} appears more than once", rowKey));
                    }

                    _rowKeys.Add(rowKey);
                    position += rowKey.Length + 1; // the key plus its trailing tab
                    RowKeyToFilePosition.Add(rowKey, position);
                    position += ColCount * BytesPerValue + 2;//!!!assumes two char newlines
                    if (position > ThreadLocalStream.Length)
                    {
                        throw new MatrixFormatException("File seems too short");
                    }
                }
                END :;

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
            }
        }
        //!!!similar to GetInstanceFromDenseStructFileNameInternal


        /// <summary>
        /// Get a instance from a file in a RowKeys format
        /// </summary>
        /// <param name="rowKeysStructFileName">The rowKeys file</param>
        /// <param name="parallelOptions">A ParallelOptions instance that configures the multithreaded behavior of this operation.</param>
        /// <param name="fileAccess">A FileAccess value that specifies the operations that can be performed on the file. Defaults to 'Read'</param>
        /// <param name="fileShare">A FileShare value specifying the type of access other threads have to the file. Defaults to 'Read'</param>
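        /// <param name="verbose">If true, progress is reported through a CounterWithMessages while the row positions are read. Defaults to true.</param>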
        protected void GetInstanceFromRowKeysStructFileNameInternal(string rowKeysStructFileName, ParallelOptions parallelOptions, FileAccess fileAccess = FileAccess.Read, FileShare fileShare = FileShare.Read, bool verbose = true)
        {
            // parallelOptions is not currently used, but it is needed so that this method has the same signature as other, similar methods.
            lock (this)
            {
                // The file must have data, and its very first line must not be a comment.
                string firstLineOrNull = FileUtils.ReadLine(rowKeysStructFileName);
                Helper.CheckCondition(null != firstLineOrNull, () => string.Format(CultureInfo.InvariantCulture, Properties.Resource.ExpectedFileToHaveData, rowKeysStructFileName));
                Helper.CheckCondition(!firstLineOrNull.StartsWith(FileUtils.CommentHeader, StringComparison.Ordinal), Properties.Resource.ExpectedNoCommentsInRowKeysAnsiFiles, rowKeysStructFileName);


                RowKeyToFilePosition = new Dictionary <string, long>();
                FileAccess           = fileAccess;
                FileShare            = fileShare;


                //using (TextReader textReader = File.OpenText(rowKeysStructFileName))
                using (TextReader textReader = FileUtils.OpenTextStripComments(rowKeysStructFileName))
                {
                    // Line 1: "rowKey" followed by the tab-separated column keys.
                    string colKeysLineOrNull = textReader.ReadLine();
                    if (null == colKeysLineOrNull)
                    {
                        throw new MatrixFormatException("Surprised by empty file. " + rowKeysStructFileName);
                    }

                    string[] varAndColKeys = colKeysLineOrNull.Split('\t');
                    if (!varAndColKeys[0].Equals("rowKey"))
                    {
                        throw new MatrixFormatException("Expect first row's first value to be 'rowKey'"); //!!!rowKey
                    }

                    ColSerialNumbers = new SerialNumbers <string>(varAndColKeys.Skip(1));
                    _rowKeys         = new List <string>();


                    // Line 2: the name of the data file, combined with the RowKeys file's directory.
                    //!!!not really thread-safe
                    string denseStructFileNameInFile = textReader.ReadLine();

                    // ReadLine returns null at end of file; Path.Combine would turn that into an
                    // ArgumentNullException, so report it as the format problem it is.
                    if (null == denseStructFileNameInFile)
                    {
                        throw new MatrixFormatException("Expect second row to name the data file. " + rowKeysStructFileName);
                    }
                    DenseStructFileName = Path.Combine(Path.GetDirectoryName(rowKeysStructFileName), denseStructFileNameInFile);

                    // The counter only exists when verbose; every use below is guarded by the same flag.
                    CounterWithMessages counterWithMessages = verbose ? new CounterWithMessages("Reading rowKey file to find location of rows, #{0}", 10000, null) : null;

                    // Remaining lines: "rowKey TAB position", one per row.
                    string line = null;
                    while (null != (line = textReader.ReadLine()))
                    {
                        if (verbose)
                        {
                            counterWithMessages.Increment();
                        }
                        string[] rowKeyAndPosition = line.Split('\t');
                        if (rowKeyAndPosition.Length != 2)
                        {
                            throw new MatrixFormatException("Expect rows to have two columns");
                        }
                        string rowKey   = rowKeyAndPosition[0];
                        long   position = long.Parse(rowKeyAndPosition[1], CultureInfo.CurrentCulture);
                        _rowKeys.Add(rowKey);
                        RowKeyToFilePosition.Add(rowKey, position);
                    }
                }
                //Console.WriteLine("all lines read from file [{0}]", rowKeysStructFileName);

                _indexOfRowKey = RowKeys.Select((key, index) => new { key, index }).ToDictionary(keyAndIndex => keyAndIndex.key, keyAndIndex => keyAndIndex.index);
                ValueTester(rowKeysStructFileName);
            }
        }