Beispiel #1
0
        /// <summary>
        /// Objective function evaluated for a candidate LOESS bandwidth.
        /// Based on Donavan Cheng's R script (loessnormalize_dev_v2.R).
        /// Fits a LOESS curve of counts against GC, subtracts that curve from the counts
        /// (re-centering on the median count), fits a second LOESS curve to the normalized
        /// counts and returns the standard deviation of the second fit.
        /// </summary>
        /// <param name="bandwidth">LOESS bandwidth to evaluate</param>
        /// <param name="gcs">GC content per observation; truncated to an integer when used as a lookup index</param>
        /// <param name="counts">coverage after variance stabilization. Assumed to be normally distributed</param>
        /// <returns>Standard deviation of the LOESS fit to the GC-normalized counts</returns>
        private static double objective(double bandwidth, double[] gcs, double[] counts)
        {
            double medianY = Utilities.Median(counts);
            int    minGC   = (int)gcs.Min();
            int    maxGC   = (int)gcs.Max();
            // Enumerable.Range takes (start, count), not (start, end): one prediction is needed
            // for every integer GC value in [minGC, maxGC] so that fittedByGC[gc - minGC] is
            // always a valid index (also when minGC is 0).
            int    gcCount = maxGC - minGC + 1;

            LoessInterpolator loess = new LoessInterpolator(bandwidth, 0);

            // First LOESS: remove the GC trend from the counts and re-center on the median
            double[] normalized = new double[counts.Length];
            {
                var      model      = loess.Train(gcs, counts, 1, computeFitted: false);
                double[] fittedByGC = model.Predict(Enumerable.Range(minGC, gcCount).Select(i => (double)i));
                for (int i = 0; i < normalized.Length; i++)
                {
                    int gc = (int)gcs[i];
                    normalized[i] = counts[i] - fittedByGC[gc - minGC] + medianY;
                }
            }
            // Second LOESS: fit the normalized counts against GC
            double[] fitted = new double[counts.Length];
            {
                var      model      = loess.Train(gcs, normalized, 1, computeFitted: false);
                double[] fittedByGC = model.Predict(Enumerable.Range(minGC, gcCount).Select(i => (double)i));
                for (int i = 0; i < fitted.Length; i++)
                {
                    int gc = (int)gcs[i];
                    fitted[i] = fittedByGC[gc - minGC];
                }
            }

            return Utilities.StandardDeviation(fitted);
        }
Beispiel #2
0
        /// <summary>
        /// Removes the GC trend from the bin counts.
        /// Selects a LOESS bandwidth using only the observations indexed by withoutChrY,
        /// fits a LOESS curve of counts against GC over all observations, and then replaces
        /// each bin's count with (transformed count - fitted value at the bin's GC + median count),
        /// mapped back through invCountTransformer.
        /// </summary>
        public void Normalize()
        {
            // Find the best bandwidth without chrY
            double[] gcsNoChrY = withoutChrY.Select(i => gcs[i]).ToArray();
            double[] countsNoChrY = withoutChrY.Select(i => counts[i]).ToArray();
            double bestBandwidth = findBestBandwith(0.3, 0.75, gcsNoChrY, countsNoChrY);

            // Fit LOESS
            double medianY = Utilities.Median(counts);
            int minGC = (int)gcs.Min();
            int maxGC = (int)gcs.Max();
            LoessInterpolator loess = new LoessInterpolator(bestBandwidth, 0);
            var model = loess.Train(gcs, counts, 1, computeFitted: false);
            // Enumerable.Range takes (start, count), not (start, end): predict exactly one value
            // per integer GC in [minGC, maxGC]. Passing maxGC as the count left the entry for
            // maxGC missing whenever minGC was 0, so the clamp below silently substituted the
            // fit for maxGC - 1.
            double[] fittedByGC = model.Predict(Enumerable.Range(minGC, maxGC - minGC + 1).Select(i => (double)i));
            // Smooth
            foreach (GenomicBin bin in bins)
            {
                // Clamp so that a bin whose GC lies outside [minGC, maxGC] uses the nearest fitted value
                int i = Math.Min(fittedByGC.Length - 1, Math.Max(0, bin.GC - minGC));
                double smoothed = countTransformer(bin.Count) - fittedByGC[i] + medianY;
                bin.Count = invCountTransformer(smoothed);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Removes the GC trend from the bin counts.
        /// Selects a LOESS bandwidth using only the observations indexed by withoutChrY,
        /// fits a LOESS curve of counts against GC over all observations, and then replaces
        /// each bin's count with (transformed count - fitted value at the bin's GC + median count),
        /// mapped back through invCountTransformer.
        /// </summary>
        public void Normalize()
        {
            // Find the best bandwidth without chrY
            double[] gcsNoChrY     = withoutChrY.Select(i => gcs[i]).ToArray();
            double[] countsNoChrY  = withoutChrY.Select(i => counts[i]).ToArray();
            double   bestBandwidth = findBestBandwith(0.3, 0.75, gcsNoChrY, countsNoChrY);

            // Fit LOESS
            double            medianY = Utilities.Median(counts);
            int               minGC   = (int)gcs.Min();
            int               maxGC   = (int)gcs.Max();
            LoessInterpolator loess   = new LoessInterpolator(bestBandwidth, 0);
            var               model   = loess.Train(gcs, counts, 1, computeFitted: false);

            // Enumerable.Range takes (start, count), not (start, end): predict exactly one value
            // per integer GC in [minGC, maxGC]. Passing maxGC as the count left the entry for
            // maxGC missing whenever minGC was 0, so the clamp below silently substituted the
            // fit for maxGC - 1.
            double[] fittedByGC = model.Predict(Enumerable.Range(minGC, maxGC - minGC + 1).Select(i => (double)i));
            // Smooth
            foreach (SampleGenomicBin bin in bins)
            {
                // Clamp so that a bin whose GC lies outside [minGC, maxGC] uses the nearest fitted value
                int    i        = Math.Min(fittedByGC.Length - 1, Math.Max(0, bin.GenomicBin.GC - minGC));
                double smoothed = countTransformer(bin.Count) - fittedByGC[i] + medianY;
                bin.Count = invCountTransformer(smoothed);
            }
        }
Beispiel #4
0
            /// <summary>
            /// Predicts the value at <paramref name="x"/>: computes the local regression coefficients
            /// from the sorted training data over the interval's bandwidth interval, then evaluates
            /// them at <paramref name="x"/>.
            /// </summary>
            /// <param name="x">point at which to predict</param>
            /// <param name="interval">interval whose BandwidthInterval is passed to the coefficient computation</param>
            /// <returns>the value returned by LoessInterpolator.predict for the computed coefficients</returns>
            private double predict(double x, LoessInterval interval)
            {
                return LoessInterpolator.predict(
                    x,
                    LoessInterpolator.computeCoefficients(
                        x,
                        SortedXs,
                        SortedYs,
                        SortedRobustnessWeights,
                        interval.BandwidthInterval));
            }
        /// <summary>
        /// Checks LoessInterpolator.Train / Predict against reference fits produced by R's loess:
        /// (1) fitted and predicted values without robustness iterations, the order of model.Xs,
        /// and agreement between single-point and batch prediction;
        /// (2) that no fitted values are stored when computeFitted is false;
        /// (3) that fitted values are still computed, and match the weighted R fit, when
        /// robustness iterations are requested.
        /// </summary>
        public void TestTrain()
        {
            // Note: the last x value (0.0314159265358979) is smaller than all the others,
            // so the input is deliberately not sorted.
            double[] x = new double[] { 0.0628318530717959, 0.0942477796076938, 0.125663706143592, 0.15707963267949, 0.188495559215388, 0.219911485751286, 0.251327412287183, 0.282743338823081, 0.314159265358979, 0.345575191894877, 0.376991118430775, 0.408407044966673, 0.439822971502571, 0.471238898038469, 0.502654824574367, 0.534070751110265, 0.565486677646163, 0.596902604182061, 0.628318530717959, 0.659734457253857, 0.691150383789754, 0.722566310325652, 0.75398223686155, 0.785398163397448, 0.816814089933346, 0.848230016469244, 0.879645943005142, 0.91106186954104, 0.942477796076938, 0.973893722612836, 1.00530964914873, 1.03672557568463, 1.06814150222053, 1.09955742875643, 1.13097335529233, 1.16238928182822, 1.19380520836412, 1.22522113490002, 1.25663706143592, 1.28805298797181, 1.31946891450771, 1.35088484104361, 1.38230076757951, 1.41371669411541, 1.4451326206513, 1.4765485471872, 1.5079644737231, 1.539380400259, 1.5707963267949, 1.60221225333079, 1.63362817986669, 1.66504410640259, 1.69646003293849, 1.72787595947439, 1.75929188601028, 1.79070781254618, 1.82212373908208, 1.85353966561798, 1.88495559215388, 1.91637151868977, 1.94778744522567, 1.97920337176157, 2.01061929829747, 2.04203522483337, 2.07345115136926, 2.10486707790516, 2.13628300444106, 2.16769893097696, 2.19911485751286, 2.23053078404875, 2.26194671058465, 2.29336263712055, 2.32477856365645, 2.35619449019234, 2.38761041672824, 2.41902634326414, 2.45044226980004, 2.48185819633594, 2.51327412287183, 2.54469004940773, 2.57610597594363, 2.60752190247953, 2.63893782901543, 2.67035375555132, 2.70176968208722, 2.73318560862312, 2.76460153515902, 2.79601746169492, 2.82743338823081, 2.85884931476671, 2.89026524130261, 2.92168116783851, 2.95309709437441, 2.9845130209103, 3.0159289474462, 3.0473448739821, 3.078760800518, 3.1101767270539, 3.14159265358979, 3.17300858012569, 3.20442450666159, 3.23584043319749, 3.26725635973339, 3.29867228626928, 3.33008821280518, 3.36150413934108, 3.39292006587698,
                3.42433599241287, 3.45575191894877, 3.48716784548467, 3.51858377202057, 3.54999969855647, 3.58141562509236, 3.61283155162826, 3.64424747816416, 3.67566340470006, 3.70707933123596, 3.73849525777185, 3.76991118430775, 3.80132711084365, 3.83274303737955, 3.86415896391545, 3.89557489045134, 3.92699081698724, 3.95840674352314, 3.98982267005904, 4.02123859659494, 4.05265452313083, 4.08407044966673, 4.11548637620263, 4.14690230273853, 4.17831822927443, 4.20973415581032, 4.24115008234622, 4.27256600888212, 4.30398193541802, 4.33539786195391, 4.36681378848981, 4.39822971502571, 4.42964564156161, 4.46106156809751, 4.4924774946334, 4.5238934211693, 4.5553093477052, 4.5867252742411, 4.618141200777, 4.64955712731289, 4.68097305384879, 4.71238898038469, 4.74380490692059, 4.77522083345649, 4.80663675999238, 4.83805268652828, 4.86946861306418, 4.90088453960008, 4.93230046613597, 4.96371639267187, 4.99513231920777, 5.02654824574367, 5.05796417227957, 5.08938009881546, 5.12079602535136, 5.15221195188726, 5.18362787842316, 5.21504380495906, 5.24645973149496, 5.27787565803085, 5.30929158456675, 5.34070751110265, 5.37212343763855, 5.40353936417444, 5.43495529071034, 5.46637121724624, 5.49778714378214, 5.52920307031804, 5.56061899685393, 5.59203492338983, 5.62345084992573, 5.65486677646163, 5.68628270299753, 5.71769862953342, 5.74911455606932, 5.78053048260522, 5.81194640914112, 5.84336233567702, 5.87477826221291, 5.90619418874881, 5.93761011528471, 5.96902604182061, 6.0004419683565, 6.0318578948924, 6.0632738214283, 6.0946897479642, 6.1261056745001, 6.15752160103599, 6.18893752757189, 6.22035345410779, 6.25176938064369, 6.28318530717959, 0.0314159265358979 };
            // y is a noisy sine wave: the noise-free values, previously kept here as a
            // commented-out (and unused) yNoiseFree array, equal sin(x).
            double[] y = new double[] { -0.438495007653319, -0.135116126211042, -0.163610751105257, 0.132863267635695, 0.0524643517312222, 0.400274090892978, 0.796771685251157, 0.279545359091931, 0.338340886238583, 0.144814288944755, 0.457971785116649, 0.274333907845178, 0.338461682691463, 0.472073789237583, 0.511866974863626, 0.818499335551353, 0.537817302617911, 0.294176095457207, 0.875674094323761, 0.380441772199637, 0.826762302413423, 0.435964676100846, 0.905908395714408, 0.570576448655549, 0.797046592362715, 0.68503666204753, 0.64235952706639, 0.532205527664163, 0.828286045845809, 0.937206322208813, 0.683811602083087, 1.05430944732177, 0.743184664860489, 0.794539428540312, 1.07730370064279, 1.098631949881, 0.672816293027005, 1.17211633677854, 0.877086204922403, 1.03004168421902, 0.902888896336148, 1.27023780545471, 1.07903919921838, 1.03274663949137, 1.02481738203823, 1.1836223335171, 0.915391655929794, 1.1431976206183, 0.692911211311207, 1.137939791608, 1.03199287571853, 1.01111737237107, 0.997136596106402, 1.21725721528292, 0.989210152559246, 0.660081800497204, 0.892556882879024, 1.09022380363092, 0.84839040168305, 0.477051979180911, 0.837353588073868, 0.397782608354934, 0.639138765126686, 0.565750324218709, 1.07613742914717, 1.03205285638333, 0.79746587619746, 1.12779873229747, 1.06705085875411, 0.861027339192553, 0.799413198375161, 1.11257883737409, 0.730629352970381, 0.943243268747865, 0.892469042740675, 1.00919353433288, 0.684034008520742, 0.432131022116734, 0.975472919665821, 0.589515692640518, 0.695531474836397, 0.521878739296515, 0.464546771322712, 0.358316892811621, 0.414633205381794, 0.517443957969762, 0.293016655398185, 0.235447400976673, -0.00926823575523461, 0.121164653569605, 0.256592021803055, 0.214658649346121, 0.33912112027486, 0.325958358074084, 0.285810470194787, 0.34655964723119, -0.17434988863584, 0.0602908853738413, 0.420528782359059, -0.138160595569191, 0.0579710448365516, 0.0943638713024924, -0.13719401837148, -0.410522506673889,
                -0.103789013834978, -0.15511647828956, -0.436421008359735, -0.283161261647352, -0.334563650030411, -0.722814928703856, -0.288063614619636, -0.336686829046373, -0.612717819457029, -0.41534563638804, -0.509962510892387, -0.506985032683713, -0.27484279063864, -0.842681862815581, -0.67959538709385, -0.551338930096661, -0.606565733481637, -0.569581290026929, -0.994233464441435, -0.423715224231713, -0.665099371429767, -0.638795785712847, -1.02738736532579, -0.999768123053881, -0.590147741420311, -0.621492108948908, -0.586554191614103, -0.905510400113783, -0.983665614065139, -0.754485292536081, -0.840905549105064, -1.17942254275345, -0.944955353222801, -0.742753352423879, -1.07603536675966, -1.01354368767099, -0.540782489063199, -1.17241450283963, -0.881256390720862, -0.681947475127311, -0.808741657511723, -0.846096476673637, -0.819695721244856, -0.98073546876888, -1.06943242085061, -1.24440498170571, -1.0168216694769, -0.908776907891879, -1.09154331785564, -0.91954683676508, -0.859946609253743, -0.863957311579265, -0.586493577257266, -1.07898174356077, -0.862006413164988, -0.950792351740666, -1.00435763998871, -0.902034513326301, -0.880374990240107, -0.736260805837202, -0.819522059473402, -0.935519941961085, -0.864540903606086, -0.435525069870341, -0.779770476819053, -1.00614622202368, -0.491313063228947, -0.693475264796651, -0.896569081270116, -0.76641371166947, -0.68780249579973, -0.494018668944984, -0.727421221819308, -1.08700976489548, -0.540502747058334, -0.821694618268451, -0.572952794712188, -0.540228013195778, -0.653305921106792, -0.340128693528457, 0.223794587029637, -0.549190844661796, -0.521926379128445, 0.0307549183334237, -0.223516825036478, -0.461927585211946, -0.0625733806235761, -0.146780172246082, -0.140314473043836, -0.373898592219125, -0.45352696158848, -0.149358794131913, 0.0410142262157374, 0.182483893948253, 0.0170008620485262, 0.0580341187809543 };
            // fittedR is obtained by the following command in R version 3.0.1 (2013-05-16):
            // loess(y ~ x, span=0.3, degree=1)
            double[] fittedR = new double[] { 0.0701670493925478, 0.0966642664442614, 0.123206484890627, 0.149573862592495, 0.175546557410716, 0.200904727206141, 0.226156537549996, 0.251799371357136, 0.27759721250386, 0.303314044866466, 0.328713852321254, 0.353560618744522, 0.378345507471646, 0.403621903019156, 0.429247121035075, 0.455078477167428, 0.480973287064239, 0.506788866373533, 0.532382530743334, 0.557611595821667, 0.582333377256556, 0.606405190696024, 0.629684351788098, 0.652028176180801, 0.674232281366688, 0.696896589186194, 0.719584871809167, 0.741860901405455, 0.763288450144904, 0.783431290197361, 0.801853193732674, 0.818061360507526, 0.83221097069007, 0.844837767067354, 0.85647749242643, 0.867665889554346, 0.878938701238153, 0.889772478321425, 0.899306551013233, 0.907570831148866, 0.914595230563612, 0.920409661092759, 0.925044034571598, 0.928528262835416, 0.930892257719504, 0.932165931059148, 0.932379194689639, 0.931561960446265, 0.929744140164316, 0.928223841497832, 0.927842412788768, 0.927944626904281, 0.92787525671153, 0.926979075077673, 0.924600854869868, 0.920085368955272, 0.91398998203441, 0.907341624397284, 0.900189417295261, 0.892582481979712, 0.884569939702007, 0.876200911713515, 0.866735657613299, 0.855524447234773, 0.842751417180835, 0.828600704054381, 0.813256444458307, 0.796902774995512, 0.779723832268891, 0.761903752881343, 0.743626673435763, 0.72507673053505, 0.706438060782099, 0.687894800779807, 0.669696098545702, 0.651665451868432, 0.633348522007581, 0.614290970222731, 0.594038457773466, 0.57213664591937, 0.548131195920024, 0.523505017083946, 0.499515744521094, 0.475371128193217, 0.450278918062061, 0.423446864089374, 0.394082716236902, 0.36291831438723, 0.331268186196037, 0.299214113284705, 0.266837877274619, 0.234221259787162, 0.201446042443718, 0.168594006865672, 0.135746934674407, 0.102986607491307, 0.0703948069377568, 0.0380533146351394, 0.00604391220483971, -0.0252717809321891, -0.0557514006269065, -0.0856420481657969,
                -0.115190824835346, -0.144644831922039, -0.174251170712361, -0.204256942492797, -0.233674202797518, -0.261696516167109, -0.288843247397681, -0.315633761285341, -0.342587422626199, -0.370223596216364, -0.398096782232609, -0.425383363724427, -0.452035529797007, -0.478005469555539, -0.503245372105214, -0.527707426551221, -0.55134382199875, -0.574106747552993, -0.595948392319138, -0.616820945402374, -0.636676595907894, -0.655467532940885, -0.673402960722769, -0.690706349395678, -0.707351286945873, -0.723311361359614, -0.738560160623162, -0.753071272722777, -0.766818285644721, -0.779787114705371, -0.792010051598803, -0.803530248877822, -0.814390859095234, -0.824635034803842, -0.834305928556454, -0.843292260085101, -0.851457539936741, -0.858807106886565, -0.865346299709765, -0.871080457181532, -0.876014918077057, -0.880155021171533, -0.883506105240151, -0.886073509058102, -0.887862571400578, -0.88887863104277, -0.889127026759871, -0.888722630032441, -0.887743892190172, -0.886141521781944, -0.883866227356638, -0.880868717463137, -0.877099700650321, -0.872509885467072, -0.867242457034182, -0.861426853142368, -0.854993161345405, -0.847871469197065, -0.839991864251121, -0.831284434061346, -0.821279613511578, -0.809656318848534, -0.796567359670311, -0.782165545575009, -0.766603686160722, -0.750034591025549, -0.732611069767586, -0.71448593198493, -0.695811987275678, -0.676742045237926, -0.657428915469773, -0.638025407569315, -0.617941961650816, -0.596719668311494, -0.574707312873871, -0.552253680660472, -0.529707556993818, -0.507417727196433, -0.485732976590841, -0.464137646888699, -0.442018267824757, -0.419576906756171, -0.397015631040102, -0.374536508033707, -0.352341605094147, -0.330120276800549, -0.307469991502247, -0.284454987211706, -0.261139501941389, -0.237587773703763, -0.21386404051129, -0.190032540376436, -0.166157511311666, -0.142303191329443, -0.118533818442232, -0.0949136306624986, -0.0715068660027062, 0.0439346758746353 };

            // No robustness iterations, fitted values computed
            {
                LoessInterpolator loess = new LoessInterpolator(0.3, 0);
                var model = loess.Train(x, y, 0.01);
                double[] predicted = new double[x.Length];
                for (int i = 0; i < x.Length; i++)
                {
                    predicted[i] = model.Predict(x[i]);
                }

                // Accumulate the total absolute deviation from the R fit, both for the
                // fitted values stored in the model and for point-by-point predictions
                double diff = 0;
                double diff2 = 0;
                for (int i = 0; i < fittedR.Length; i++)
                {
                    diff += Math.Abs(fittedR[i] - model.Fitted.ElementAt(i));
                    diff2 += Math.Abs(fittedR[i] - predicted[i]);
                }

                // absolute difference per point: 0.00155 (0.31 in total over the 200 points)
                Assert.IsTrue(diff < 0.31);
                Assert.IsTrue(diff2 < 0.31);

                // check the order: model.Xs must be in the original input order
                for (int i = 0; i < x.Length; i++)
                {
                    Assert.AreEqual(x[i], model.Xs.ElementAt(i));
                }

                // predict a bunch of x's: batch prediction must equal point-by-point prediction
                double[] predicted2 = model.Predict(x);
                for (int i = 0; i < x.Length; i++)
                {
                    Assert.AreEqual(predicted[i], predicted2[i]);
                }
            }

            // Do not compute fitted
            {
                LoessInterpolator loess = new LoessInterpolator(0.3, 0);
                var model = loess.Train(x, y, 0.01, computeFitted: false);

                Assert.IsNull(model.SortedFitted);
            }

            // computeFitted is ignored when robustnessIters is > 0
            {
                LoessInterpolator loess = new LoessInterpolator(0.3, 2);
                var model = loess.Train(x, y, 0.01, computeFitted: false);
                // loess(y ~ x, weights=weights, span=0.3, degree=1)
                // weights are generated by LoessInterpolator
                double[] weightedFittedR = new double[] { 0.067896131699893, 0.0937671603290653, 0.119682145495551, 0.145442877453464, 0.17085114645692, 0.195708742760033, 0.220464478322975, 0.245562548120809, 0.270797816929995, 0.29596514952699, 0.320859410688251, 0.345275465190237, 0.369610750792407, 0.394325196677632, 0.419307404757484, 0.444445976943533, 0.469629515147352, 0.494746621280512, 0.519685897254584, 0.544335944981141, 0.568585366371752, 0.59232276333799, 0.615436737791427, 0.637815891643633, 0.660140407108971, 0.682894544719337, 0.705673023862608, 0.728070563926657, 0.749681884299359, 0.770101704368589, 0.788924743522222, 0.805774801027258, 0.820828832623564, 0.834511292408314, 0.847246634478681, 0.859459312931839, 0.871573781864962, 0.883140902449995, 0.893449986058881, 0.902533162088794, 0.91042255993691, 0.9171503090004, 0.922748538676439, 0.9272493783622, 0.930684957454858, 0.933087405351585, 0.934488851449556, 0.934921425145945, 0.934417255837924, 0.93386684016055, 0.933826845289151, 0.933828656431312, 0.933403658794618, 0.932083237586653, 0.929398778015003, 0.924881665287252, 0.918788626883738, 0.911782143872259, 0.904002231841591, 0.895588906380511, 0.886682183077796, 0.877422077522222, 0.867226001958794, 0.855509124737541, 0.842408095604772, 0.828059564306795, 0.81260018058992, 0.796166594200456, 0.778895454884712, 0.760923412388997, 0.74238711645962, 0.723423216842891, 0.704168363285117, 0.684759205532608, 0.665704228075736, 0.647071289188464, 0.628336099292489, 0.608974368809508, 0.58846180816122, 0.56627412776932, 0.541887038055507, 0.5167431307807, 0.492179144112526, 0.467512008082737, 0.442058652723083, 0.415136008065318, 0.386061004141194, 0.355475366574083, 0.324518351459999, 0.293253184539662, 0.26174309155379, 0.230051298243102, 0.198241030348316, 0.166375513610151, 0.134517973769327, 0.10273163656656, 0.0710797277425703, 0.0396254730380762, 0.00843209819379684, -0.0221363023187925, -0.0519129540622342, -0.0811306113468359,
                    -0.110022028482906, -0.138819959780753, -0.167757159550685, -0.197066382103009, -0.225864619213675, -0.253409037166875, -0.280142652035283, -0.306508479891574, -0.332949536808421, -0.3599088388585, -0.387007180010091, -0.413544998261562, -0.439484775730711, -0.464788994535339, -0.489420136793244, -0.513340684622227, -0.536513120140087, -0.558899925464625, -0.580463582713639, -0.601166574004929, -0.620971381456295, -0.639840487185537, -0.657884776748385, -0.675243793415906, -0.691923005878904, -0.707927882828182, -0.723263892954545, -0.737936504948795, -0.751951187501738, -0.765242045964663, -0.777775662355465, -0.789606220364853, -0.800787903683539, -0.811374896002231, -0.82142138101164, -0.83084815892713, -0.839534223672416, -0.847471049500735, -0.854650110665327, -0.861062881419428, -0.866700836016277, -0.871555448709111, -0.875618193751169, -0.878880545395688, -0.881333977895905, -0.882969965505061, -0.88377998247639, -0.883857057939925, -0.883274262969783, -0.881993134735335, -0.879975210405955, -0.877182027151013, -0.873575122139883, -0.869116032541934, -0.863986931781332, -0.858341370021714, -0.852097949541662, -0.845175272619755, -0.837491941534576, -0.828966558564705, -0.819205445067475, -0.807960959377884, -0.795370759241012, -0.781572502401939, -0.766703846605743, -0.750902449597504, -0.734305969122301, -0.717052062925214, -0.699278388751322, -0.681122604345704, -0.66272236745344, -0.644215335819608, -0.624870908793012, -0.604155742677471, -0.582503380650589, -0.560347365889973, -0.538121241573225, -0.516258550877951, -0.495192836981755, -0.474286317509687, -0.452773881223414, -0.430898078005473, -0.408901457738401, -0.387026570304731, -0.365515965587, -0.34404085641848, -0.322152063553529, -0.29991154875632, -0.277381273791024, -0.254623200421811, -0.231699290412853, -0.208671505528321, -0.185601807532386, -0.162552158189219, -0.139584519262992, -0.116760852517875, -0.0941431197180395, 0.0422672693539192 };

                Assert.IsNotNull(model.SortedFitted); // Fitted y values are computed

                // Bound the loop by the array that is actually indexed
                double diff = 0;
                for (int i = 0; i < weightedFittedR.Length; i++)
                {
                    diff += Math.Abs(weightedFittedR[i] - model.Fitted.ElementAt(i));
                }

                // absolute difference per point: 0.00155 (0.31 in total over the 200 points)
                Assert.IsTrue(diff < 0.31);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Objective function evaluated for a candidate LOESS bandwidth.
        /// Based on Donavan Cheng's R script (loessnormalize_dev_v2.R).
        /// Fits a LOESS curve of counts against GC, subtracts that curve from the counts
        /// (re-centering on the median count), fits a second LOESS curve to the normalized
        /// counts and returns the standard deviation of the second fit.
        /// </summary>
        /// <param name="bandwidth">LOESS bandwidth to evaluate</param>
        /// <param name="gcs">GC content per observation; truncated to an integer when used as a lookup index</param>
        /// <param name="counts">coverage after variance stabilization. Assumed to be normally distributed</param>
        /// <returns>Standard deviation of the LOESS fit to the GC-normalized counts</returns>
        private static double objective(double bandwidth, double[] gcs, double[] counts)
        {
            double medianY = Utilities.Median(counts);
            int minGC = (int)gcs.Min();
            int maxGC = (int)gcs.Max();
            // Enumerable.Range takes (start, count), not (start, end): one prediction is needed
            // for every integer GC value in [minGC, maxGC] so that fittedByGC[gc - minGC] is
            // always a valid index (also when minGC is 0).
            int gcCount = maxGC - minGC + 1;

            LoessInterpolator loess = new LoessInterpolator(bandwidth, 0);
            // First LOESS: remove the GC trend from the counts and re-center on the median
            double[] normalized = new double[counts.Length];
            {
                var model = loess.Train(gcs, counts, 1, computeFitted: false);
                double[] fittedByGC = model.Predict(Enumerable.Range(minGC, gcCount).Select(i => (double)i));
                for (int i = 0; i < normalized.Length; i++)
                {
                    int gc = (int)gcs[i];
                    normalized[i] = counts[i] - fittedByGC[gc - minGC] + medianY;
                }
            }
            // Second LOESS: fit the normalized counts against GC
            double[] fitted = new double[counts.Length];
            {
                var model = loess.Train(gcs, normalized, 1, computeFitted: false);
                double[] fittedByGC = model.Predict(Enumerable.Range(minGC, gcCount).Select(i => (double)i));
                for (int i = 0; i < fitted.Length; i++)
                {
                    int gc = (int)gcs[i];
                    fitted[i] = fittedByGC[gc - minGC];
                }
            }

            return Utilities.StandardDeviation(fitted);
        }