diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj index fd456944..ce317cc5 100644 --- a/src/FSharp.Stats/FSharp.Stats.fsproj +++ b/src/FSharp.Stats/FSharp.Stats.fsproj @@ -113,6 +113,7 @@ + diff --git a/src/FSharp.Stats/Testing/TestStatistics.fs b/src/FSharp.Stats/Testing/TestStatistics.fs index e81a5279..9b5839f0 100644 --- a/src/FSharp.Stats/Testing/TestStatistics.fs +++ b/src/FSharp.Stats/Testing/TestStatistics.fs @@ -96,4 +96,26 @@ module TestStatistics = let cdf = Distributions.Continuous.Normal.CDF 0. 1. statistic let pvalue = 1.- cdf let pvalueTwoTailed = pvalue * 2. - {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed} \ No newline at end of file + {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed} + + + /// + /// Computes the Mann-Whitney U-test statistics for a given statistic. + /// + /// The test statistic. + /// One Tailed/Sided. + /// Two Tailed/Sided. + type UTestTestStatistics = { + Statistic : float + PValueLeft : float + PValueRight : float + PValueTwoTailed : float + } + let createUTest statistic : UTestTestStatistics = + let cdf = Distributions.Continuous.Normal.CDF 0. 1. statistic + { + Statistic = statistic + PValueLeft = 1. - cdf + PValueRight = cdf + PValueTwoTailed = cdf * 2. + } \ No newline at end of file diff --git a/src/FSharp.Stats/Testing/UTest.fs b/src/FSharp.Stats/Testing/UTest.fs new file mode 100644 index 00000000..35bcbf66 --- /dev/null +++ b/src/FSharp.Stats/Testing/UTest.fs @@ -0,0 +1,51 @@ +namespace FSharp.Stats.Testing + +// taken/implemented from: https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#U_statistic +module UTest = + + open FSharp.Stats + open FSharp.Stats.Testing + + // TO DO: Bergmann et al. (2000) showed that there are different implementations of this test that lead to different results. + // They implied that some of them are using a false algorithm. 
Check if the mathematical derivation from above is wrong too. + // Read: https://www.jstor.org/stable/2685616 + let inline private compute (seq1 : seq<'T>) (seq2 : seq<'T>) = + let sortedMerge = + (seq1 |> Seq.map (fun v -> float v, 0), seq2 |> Seq.map (fun v -> float v, 1)) // 0 = first group; 1 = second group + ||> Seq.append + |> Seq.sortByDescending (fun (v,groupIndex) -> v) + |> Array.ofSeq + // let abundance = // method for equal ranks instead of mean ranks when identical values occur. + // sortedMerge + // |> Array.map ( + // fun v -> Array.filter (fun v2 -> v2 = v) sortedMerge + // >> Array.length + // ) + // let myMap = sortedMerge |> Array.mapi (fun i x -> x, i + 2 - Array.item i abundance) |> Map // wrong: must return mean of ranksums with equal ranks, not always the same rank! + // let rankedMerge = sortedMerge |> Array.map (fun (v,group) -> float myMap.[(v,group)],v,group) + let rankedMerge = // method for mean ranks instead of equal ranks when identical values occur. + sortedMerge + |> Array.map fst + |> Rank.rankAverage + |> fun res -> + (sortedMerge, res) + ||> Array.map2 (fun (v,group) rank -> rank, v, group) + let calcRankSum group = + rankedMerge + |> Array.filter (fun (rank,v,group') -> group' = group) + |> Array.fold (fun state (rank,v,group') -> state + rank) 0. + let rankSumSeq1 = calcRankSum 0 + let rankSumSeq2 = calcRankSum 1 + let seq1Length = Seq.length seq1 |> float + let seq2Length = Seq.length seq2 |> float + let u1 = seq1Length * seq2Length + (seq1Length * (seq1Length + 1.) / 2.) - rankSumSeq1 + let u2 = seq1Length * seq2Length + (seq2Length * (seq2Length + 1.) / 2.) - rankSumSeq2 + let uMin = min u1 u2 + let z = (uMin - seq1Length * seq2Length / 2.) / System.Math.Sqrt (seq1Length * seq2Length * (seq1Length + seq2Length + 1.) / 12.) + z + + /// Computes a Mann-Whitney U-test. Aka Wilcoxon-Mann-Whitney test or Wilcoxon rank-sum test. + /// Use this test for independent samples and the Wilcoxon signed-rank test for dependent (paired) samples. 
+ let inline computeUtest (seq1 : seq<'T>) (seq2 : seq<'T>) = + let z = compute seq1 seq2 + TestStatistics.createUTest z \ No newline at end of file diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs index a3d475b9..cad3b779 100644 --- a/tests/FSharp.Stats.Tests/Testing.fs +++ b/tests/FSharp.Stats.Tests/Testing.fs @@ -235,6 +235,34 @@ let tTestTests = Expect.floatClose Accuracy.low tTest4.Statistic 0.514 "t statistic should be equal." ] + +[] +let uTestTests = + // taken from https://de.wikipedia.org/wiki/Wilcoxon-Mann-Whitney-Test#Beispiel + let testList1 = + ([0;400;500;550;600;650;750;800;900;950;1000;1100;1200;1500;1600;1800;1900;2000;2200;3500 ],["M";"W";"M";"W";"M";"W";"M";"M";"W";"W";"M";"M";"W";"M";"W";"M";"M";"M";"M";"M"]) + ||> List.map2 (fun pay sex -> sex, pay) |> List.sortBy fst + + let testList1A = testList1 |> List.choose (fun (sex,pay) -> if sex = "W" then Some pay else None) + let testList1B = testList1 |> List.choose (fun (sex,pay) -> if sex = "M" then Some pay else None) + + let observedResult1 = UTest.computeUtest testList1A testList1B + let expectedResult1 : TestStatistics.UTestTestStatistics = { + Statistic = -1.15 + PValueTwoTailed = 0.2505 + PValueLeft = 0.875 + PValueRight = 0.1253 + } + + testList "Testing.UTest" [ + testCase "TwoSample" <| fun () -> + Expect.floatClose Accuracy.low observedResult1.PValueLeft expectedResult1.PValueLeft "left p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.PValueRight expectedResult1.PValueRight "right p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.PValueTwoTailed expectedResult1.PValueTwoTailed "p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.Statistic expectedResult1.Statistic "test statistic should be equal" + ] + + [] let chiSquaredTests = // ChiSquared https://www.graphpad.com/quickcalcs/chisquared2/