diff --git a/benchmarks/speed_review/baseline_results.json b/benchmarks/speed_review/baseline_results.json new file mode 100644 index 00000000..8a8ad6d7 --- /dev/null +++ b/benchmarks/speed_review/baseline_results.json @@ -0,0 +1,3562 @@ +{ + "reg_nocov": { + "overall_att": 1.9565444226330286, + "overall_se": 0.01753672927010524, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9231467815232532, + 1.9897062673911432 + ], + "group_time_effects": { + "3,2": { + "effect": 0.03751424409792154, + "se": 0.04250845986138317, + "t_stat": 0.8825124274145103, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.04134405172061957, + 0.12031299279614002 + ] + }, + "3,3": { + "effect": 1.9098435539341185, + "se": 0.03876292337670987, + "t_stat": 49.26985344664742, + "p_value": 0.005, + "conf_int": [ + 1.8348324506766724, + 1.9754689771332 + ] + }, + "3,4": { + "effect": 1.9464031448859307, + "se": 0.040662986415517126, + "t_stat": 47.86670425522846, + "p_value": 0.005, + "conf_int": [ + 1.8713614860920562, + 2.026576833072452 + ] + }, + "3,5": { + "effect": 1.9371731454821308, + "se": 0.036329268049874505, + "t_stat": 53.322658271636236, + "p_value": 0.005, + "conf_int": [ + 1.8657990985053579, + 2.004424823326166 + ] + }, + "3,6": { + "effect": 1.9419816133638255, + "se": 0.04209109837958917, + "t_stat": 46.13758462301216, + "p_value": 0.005, + "conf_int": [ + 1.8572732405876855, + 2.0227716746860014 + ] + }, + "3,7": { + "effect": 1.9124787328194162, + "se": 0.04221485387558251, + "t_stat": 45.303454998469455, + "p_value": 0.005, + "conf_int": [ + 1.8294484623622096, + 1.9891133302300121 + ] + }, + "3,8": { + "effect": 1.8920586764813514, + "se": 0.04113850991762076, + "t_stat": 45.99239691155975, + "p_value": 0.005, + "conf_int": [ + 1.8119125450151634, + 1.9773133919867345 + ] + }, + "3,9": { + "effect": 1.9483105024068341, + "se": 0.04005156737474546, + "t_stat": 48.645050121942106, + "p_value": 0.005, + "conf_int": [ + 1.8679286743483727, + 2.02459730395813 + ] + }, + 
"3,10": { + "effect": 1.953274094661052, + "se": 0.04159680052392647, + "t_stat": 46.95731570839275, + "p_value": 0.005, + "conf_int": [ + 1.8775003930661367, + 2.037901169041875 + ] + }, + "5,2": { + "effect": 0.051760802261117136, + "se": 0.041357442612335496, + "t_stat": 1.2515474601826244, + "p_value": 0.1708542713567839, + "conf_int": [ + -0.025401449078849638, + 0.13686728012964886 + ] + }, + "5,3": { + "effect": -0.020484034688861726, + "se": 0.03779355041261996, + "t_stat": -0.5419981574957227, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.10044302149207958, + 0.04688437028226404 + ] + }, + "5,4": { + "effect": 0.014595046418748696, + "se": 0.03917230431782976, + "t_stat": 0.37258585301313457, + "p_value": 0.6030150753768844, + "conf_int": [ + -0.052566185209354986, + 0.09785968877811445 + ] + }, + "5,5": { + "effect": 1.9852869763764562, + "se": 0.0390843733850405, + "t_stat": 50.79490354926153, + "p_value": 0.005, + "conf_int": [ + 1.902710144441338, + 2.059294765506173 + ] + }, + "5,6": { + "effect": 1.9727993646105488, + "se": 0.04159371164995848, + "t_stat": 47.430231310278316, + "p_value": 0.005, + "conf_int": [ + 1.8803480561553434, + 2.049545166651633 + ] + }, + "5,7": { + "effect": 1.9591799976904642, + "se": 0.04231767428050634, + "t_stat": 46.29696766187743, + "p_value": 0.005, + "conf_int": [ + 1.868282948510058, + 2.0367339114258103 + ] + }, + "5,8": { + "effect": 1.958428970095688, + "se": 0.038264153956428834, + "t_stat": 51.18181816657281, + "p_value": 0.005, + "conf_int": [ + 1.8803167222206518, + 2.0275547227443083 + ] + }, + "5,9": { + "effect": 1.9681461868582009, + "se": 0.04053043144313915, + "t_stat": 48.55971468301164, + "p_value": 0.005, + "conf_int": [ + 1.8877728670508958, + 2.03823702849227 + ] + }, + "5,10": { + "effect": 2.0416481039146452, + "se": 0.040991615211233, + "t_stat": 49.80648099358547, + "p_value": 0.005, + "conf_int": [ + 1.9612912464308463, + 2.1230139712520577 + ] + }, + "7,2": { + "effect": 
0.1385664454327274, + "se": 0.04177163134163925, + "t_stat": 3.317238062823754, + "p_value": 0.005, + "conf_int": [ + 0.05357645818792484, + 0.21578905820424268 + ] + }, + "7,3": { + "effect": -0.020801914399985924, + "se": 0.039904243239296144, + "t_stat": -0.521295799928891, + "p_value": 0.5527638190954773, + "conf_int": [ + -0.10382047085556109, + 0.0527802272616287 + ] + }, + "7,4": { + "effect": -0.03290920728468039, + "se": 0.03589115123670015, + "t_stat": -0.9169170157748912, + "p_value": 0.44221105527638194, + "conf_int": [ + -0.09522845903294508, + 0.03765574891172209 + ] + }, + "7,5": { + "effect": 0.03152847104743951, + "se": 0.035696791293565344, + "t_stat": 0.883229833968937, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.0396144054662609, + 0.09391827026219171 + ] + }, + "7,6": { + "effect": -0.023434977124503778, + "se": 0.0385932768184637, + "t_stat": -0.6072295243220206, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.10213037541827932, + 0.0498167888689546 + ] + }, + "7,7": { + "effect": 1.962008938917626, + "se": 0.0410095116904627, + "t_stat": 47.84277739580881, + "p_value": 0.005, + "conf_int": [ + 1.886677167974591, + 2.0389274254646383 + ] + }, + "7,8": { + "effect": 1.9412332437767443, + "se": 0.03992163612042527, + "t_stat": 48.62609432942412, + "p_value": 0.005, + "conf_int": [ + 1.8670041726862567, + 2.0199077840175996 + ] + }, + "7,9": { + "effect": 1.9473086798301407, + "se": 0.039933580041345354, + "t_stat": 48.763689051018936, + "p_value": 0.005, + "conf_int": [ + 1.880714264334792, + 2.0223100947925112 + ] + }, + "7,10": { + "effect": 2.036605363614604, + "se": 0.0437327865432517, + "t_stat": 46.56930245230092, + "p_value": 0.005, + "conf_int": [ + 1.9487820792317982, + 2.1102806584603533 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.1385664454327274, + "se": 0.04177163134163925, + "t_stat": 3.317238062823754, + "p_value": 0.005, + "conf_int": [ + 0.053576458187924954, + 0.21578905820424268 + ] + }, + "-4": { 
+ "effect": -0.020801914399985924, + "se": 0.039904243239296144, + "t_stat": -0.521295799928891, + "p_value": 0.5527638190954773, + "conf_int": [ + -0.1038204708555611, + 0.05278022726162879 + ] + }, + "-3": { + "effect": 0.01028353780558302, + "se": 0.027031783477101997, + "t_stat": 0.3804239485083908, + "p_value": 0.5728643216080402, + "conf_int": [ + -0.04218764132679645, + 0.06362530856202536 + ] + }, + "-2": { + "effect": 0.004995311150265181, + "se": 0.025770238716330426, + "t_stat": 0.19384031344264133, + "p_value": 0.9346733668341709, + "conf_int": [ + -0.05048315908353849, + 0.050498894954111744 + ] + }, + "-1": { + "effect": 0.009731344309417293, + "se": 0.021976783420507896, + "t_stat": 0.4428011198552548, + "p_value": 0.6231155778894473, + "conf_int": [ + -0.03502350175188207, + 0.05322124156317611 + ] + }, + "0": { + "effect": 1.9526658870529099, + "se": 0.019981278722830554, + "t_stat": 97.72477097883626, + "p_value": 0.005, + "conf_int": [ + 1.9121887193303653, + 1.9899256074485754 + ] + }, + "1": { + "effect": 1.9537137670524056, + "se": 0.017596311406184985, + "t_stat": 111.02973355914175, + "p_value": 0.005, + "conf_int": [ + 1.918004489857355, + 1.9910780672447774 + ] + }, + "2": { + "effect": 1.94800002677571, + "se": 0.020978675075759304, + "t_stat": 92.85619896113501, + "p_value": 0.005, + "conf_int": [ + 1.906118298469148, + 1.98707431469016 + ] + }, + "3": { + "effect": 1.9785874921406903, + "se": 0.02125668801485948, + "t_stat": 93.08070432974127, + "p_value": 0.005, + "conf_int": [ + 1.9387054341925805, + 2.0170260168578484 + ] + }, + "4": { + "effect": 1.9407177784249676, + "se": 0.028948806126954205, + "t_stat": 67.03964819530043, + "p_value": 0.005, + "conf_int": [ + 1.880617488480106, + 1.9922328621634935 + ] + }, + "5": { + "effect": 1.9679425611737245, + "se": 0.02889361409238114, + "t_stat": 68.10994827028733, + "p_value": 0.005, + "conf_int": [ + 1.9161605534608943, + 2.0208995317340386 + ] + }, + "6": { + "effect": 
1.9483105024068341, + "se": 0.04005156737474547, + "t_stat": 48.6450501219421, + "p_value": 0.005, + "conf_int": [ + 1.8679286743483727, + 2.02459730395813 + ] + }, + "7": { + "effect": 1.953274094661052, + "se": 0.04159680052392647, + "t_stat": 46.95731570839275, + "p_value": 0.005, + "conf_int": [ + 1.8775003930661365, + 2.037901169041875 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9301904330043325, + "se": 0.030438318925920818, + "t_stat": 63.41317461394397, + "p_value": 0.005, + "conf_int": [ + 1.8702219368960384, + 1.9861420614466454 + ] + }, + "5": { + "effect": 1.9809149332576672, + "se": 0.03170174036975451, + "t_stat": 62.48599951148382, + "p_value": 0.005, + "conf_int": [ + 1.9184994180296897, + 2.0357338459955687 + ] + }, + "7": { + "effect": 1.9717890565347787, + "se": 0.03344476401759643, + "t_stat": 58.956584519399, + "p_value": 0.005, + "conf_int": [ + 1.9113592178120258, + 2.0361181246440454 + ] + } + } + }, + "reg_2cov": { + "overall_att": 1.9563661542729416, + "overall_se": 0.01753803022259176, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9229960904533978, + 1.9894497073809283 + ], + "group_time_effects": { + "3,2": { + "effect": 0.03750649998258264, + "se": 0.04254400670252115, + "t_stat": 0.8815930348271597, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.0411958593410233, + 0.12040825494590628 + ] + }, + "3,3": { + "effect": 1.9099501454348804, + "se": 0.038710666694675176, + "t_stat": 49.33911783280143, + "p_value": 0.005, + "conf_int": [ + 1.8345031006815702, + 1.9754279089659605 + ] + }, + "3,4": { + "effect": 1.9460475545421947, + "se": 0.04063499430261002, + "t_stat": 47.89092721534351, + "p_value": 0.005, + "conf_int": [ + 1.8713316085863323, + 2.0257378605718293 + ] + }, + "3,5": { + "effect": 1.9370090543960161, + "se": 0.03629493783322384, + "t_stat": 53.36857341639822, + "p_value": 0.005, + "conf_int": [ + 1.8655940420967856, + 2.0032891989959185 + ] + }, + "3,6": { + "effect": 1.9408979606683943, + "se": 
0.042145462320843916, + "t_stat": 46.05235899164601, + "p_value": 0.005, + "conf_int": [ + 1.8564942680806145, + 2.023415597178116 + ] + }, + "3,7": { + "effect": 1.9111981714382718, + "se": 0.042146666625498944, + "t_stat": 45.346366022739915, + "p_value": 0.005, + "conf_int": [ + 1.8301925434423028, + 1.9867824935722587 + ] + }, + "3,8": { + "effect": 1.890746912643064, + "se": 0.04112894043025772, + "t_stat": 45.97120404424715, + "p_value": 0.005, + "conf_int": [ + 1.8093419538451232, + 1.9737883443125592 + ] + }, + "3,9": { + "effect": 1.947651168397633, + "se": 0.04007646253864604, + "t_stat": 48.59838032160394, + "p_value": 0.005, + "conf_int": [ + 1.8669352674463777, + 2.0239577902885184 + ] + }, + "3,10": { + "effect": 1.9527553566442448, + "se": 0.04163009290833081, + "t_stat": 46.90730239166651, + "p_value": 0.005, + "conf_int": [ + 1.876068943818699, + 2.0372132442384356 + ] + }, + "5,2": { + "effect": 0.051774741787699634, + "se": 0.04136115213281122, + "t_stat": 1.2517722335550576, + "p_value": 0.1708542713567839, + "conf_int": [ + -0.025247429738245904, + 0.13695955008009228 + ] + }, + "5,3": { + "effect": -0.01987500071610747, + "se": 0.03778810487607761, + "t_stat": -0.5259591816336275, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.10011717412359118, + 0.04750464376070582 + ] + }, + "5,4": { + "effect": 0.014351278937516098, + "se": 0.03918741783793225, + "t_stat": 0.3662216019659374, + "p_value": 0.6030150753768844, + "conf_int": [ + -0.05372664414787496, + 0.09788001277326877 + ] + }, + "5,5": { + "effect": 1.9852951772753678, + "se": 0.03909277597029605, + "t_stat": 50.78419549391578, + "p_value": 0.005, + "conf_int": [ + 1.9028279857010126, + 2.0592127984554645 + ] + }, + "5,6": { + "effect": 1.9726142525285033, + "se": 0.041506903745787645, + "t_stat": 47.524967523714544, + "p_value": 0.005, + "conf_int": [ + 1.8813453465042729, + 2.050852073112586 + ] + }, + "5,7": { + "effect": 1.9591061436341544, + "se": 0.04237635351998152, + 
"t_stat": 46.23111667006427, + "p_value": 0.005, + "conf_int": [ + 1.8683816514911504, + 2.036844133502373 + ] + }, + "5,8": { + "effect": 1.9583487775657027, + "se": 0.038230837179403815, + "t_stat": 51.2243236624891, + "p_value": 0.005, + "conf_int": [ + 1.8798819853574242, + 2.0275098188160823 + ] + }, + "5,9": { + "effect": 1.9680734744163075, + "se": 0.04042280551313381, + "t_stat": 48.68720637850975, + "p_value": 0.005, + "conf_int": [ + 1.8889511938454833, + 2.0377588390705172 + ] + }, + "5,10": { + "effect": 2.041542859628759, + "se": 0.04090256204207493, + "t_stat": 49.91234675052141, + "p_value": 0.005, + "conf_int": [ + 1.962724149016656, + 2.121791821823951 + ] + }, + "7,2": { + "effect": 0.13847029101161182, + "se": 0.041770460850479314, + "t_stat": 3.3150290466575707, + "p_value": 0.005, + "conf_int": [ + 0.053312830298297725, + 0.21593423370833417 + ] + }, + "7,3": { + "effect": -0.020470136695940517, + "se": 0.0399104272680385, + "t_stat": -0.5129019681614292, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10415193431022758, + 0.05358073174735074 + ] + }, + "7,4": { + "effect": -0.032324098916655934, + "se": 0.03578638635574513, + "t_stat": -0.9032512697797619, + "p_value": 0.44221105527638194, + "conf_int": [ + -0.09443666354980268, + 0.036526226292780836 + ] + }, + "7,5": { + "effect": 0.03151613208282335, + "se": 0.03568695911218706, + "t_stat": 0.8831274187231217, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.03970009970795312, + 0.09400214480866743 + ] + }, + "7,6": { + "effect": -0.024811280194351713, + "se": 0.03848463898512638, + "t_stat": -0.644706065813449, + "p_value": 0.4020100502512563, + "conf_int": [ + -0.10377323243881807, + 0.0475238862955143 + ] + }, + "7,7": { + "effect": 1.9629363558831094, + "se": 0.04097548866714681, + "t_stat": 47.9051359662477, + "p_value": 0.005, + "conf_int": [ + 1.88728835346253, + 2.040027750833742 + ] + }, + "7,8": { + "effect": 1.9421673057644255, + "se": 0.03993612493409277, + "t_stat": 
48.631841696449406, + "p_value": 0.005, + "conf_int": [ + 1.868297081138492, + 2.0208129670706434 + ] + }, + "7,9": { + "effect": 1.947942344886539, + "se": 0.03997444803606312, + "t_stat": 48.729687102350844, + "p_value": 0.005, + "conf_int": [ + 1.8806508748711863, + 2.0213528987522054 + ] + }, + "7,10": { + "effect": 2.0366982942448257, + "se": 0.04380240774919928, + "t_stat": 46.497405026372256, + "p_value": 0.005, + "conf_int": [ + 1.9494247189849796, + 2.110535209487657 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.13847029101161182, + "se": 0.041770460850479314, + "t_stat": 3.3150290466575707, + "p_value": 0.005, + "conf_int": [ + 0.0533128302982977, + 0.21593423370833417 + ] + }, + "-4": { + "effect": -0.020470136695940517, + "se": 0.0399104272680385, + "t_stat": -0.5129019681614292, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10415193431022762, + 0.053580731747350736 + ] + }, + "-3": { + "effect": 0.010577275589096564, + "se": 0.02705668790428922, + "t_stat": 0.39093016952085174, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.041839451695935496, + 0.06362038961496878 + ] + }, + "-2": { + "effect": 0.005299953405254429, + "se": 0.025768957019486335, + "t_stat": 0.20567201851617956, + "p_value": 0.9246231155778895, + "conf_int": [ + -0.050239943678432014, + 0.05041018840635458 + ] + }, + "-1": { + "effect": 0.009194796922934364, + "se": 0.021982689501167562, + "t_stat": 0.4182744300894576, + "p_value": 0.6231155778894473, + "conf_int": [ + -0.035444076377309834, + 0.052614061000170735 + ] + }, + "0": { + "effect": 1.953007790221592, + "se": 0.019944150885925684, + "t_stat": 97.9238374896072, + "p_value": 0.005, + "conf_int": [ + 1.9124278749705312, + 1.9905319966094797 + ] + }, + "1": { + "effect": 1.9538387745910035, + "se": 0.01755375847775701, + "t_stat": 111.30600760325954, + "p_value": 0.005, + "conf_int": [ + 1.9182496226885974, + 1.990544676483545 + ] + }, + "2": { + "effect": 1.9481280340486575, + "se": 0.02096634615031962, 
+ "t_stat": 92.91690693654598, + "p_value": 0.005, + "conf_int": [ + 1.906567068053598, + 1.9878596828926625 + ] + }, + "3": { + "effect": 1.9782315410793267, + "se": 0.02128619886571603, + "t_stat": 92.93493655485412, + "p_value": 0.005, + "conf_int": [ + 1.9387377278473277, + 2.0162541344298517 + ] + }, + "4": { + "effect": 1.9400499359451828, + "se": 0.02893850714349226, + "t_stat": 67.04042908382938, + "p_value": 0.005, + "conf_int": [ + 1.8809443225358153, + 1.991730287211487 + ] + }, + "5": { + "effect": 1.967242841864014, + "se": 0.028899306302645814, + "t_stat": 68.0723205346942, + "p_value": 0.005, + "conf_int": [ + 1.9127788508721326, + 2.019934522626693 + ] + }, + "6": { + "effect": 1.947651168397633, + "se": 0.040076462538646024, + "t_stat": 48.598380321603955, + "p_value": 0.005, + "conf_int": [ + 1.8669352674463777, + 2.0239577902885184 + ] + }, + "7": { + "effect": 1.9527553566442448, + "se": 0.0416300929083308, + "t_stat": 46.90730239166652, + "p_value": 0.005, + "conf_int": [ + 1.876068943818699, + 2.037213244238435 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9295320405205874, + "se": 0.03044125859655524, + "t_stat": 63.38542259678234, + "p_value": 0.005, + "conf_int": [ + 1.8695941117699424, + 1.9867835997412717 + ] + }, + "5": { + "effect": 1.980830114174799, + "se": 0.0316586783231788, + "t_stat": 62.568313621751564, + "p_value": 0.005, + "conf_int": [ + 1.9193890597593606, + 2.0362551053682205 + ] + }, + "7": { + "effect": 1.9724360751947247, + "se": 0.03346783119193577, + "t_stat": 58.93528217836812, + "p_value": 0.005, + "conf_int": [ + 1.912440579136104, + 2.036883663702173 + ] + } + } + }, + "reg_10cov": { + "overall_att": 1.9566557260359592, + "overall_se": 0.017617770100273013, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9227702268240927, + 1.9907402583049256 + ], + "group_time_effects": { + "3,2": { + "effect": 0.03702563192914326, + "se": 0.04271239910553472, + "t_stat": 0.8668591019122931, + "p_value": 
0.37185929648241206, + "conf_int": [ + -0.04233835443572854, + 0.1193635291865463 + ] + }, + "3,3": { + "effect": 1.9111083835119633, + "se": 0.03859074016041854, + "t_stat": 49.52245993644182, + "p_value": 0.005, + "conf_int": [ + 1.8345843157012454, + 1.9762872637561884 + ] + }, + "3,4": { + "effect": 1.9457108936079388, + "se": 0.04068487435896952, + "t_stat": 47.82393762459736, + "p_value": 0.005, + "conf_int": [ + 1.8723822119375984, + 2.0263300714845567 + ] + }, + "3,5": { + "effect": 1.9358956805562748, + "se": 0.036423490753859956, + "t_stat": 53.149647123021, + "p_value": 0.005, + "conf_int": [ + 1.8649978595987466, + 2.001830986742302 + ] + }, + "3,6": { + "effect": 1.940205550863152, + "se": 0.042518297357036404, + "t_stat": 45.63224944241717, + "p_value": 0.005, + "conf_int": [ + 1.8531947274507945, + 2.023400280383427 + ] + }, + "3,7": { + "effect": 1.9116485540690111, + "se": 0.04220622299015639, + "t_stat": 45.29304966508987, + "p_value": 0.005, + "conf_int": [ + 1.829606217888013, + 1.9862233513921663 + ] + }, + "3,8": { + "effect": 1.8912826607720787, + "se": 0.04147603066495284, + "t_stat": 45.5994132141051, + "p_value": 0.005, + "conf_int": [ + 1.8122214628030524, + 1.9729135834816387 + ] + }, + "3,9": { + "effect": 1.9471818712406055, + "se": 0.04040693075370602, + "t_stat": 48.18930403571954, + "p_value": 0.005, + "conf_int": [ + 1.8649074850756278, + 2.0233868038346836 + ] + }, + "3,10": { + "effect": 1.9527202285173175, + "se": 0.04158546210884797, + "t_stat": 46.95680003281352, + "p_value": 0.005, + "conf_int": [ + 1.8760365653499609, + 2.036841344597345 + ] + }, + "5,2": { + "effect": 0.053082150132743304, + "se": 0.04146144898838814, + "t_stat": 1.2802772558096005, + "p_value": 0.18090452261306533, + "conf_int": [ + -0.024623964991874966, + 0.13824495037415957 + ] + }, + "5,3": { + "effect": -0.017657894534938566, + "se": 0.03783860420137821, + "t_stat": -0.4666634752424458, + "p_value": 0.4623115577889447, + "conf_int": [ + 
-0.0983483942908769, + 0.04914811435005241 + ] + }, + "5,4": { + "effect": 0.011343021087463231, + "se": 0.039167402776941404, + "t_stat": 0.28960360614314423, + "p_value": 0.6934673366834171, + "conf_int": [ + -0.0535941737586439, + 0.09461345553869693 + ] + }, + "5,5": { + "effect": 1.9879692308105694, + "se": 0.03917188067266219, + "t_stat": 50.74990520426967, + "p_value": 0.005, + "conf_int": [ + 1.9039150310749444, + 2.06181011825714 + ] + }, + "5,6": { + "effect": 1.9732675769372714, + "se": 0.041599206210279384, + "t_stat": 47.43522188771156, + "p_value": 0.005, + "conf_int": [ + 1.88286401136768, + 2.0502649329888554 + ] + }, + "5,7": { + "effect": 1.9577506376820681, + "se": 0.04246738666855547, + "t_stat": 46.100096833405196, + "p_value": 0.005, + "conf_int": [ + 1.8669836418521675, + 2.0348269488198487 + ] + }, + "5,8": { + "effect": 1.9562947011967855, + "se": 0.037931429908810044, + "t_stat": 51.57450446502708, + "p_value": 0.005, + "conf_int": [ + 1.8759242811369123, + 2.0256061692957745 + ] + }, + "5,9": { + "effect": 1.9683409324787837, + "se": 0.040531840338503815, + "t_stat": 48.56283149346489, + "p_value": 0.005, + "conf_int": [ + 1.8888823767179357, + 2.0391397473148816 + ] + }, + "5,10": { + "effect": 2.041364535190255, + "se": 0.04094098877071498, + "t_stat": 49.86114396559077, + "p_value": 0.005, + "conf_int": [ + 1.9617828632918555, + 2.12041281217794 + ] + }, + "7,2": { + "effect": 0.13700436831211227, + "se": 0.04154695580702951, + "t_stat": 3.2975789838477625, + "p_value": 0.005, + "conf_int": [ + 0.0538996330805439, + 0.21395813772445194 + ] + }, + "7,3": { + "effect": -0.017228088935761207, + "se": 0.03942447026490282, + "t_stat": -0.43698973809924124, + "p_value": 0.592964824120603, + "conf_int": [ + -0.10173778271150422, + 0.05321120153842712 + ] + }, + "7,4": { + "effect": -0.03334017777029221, + "se": 0.03567372342466162, + "t_stat": -0.9345864285992583, + "p_value": 0.4020100502512563, + "conf_int": [ + -0.09289696103308707, + 
0.035540276036682125 + ] + }, + "7,5": { + "effect": 0.034427602350979865, + "se": 0.03587344672401607, + "t_stat": 0.9596959727856799, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.03692621804363988, + 0.09813652671332922 + ] + }, + "7,6": { + "effect": -0.022356429688299997, + "se": 0.03851992120969604, + "t_stat": -0.5803861738604115, + "p_value": 0.4723618090452261, + "conf_int": [ + -0.09924454052869598, + 0.047427527531474466 + ] + }, + "7,7": { + "effect": 1.963184378929628, + "se": 0.04091704916606947, + "t_stat": 47.979617761820464, + "p_value": 0.005, + "conf_int": [ + 1.88982074426545, + 2.0395590231992053 + ] + }, + "7,8": { + "effect": 1.9429346533865302, + "se": 0.03981165094007116, + "t_stat": 48.803167100787846, + "p_value": 0.005, + "conf_int": [ + 1.8701440502648805, + 2.0215723506137713 + ] + }, + "7,9": { + "effect": 1.9519669433984925, + "se": 0.039883606458088, + "t_stat": 48.94158569761569, + "p_value": 0.005, + "conf_int": [ + 1.8861948300158835, + 2.0223889675285363 + ] + }, + "7,10": { + "effect": 2.0374700848671328, + "se": 0.04356680537898181, + "t_stat": 46.76657072152647, + "p_value": 0.005, + "conf_int": [ + 1.9486393039381165, + 2.110525091771145 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.13700436831211227, + "se": 0.04154695580702951, + "t_stat": 3.2975789838477625, + "p_value": 0.005, + "conf_int": [ + 0.05389963308054399, + 0.21395813772445194 + ] + }, + "-4": { + "effect": -0.017228088935761207, + "se": 0.03942447026490282, + "t_stat": -0.43698973809924124, + "p_value": 0.592964824120603, + "conf_int": [ + -0.10173778271150424, + 0.05321120153842708 + ] + }, + "-3": { + "effect": 0.010746478169009557, + "se": 0.027164840113712733, + "t_stat": 0.39560248188557406, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.04161271984847323, + 0.06427587968218289 + ] + }, + "-2": { + "effect": 0.007857207450099109, + "se": 0.025853408169967434, + "t_stat": 0.3039137973006754, + "p_value": 0.8341708542713567, + 
"conf_int": [ + -0.046817163578211016, + 0.05370805423565538 + ] + }, + "-1": { + "effect": 0.008813442279218545, + "se": 0.022180743697726234, + "t_stat": 0.39734656327695717, + "p_value": 0.6331658291457286, + "conf_int": [ + -0.03558032439498496, + 0.050340192809621376 + ] + }, + "0": { + "effect": 1.9543849382390315, + "se": 0.01993484324799262, + "t_stat": 98.03864088250768, + "p_value": 0.005, + "conf_int": [ + 1.9130250650340823, + 1.9906175153840378 + ] + }, + "1": { + "effect": 1.9542014458662638, + "se": 0.01767394472664407, + "t_stat": 110.56962529254939, + "p_value": 0.005, + "conf_int": [ + 1.9176147578778007, + 1.9911216599324477 + ] + }, + "2": { + "effect": 1.9486150876430068, + "se": 0.02100317943063112, + "t_stat": 92.77714805412455, + "p_value": 0.005, + "conf_int": [ + 1.906150493211738, + 1.9877976665971346 + ] + }, + "3": { + "effect": 1.9775542700229307, + "se": 0.02121022260390955, + "t_stat": 93.23590359954167, + "p_value": 0.005, + "conf_int": [ + 1.9378420920153618, + 2.0162784861589254 + ] + }, + "4": { + "effect": 1.9404075244053471, + "se": 0.028987696939872028, + "t_stat": 66.9390027234745, + "p_value": 0.005, + "conf_int": [ + 1.881332724141189, + 1.9924501078306944 + ] + }, + "5": { + "effect": 1.9674163544974772, + "se": 0.02906516976252517, + "t_stat": 67.68982842942627, + "p_value": 0.005, + "conf_int": [ + 1.910836045125161, + 2.0199235590823292 + ] + }, + "6": { + "effect": 1.9471818712406055, + "se": 0.04040693075370602, + "t_stat": 48.18930403571954, + "p_value": 0.005, + "conf_int": [ + 1.8649074850756278, + 2.0233868038346836 + ] + }, + "7": { + "effect": 1.9527202285173175, + "se": 0.04158546210884797, + "t_stat": 46.95680003281352, + "p_value": 0.005, + "conf_int": [ + 1.8760365653499609, + 2.036841344597345 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9294692278922927, + "se": 0.030613393479888402, + "t_stat": 63.02696331786496, + "p_value": 0.005, + "conf_int": [ + 1.8693199149275344, + 1.983526736406974 + ] 
+ }, + "5": { + "effect": 1.9808312690492889, + "se": 0.03170591980009046, + "t_stat": 62.47512393706482, + "p_value": 0.005, + "conf_int": [ + 1.9183357288880067, + 2.036631351864895 + ] + }, + "7": { + "effect": 1.9738890151454458, + "se": 0.03333757547853297, + "t_stat": 59.20913524189814, + "p_value": 0.005, + "conf_int": [ + 1.912612275253619, + 2.036232874071883 + ] + } + } + }, + "dr_2cov": { + "overall_att": 1.9563674203452948, + "overall_se": 0.017519380498438885, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9230673388714083, + 1.9894916152179676 + ], + "group_time_effects": { + "3,2": { + "effect": 0.03750649649846032, + "se": 0.042553683627236036, + "t_stat": 0.8813924741982779, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.04139617262776924, + 0.12039083629911893 + ] + }, + "3,3": { + "effect": 1.909946618249025, + "se": 0.03879559196178789, + "t_stat": 49.23102140393286, + "p_value": 0.005, + "conf_int": [ + 1.834651760199381, + 1.9767055502837254 + ] + }, + "3,4": { + "effect": 1.94604073472221, + "se": 0.040646252638349596, + "t_stat": 47.87749444056074, + "p_value": 0.005, + "conf_int": [ + 1.8720965599165098, + 2.0274486747023692 + ] + }, + "3,5": { + "effect": 1.9369817451627276, + "se": 0.03631265939387887, + "t_stat": 53.34177604984887, + "p_value": 0.005, + "conf_int": [ + 1.8657759856246698, + 2.0026965420169844 + ] + }, + "3,6": { + "effect": 1.9409038460197021, + "se": 0.042020847252409906, + "t_stat": 46.18906978150925, + "p_value": 0.005, + "conf_int": [ + 1.8548363572714244, + 2.023206950686179 + ] + }, + "3,7": { + "effect": 1.9111970606830675, + "se": 0.04215517445124514, + "t_stat": 45.33718779632796, + "p_value": 0.005, + "conf_int": [ + 1.8317540385638944, + 1.9861691912210824 + ] + }, + "3,8": { + "effect": 1.8907391460016552, + "se": 0.041141212122272515, + "t_stat": 45.95730287144531, + "p_value": 0.005, + "conf_int": [ + 1.8101546131615982, + 1.9726885614243597 + ] + }, + "3,9": { + "effect": 1.9476575976206445, + 
"se": 0.04005279057361501, + "t_stat": 48.62726341229453, + "p_value": 0.005, + "conf_int": [ + 1.866739167929477, + 2.025160485606026 + ] + }, + "3,10": { + "effect": 1.9527663424053017, + "se": 0.04162937286162342, + "t_stat": 46.90837762308652, + "p_value": 0.005, + "conf_int": [ + 1.8753942052367765, + 2.0384099353518796 + ] + }, + "5,2": { + "effect": 0.05177540855064571, + "se": 0.04138078920380084, + "t_stat": 1.2511943234250864, + "p_value": 0.1708542713567839, + "conf_int": [ + -0.02556446698941195, + 0.1373431946798288 + ] + }, + "5,3": { + "effect": -0.019897462506654553, + "se": 0.03776632153560329, + "t_stat": -0.5268573082474209, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.09974461627400466, + 0.047293172084784084 + ] + }, + "5,4": { + "effect": 0.014346139746199466, + "se": 0.039224608671636164, + "t_stat": 0.3657433491891877, + "p_value": 0.6030150753768844, + "conf_int": [ + -0.053942970194340314, + 0.0977366304052031 + ] + }, + "5,5": { + "effect": 1.985295037579256, + "se": 0.03909854174538685, + "t_stat": 50.776702888503415, + "p_value": 0.005, + "conf_int": [ + 1.9027909767166094, + 2.0592483168677043 + ] + }, + "5,6": { + "effect": 1.9726139909017086, + "se": 0.04150723810640706, + "t_stat": 47.52457838425091, + "p_value": 0.005, + "conf_int": [ + 1.8813175638710902, + 2.0507176540271663 + ] + }, + "5,7": { + "effect": 1.9591059979468466, + "se": 0.0423820486046726, + "t_stat": 46.22490092965577, + "p_value": 0.005, + "conf_int": [ + 1.8683695171382486, + 2.037000627646724 + ] + }, + "5,8": { + "effect": 1.9583485069466597, + "se": 0.038239386119659886, + "t_stat": 51.21286468403374, + "p_value": 0.005, + "conf_int": [ + 1.87985678227721, + 2.0275965324785616 + ] + }, + "5,9": { + "effect": 1.9680728482414127, + "se": 0.040433228859158385, + "t_stat": 48.67463973992351, + "p_value": 0.005, + "conf_int": [ + 1.8889600469316306, + 2.0378964492524645 + ] + }, + "5,10": { + "effect": 2.041542473273583, + "se": 0.04090327871367535, + 
"t_stat": 49.91146278430307, + "p_value": 0.005, + "conf_int": [ + 1.9626750417759662, + 2.1220209763813966 + ] + }, + "7,2": { + "effect": 0.13846735651084777, + "se": 0.04179803263811866, + "t_stat": 3.3127721036460778, + "p_value": 0.005, + "conf_int": [ + 0.05298676509491988, + 0.21663259979927646 + ] + }, + "7,3": { + "effect": -0.020476437240900484, + "se": 0.03992943705754821, + "t_stat": -0.5128155754209324, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10465639792639839, + 0.05303235697429726 + ] + }, + "7,4": { + "effect": -0.03259221972058846, + "se": 0.03586760612737067, + "t_stat": -0.9086812095808436, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.09633039350595662, + 0.036688975823018624 + ] + }, + "7,5": { + "effect": 0.031516709425282396, + "se": 0.03570024858040153, + "t_stat": 0.8828148452328766, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.039920074740144994, + 0.09391325324408069 + ] + }, + "7,6": { + "effect": -0.0248176132034209, + "se": 0.03867304753388797, + "t_stat": -0.6417289245610657, + "p_value": 0.4120603015075377, + "conf_int": [ + -0.1042984040074484, + 0.050068412221070585 + ] + }, + "7,7": { + "effect": 1.9629849703601747, + "se": 0.04114245866354805, + "t_stat": 47.71190235403619, + "p_value": 0.005, + "conf_int": [ + 1.887343920050067, + 2.0408960645648495 + ] + }, + "7,8": { + "effect": 1.9421658097107328, + "se": 0.03995054458035153, + "t_stat": 48.614251197615175, + "p_value": 0.005, + "conf_int": [ + 1.8678335827357593, + 2.020387694325642 + ] + }, + "7,9": { + "effect": 1.9479630002274304, + "se": 0.03996314877982817, + "t_stat": 48.74398188589898, + "p_value": 0.005, + "conf_int": [ + 1.880622292881371, + 2.0204771535651664 + ] + }, + "7,10": { + "effect": 2.036679145649886, + "se": 0.04377297071508097, + "t_stat": 46.52823677210912, + "p_value": 0.005, + "conf_int": [ + 1.949034268449787, + 2.1112358988175512 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.13846735651084777, + "se": 
0.04179803263811865, + "t_stat": 3.3127721036460787, + "p_value": 0.005, + "conf_int": [ + 0.052986765094919924, + 0.2166325997992764 + ] + }, + "-4": { + "effect": -0.020476437240900484, + "se": 0.039929437057548214, + "t_stat": -0.5128155754209323, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10465639792639836, + 0.05303235697429719 + ] + }, + "-3": { + "effect": 0.010446271491798397, + "se": 0.027115431661206252, + "t_stat": 0.3852518972339932, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.041036597264371415, + 0.06347327404434266 + ] + }, + "-2": { + "effect": 0.00528877778578076, + "se": 0.02572106564409154, + "t_stat": 0.20562047696478938, + "p_value": 0.914572864321608, + "conf_int": [ + -0.050218184792007715, + 0.05122095711030583 + ] + }, + "-1": { + "effect": 0.009190968247220445, + "se": 0.022044847544008324, + "t_stat": 0.41692137942312524, + "p_value": 0.6432160804020101, + "conf_int": [ + -0.03539481074279779, + 0.05295611961981317 + ] + }, + "0": { + "effect": 1.9530224983579727, + "se": 0.019940224706130445, + "t_stat": 97.94385605682433, + "p_value": 0.005, + "conf_int": [ + 1.911888450420723, + 1.9900799835270297 + ] + }, + "1": { + "effect": 1.9538359357398407, + "se": 0.017576385348197894, + "t_stat": 111.16255686441055, + "p_value": 0.005, + "conf_int": [ + 1.9175803111717018, + 1.9901390505681897 + ] + }, + "2": { + "effect": 1.9481257021980083, + "se": 0.02096961071198, + "t_stat": 92.90233037492864, + "p_value": 0.005, + "conf_int": [ + 1.9061448797140914, + 1.987698014826136 + ] + }, + "3": { + "effect": 1.9782271262925608, + "se": 0.0212268494107051, + "t_stat": 93.19457108387002, + "p_value": 0.005, + "conf_int": [ + 1.9383350181831043, + 2.016365343386242 + ] + }, + "4": { + "effect": 1.940049071008396, + "se": 0.028935660828047872, + "t_stat": 67.0469937609951, + "p_value": 0.005, + "conf_int": [ + 1.8810149133513567, + 1.9918254365459642 + ] + }, + "5": { + "effect": 1.9672388191020964, + "se": 0.02887423122372237, + 
"t_stat": 68.13129685980559, + "p_value": 0.005, + "conf_int": [ + 1.9131963086514905, + 2.019982112410836 + ] + }, + "6": { + "effect": 1.9476575976206445, + "se": 0.04005279057361501, + "t_stat": 48.62726341229453, + "p_value": 0.005, + "conf_int": [ + 1.866739167929477, + 2.025160485606026 + ] + }, + "7": { + "effect": 1.9527663424053017, + "se": 0.04162937286162344, + "t_stat": 46.90837762308651, + "p_value": 0.005, + "conf_int": [ + 1.8753942052367765, + 2.03840993535188 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9295291363580418, + "se": 0.030432814553369162, + "t_stat": 63.40291440919082, + "p_value": 0.005, + "conf_int": [ + 1.869928417389561, + 1.9864120974980581 + ] + }, + "5": { + "effect": 1.9808298091482444, + "se": 0.03166813271404776, + "t_stat": 62.54962447689763, + "p_value": 0.005, + "conf_int": [ + 1.9193431683117772, + 2.0362183499128976 + ] + }, + "7": { + "effect": 1.9724482314870557, + "se": 0.03348899468270557, + "t_stat": 58.89840080824135, + "p_value": 0.005, + "conf_int": [ + 1.9113054655520512, + 2.0358791270186636 + ] + } + } + }, + "ipw_2cov": { + "overall_att": 1.956350012546311, + "overall_se": 0.01751812056015621, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9230091623591297, + 1.9895779073223603 + ], + "group_time_effects": { + "3,2": { + "effect": 0.03750591492543812, + "se": 0.042518197796357654, + "t_stat": 0.8821144091072244, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.041550653781724727, + 0.12028608966962345 + ] + }, + "3,3": { + "effect": 1.9099557001686662, + "se": 0.03884659691219914, + "t_stat": 49.16661566225575, + "p_value": 0.005, + "conf_int": [ + 1.8342933661226222, + 1.976845669537206 + ] + }, + "3,4": { + "effect": 1.9460305588517919, + "se": 0.0406737475585759, + "t_stat": 47.84487969910408, + "p_value": 0.005, + "conf_int": [ + 1.8717511010218841, + 2.0279469021635643 + ] + }, + "3,5": { + "effect": 1.9369651647270494, + "se": 0.03634732852450516, + "t_stat": 53.29044095829928, + 
"p_value": 0.005, + "conf_int": [ + 1.8660360030513377, + 2.0037890569703034 + ] + }, + "3,6": { + "effect": 1.9408969783523893, + "se": 0.04195815551720851, + "t_stat": 46.257919453975035, + "p_value": 0.005, + "conf_int": [ + 1.8558804613161783, + 2.0215117803333316 + ] + }, + "3,7": { + "effect": 1.9111619189117817, + "se": 0.042224231063294926, + "t_stat": 45.26220776044242, + "p_value": 0.005, + "conf_int": [ + 1.8296790149715403, + 1.9875848763864656 + ] + }, + "3,8": { + "effect": 1.8907106360622072, + "se": 0.041148306548054146, + "t_stat": 45.94868646305486, + "p_value": 0.005, + "conf_int": [ + 1.8113935673819381, + 1.974792455752688 + ] + }, + "3,9": { + "effect": 1.9476382072605487, + "se": 0.04002784910144821, + "t_stat": 48.657078783433384, + "p_value": 0.005, + "conf_int": [ + 1.8666274687612792, + 2.025154758629858 + ] + }, + "3,10": { + "effect": 1.9527460486655963, + "se": 0.04159807771271733, + "t_stat": 46.943179974602636, + "p_value": 0.005, + "conf_int": [ + 1.8762899227106327, + 2.038609662916976 + ] + }, + "5,2": { + "effect": 0.05177602383247171, + "se": 0.0413771991743856, + "t_stat": 1.2513177514567844, + "p_value": 0.1708542713567839, + "conf_int": [ + -0.02570362042468869, + 0.13726791936254307 + ] + }, + "5,3": { + "effect": -0.019872088090072477, + "se": 0.03776897646180914, + "t_stat": -0.5261484411727847, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.09933057466023898, + 0.0473019666288374 + ] + }, + "5,4": { + "effect": 0.01434466530278397, + "se": 0.03920879479328077, + "t_stat": 0.36585325762785803, + "p_value": 0.6030150753768844, + "conf_int": [ + -0.05297250858928486, + 0.09749261079642187 + ] + }, + "5,5": { + "effect": 1.985295180796935, + "se": 0.039090143515122744, + "t_stat": 50.78761555400499, + "p_value": 0.005, + "conf_int": [ + 1.9026815868265547, + 2.059339269010279 + ] + }, + "5,6": { + "effect": 1.9726138709057324, + "se": 0.04159450804953158, + "t_stat": 47.424863603548424, + "p_value": 0.005, + 
"conf_int": [ + 1.8801282786838058, + 2.0493944241829407 + ] + }, + "5,7": { + "effect": 1.9591063359993746, + "se": 0.04232365480074618, + "t_stat": 46.288685257040584, + "p_value": 0.005, + "conf_int": [ + 1.8681959807507005, + 2.036817433737284 + ] + }, + "5,8": { + "effect": 1.9583491233117791, + "se": 0.03827291657903161, + "t_stat": 51.16801379032321, + "p_value": 0.005, + "conf_int": [ + 1.8802155364622648, + 2.0275615356547942 + ] + }, + "5,9": { + "effect": 1.968071911786362, + "se": 0.040541243535223266, + "t_stat": 48.54493202894606, + "p_value": 0.005, + "conf_int": [ + 1.8877035993732585, + 2.0381814464033035 + ] + }, + "5,10": { + "effect": 2.041541842897819, + "se": 0.04099283726263381, + "t_stat": 49.80240401068177, + "p_value": 0.005, + "conf_int": [ + 1.9611288039893604, + 2.123137072433841 + ] + }, + "7,2": { + "effect": 0.13846484551662042, + "se": 0.04179947293576519, + "t_stat": 3.312597882021252, + "p_value": 0.005, + "conf_int": [ + 0.053149444907736376, + 0.21638933964106033 + ] + }, + "7,3": { + "effect": -0.02046958124538068, + "se": 0.039924819103300965, + "t_stat": -0.5127031682327213, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10397608879239821, + 0.05256708137933298 + ] + }, + "7,4": { + "effect": -0.032574370552012094, + "se": 0.035974248437181736, + "t_stat": -0.905491343589665, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.09639213070348161, + 0.0384081558429274 + ] + }, + "7,5": { + "effect": 0.03151739486171315, + "se": 0.03571021363168788, + "t_stat": 0.8825876872869172, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.0398473545950904, + 0.09396491762777813 + ] + }, + "7,6": { + "effect": -0.024841851505858402, + "se": 0.03878452666493048, + "t_stat": -0.6405093381820425, + "p_value": 0.4120603015075377, + "conf_int": [ + -0.10402420962249302, + 0.05093514712984091 + ] + }, + "7,7": { + "effect": 1.962930085053292, + "se": 0.04117582698732488, + "t_stat": 47.67190433497642, + "p_value": 0.005, + 
"conf_int": [ + 1.8876073315033268, + 2.040658349851558 + ] + }, + "7,8": { + "effect": 1.9421142107334253, + "se": 0.03993723064287941, + "t_stat": 48.629165805208224, + "p_value": 0.005, + "conf_int": [ + 1.8674460477635801, + 2.020409793017826 + ] + }, + "7,9": { + "effect": 1.9479039258303523, + "se": 0.039923454540999256, + "t_stat": 48.7909663185574, + "p_value": 0.005, + "conf_int": [ + 1.8811828685280696, + 2.022025613979884 + ] + }, + "7,10": { + "effect": 2.036655404967283, + "se": 0.04370357593750816, + "t_stat": 46.601573470315124, + "p_value": 0.005, + "conf_int": [ + 1.949308036293503, + 2.111056942737731 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.13846484551662042, + "se": 0.0417994729357652, + "t_stat": 3.3125978820212514, + "p_value": 0.005, + "conf_int": [ + 0.05314944490773647, + 0.21638933964106033 + ] + }, + "-4": { + "effect": -0.02046958124538068, + "se": 0.039924819103300965, + "t_stat": -0.5127031682327213, + "p_value": 0.5628140703517588, + "conf_int": [ + -0.10397608879239825, + 0.05256708137933303 + ] + }, + "-3": { + "effect": 0.01045532913098536, + "se": 0.027090783345708334, + "t_stat": 0.38593675928686916, + "p_value": 0.5829145728643216, + "conf_int": [ + -0.041088751681154466, + 0.06388081905274846 + ] + }, + "-2": { + "effect": 0.0053020578213137815, + "se": 0.025721678024746814, + "t_stat": 0.20613187896266622, + "p_value": 0.914572864321608, + "conf_int": [ + -0.05013940813560796, + 0.051628500926228345 + ] + }, + "-1": { + "effect": 0.009182332978337437, + "se": 0.022039728375415114, + "t_stat": 0.41662641308139486, + "p_value": 0.6432160804020101, + "conf_int": [ + -0.03551834793327227, + 0.0530165043817704 + ] + }, + "0": { + "effect": 1.9530075777174178, + "se": 0.019977256413481, + "t_stat": 97.76155130088304, + "p_value": 0.005, + "conf_int": [ + 1.9119634145945312, + 1.9897969011372811 + ] + }, + "1": { + "effect": 1.9538156212861473, + "se": 0.017617490452046733, + "t_stat": 110.90203945926704, + "p_value": 
0.005, + "conf_int": [ + 1.9177292736468305, + 1.991192204972325 + ] + }, + "2": { + "effect": 1.9481009733333257, + "se": 0.020981502891774653, + "t_stat": 92.84849533333653, + "p_value": 0.005, + "conf_int": [ + 1.9060147530633686, + 1.9872038477932066 + ] + }, + "3": { + "effect": 1.9782172845127244, + "se": 0.021198378658895317, + "t_stat": 93.31927296631339, + "p_value": 0.005, + "conf_int": [ + 1.9387096901105774, + 2.016764277760435 + ] + }, + "4": { + "effect": 1.9400312809465028, + "se": 0.028947264022410057, + "t_stat": 67.01950413844264, + "p_value": 0.005, + "conf_int": [ + 1.8802970660813891, + 1.991207574156113 + ] + }, + "5": { + "effect": 1.9672244519375237, + "se": 0.02886830332427285, + "t_stat": 68.1447894543721, + "p_value": 0.005, + "conf_int": [ + 1.9154206826658964, + 2.0202879899554596 + ] + }, + "6": { + "effect": 1.9476382072605487, + "se": 0.04002784910144821, + "t_stat": 48.657078783433384, + "p_value": 0.005, + "conf_int": [ + 1.8666274687612792, + 2.025154758629858 + ] + }, + "7": { + "effect": 1.9527460486655963, + "se": 0.04159807771271733, + "t_stat": 46.943179974602636, + "p_value": 0.005, + "conf_int": [ + 1.8762899227106327, + 2.0386096629169765 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9295131516250037, + "se": 0.030428651624796504, + "t_stat": 63.41106321164199, + "p_value": 0.005, + "conf_int": [ + 1.8693318914387955, + 1.9852891590817527 + ] + }, + "5": { + "effect": 1.980829710949667, + "se": 0.031711555960950315, + "t_stat": 62.463970969726795, + "p_value": 0.005, + "conf_int": [ + 1.9183682933830388, + 2.035608652828243 + ] + }, + "7": { + "effect": 1.972400906646088, + "se": 0.033466935554797926, + "t_stat": 58.93580855099589, + "p_value": 0.005, + "conf_int": [ + 1.9108155783646885, + 2.035952061830552 + ] + } + } + }, + "ipw_2cov_nyt": { + "overall_att": 1.9589825790728579, + "overall_se": 0.017211848671288586, + "overall_p_value": 0.005, + "overall_ci": [ + 1.927693563702625, + 1.9897581208137631 + ], + 
"group_time_effects": { + "3,2": { + "effect": -0.02496662401015908, + "se": 0.03442108950402342, + "t_stat": -0.725329278355375, + "p_value": 0.4120603015075377, + "conf_int": [ + -0.0932177093473173, + 0.03756643791962572 + ] + }, + "3,3": { + "effect": 1.9235115150874162, + "se": 0.033359676216926676, + "t_stat": 57.65977770825689, + "p_value": 0.005, + "conf_int": [ + 1.8591146542896935, + 1.9837288683801948 + ] + }, + "3,4": { + "effect": 1.965571748636836, + "se": 0.03313867665105778, + "t_stat": 59.31352568280952, + "p_value": 0.005, + "conf_int": [ + 1.907643382765451, + 2.036164161572371 + ] + }, + "3,5": { + "effect": 1.9480945624848207, + "se": 0.033002818270324676, + "t_stat": 59.02812743227143, + "p_value": 0.005, + "conf_int": [ + 1.8863889427187914, + 2.003672042141776 + ] + }, + "3,6": { + "effect": 1.9642458208072717, + "se": 0.03550239623458118, + "t_stat": 55.32713363426422, + "p_value": 0.005, + "conf_int": [ + 1.9034013152799873, + 2.035103596082 + ] + }, + "3,7": { + "effect": 1.9111619189117817, + "se": 0.042224231063294926, + "t_stat": 45.26220776044242, + "p_value": 0.005, + "conf_int": [ + 1.8296790149715403, + 1.9875848763864656 + ] + }, + "3,8": { + "effect": 1.8907106360622072, + "se": 0.041148306548054146, + "t_stat": 45.94868646305486, + "p_value": 0.005, + "conf_int": [ + 1.8113935673819381, + 1.974792455752688 + ] + }, + "3,9": { + "effect": 1.9476382072605487, + "se": 0.04002784910144821, + "t_stat": 48.657078783433384, + "p_value": 0.005, + "conf_int": [ + 1.8666274687612792, + 2.025154758629858 + ] + }, + "3,10": { + "effect": 1.9527460486655963, + "se": 0.04159807771271733, + "t_stat": 46.943179974602636, + "p_value": 0.005, + "conf_int": [ + 1.8762899227106327, + 2.038609662916976 + ] + }, + "5,2": { + "effect": -0.0060375406271469985, + "se": 0.03530008386358107, + "t_stat": -0.17103473891108514, + "p_value": 1.0, + "conf_int": [ + -0.07282947197318143, + 0.07069746894264468 + ] + }, + "5,3": { + "effect": 
-0.01001348021364359, + "se": 0.03380264093060639, + "t_stat": -0.2962336651210275, + "p_value": 0.6834170854271356, + "conf_int": [ + -0.08446622217809617, + 0.050420842404559324 + ] + }, + "5,4": { + "effect": 0.03024307748195834, + "se": 0.03513794900312327, + "t_stat": 0.8606955824106342, + "p_value": 0.3417085427135678, + "conf_int": [ + -0.039336511476988176, + 0.0981855672363105 + ] + }, + "5,5": { + "effect": 1.9698570439291214, + "se": 0.03297543686742949, + "t_stat": 59.737102251245354, + "p_value": 0.005, + "conf_int": [ + 1.9017872677563412, + 2.0255085613197488 + ] + }, + "5,6": { + "effect": 1.9687799942125177, + "se": 0.03510654238469603, + "t_stat": 56.08014519455401, + "p_value": 0.005, + "conf_int": [ + 1.893645802455528, + 2.0353871260189265 + ] + }, + "5,7": { + "effect": 1.9591063359993746, + "se": 0.04232365480074618, + "t_stat": 46.288685257040584, + "p_value": 0.005, + "conf_int": [ + 1.8681959807507005, + 2.036817433737284 + ] + }, + "5,8": { + "effect": 1.9583491233117791, + "se": 0.03827291657903161, + "t_stat": 51.16801379032321, + "p_value": 0.005, + "conf_int": [ + 1.8802155364622648, + 2.0275615356547942 + ] + }, + "5,9": { + "effect": 1.968071911786362, + "se": 0.040541243535223266, + "t_stat": 48.54493202894606, + "p_value": 0.005, + "conf_int": [ + 1.8877035993732585, + 2.0381814464033035 + ] + }, + "5,10": { + "effect": 2.041541842897819, + "se": 0.04099283726263381, + "t_stat": 49.80240401068177, + "p_value": 0.005, + "conf_int": [ + 1.9611288039893604, + 2.123137072433841 + ] + }, + "7,2": { + "effect": 0.10902668260830736, + "se": 0.035049239590010134, + "t_stat": 3.1106718400641866, + "p_value": 0.005, + "conf_int": [ + 0.03292778819453042, + 0.17578051435712166 + ] + }, + "7,3": { + "effect": -0.010588290676054816, + "se": 0.03561253856493372, + "t_stat": -0.29731917753486675, + "p_value": 0.6934673366834171, + "conf_int": [ + -0.0844012943083568, + 0.0590608725472134 + ] + }, + "7,4": { + "effect": -0.03998927313399908, + 
"se": 0.03240928350663836, + "t_stat": -1.2338832830354958, + "p_value": 0.22110552763819097, + "conf_int": [ + -0.09901195934461186, + 0.03686205776812521 + ] + }, + "7,5": { + "effect": 0.03151739486171315, + "se": 0.03571021363168788, + "t_stat": 0.8825876872869172, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.0398473545950904, + 0.09396491762777813 + ] + }, + "7,6": { + "effect": -0.024841851505858402, + "se": 0.03878452666493048, + "t_stat": -0.6405093381820425, + "p_value": 0.4120603015075377, + "conf_int": [ + -0.10402420962249302, + 0.05093514712984091 + ] + }, + "7,7": { + "effect": 1.962930085053292, + "se": 0.04117582698732488, + "t_stat": 47.67190433497642, + "p_value": 0.005, + "conf_int": [ + 1.8876073315033268, + 2.040658349851558 + ] + }, + "7,8": { + "effect": 1.9421142107334253, + "se": 0.03993723064287941, + "t_stat": 48.629165805208224, + "p_value": 0.005, + "conf_int": [ + 1.8674460477635801, + 2.020409793017826 + ] + }, + "7,9": { + "effect": 1.9479039258303523, + "se": 0.039923454540999256, + "t_stat": 48.7909663185574, + "p_value": 0.005, + "conf_int": [ + 1.8811828685280696, + 2.022025613979884 + ] + }, + "7,10": { + "effect": 2.036655404967283, + "se": 0.04370357593750816, + "t_stat": 46.601573470315124, + "p_value": 0.005, + "conf_int": [ + 1.949308036293503, + 2.111056942737731 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.10902668260830736, + "se": 0.035049239590010134, + "t_stat": 3.1106718400641866, + "p_value": 0.005, + "conf_int": [ + 0.03292778819453042, + 0.17578051435712158 + ] + }, + "-4": { + "effect": -0.010588290676054816, + "se": 0.035612538564933724, + "t_stat": -0.2973191775348667, + "p_value": 0.6934673366834171, + "conf_int": [ + -0.08440129430835683, + 0.05906087254721329 + ] + }, + "-3": { + "effect": -0.022669462549259896, + "se": 0.023512963438233402, + "t_stat": -0.9641261344539018, + "p_value": 0.45226130653266333, + "conf_int": [ + -0.06547652425277392, + 0.023272666345586578 + ] + }, + "-2": { 
+ "effect": 0.010331233313742801, + "se": 0.024488082030849893, + "t_stat": 0.42188821896004736, + "p_value": 0.7537688442211056, + "conf_int": [ + -0.04792069004038909, + 0.05242866250392419 + ] + }, + "-1": { + "effect": -0.006092986806945554, + "se": 0.02071752551683161, + "t_stat": -0.29409819246972363, + "p_value": 0.7236180904522613, + "conf_int": [ + -0.04309789738950867, + 0.028419529221368155 + ] + }, + "0": { + "effect": 1.9522327528742873, + "se": 0.01816985944688022, + "t_stat": 107.44347024706828, + "p_value": 0.005, + "conf_int": [ + 1.9187909594852097, + 1.9852572798333659 + ] + }, + "1": { + "effect": 1.958982246415351, + "se": 0.019487362101500493, + "t_stat": 100.52577851286054, + "p_value": 0.005, + "conf_int": [ + 1.9215148310333086, + 1.9975772378916883 + ] + }, + "2": { + "effect": 1.9517883794314739, + "se": 0.020762497546605186, + "t_stat": 94.00547188749, + "p_value": 0.005, + "conf_int": [ + 1.9131528127031805, + 1.9908815353523261 + ] + }, + "3": { + "effect": 1.9859532524354018, + "se": 0.02241034908210397, + "t_stat": 88.61768485441874, + "p_value": 0.005, + "conf_int": [ + 1.9464153398161292, + 2.0240460195318057 + ] + }, + "4": { + "effect": 1.9400312809465028, + "se": 0.028947264022410057, + "t_stat": 67.01950413844264, + "p_value": 0.005, + "conf_int": [ + 1.8802970660813891, + 1.991207574156113 + ] + }, + "5": { + "effect": 1.9672244519375237, + "se": 0.02886830332427285, + "t_stat": 68.1447894543721, + "p_value": 0.005, + "conf_int": [ + 1.9154206826658964, + 2.0202879899554596 + ] + }, + "6": { + "effect": 1.9476382072605487, + "se": 0.04002784910144821, + "t_stat": 48.657078783433384, + "p_value": 0.005, + "conf_int": [ + 1.8666274687612792, + 2.025154758629858 + ] + }, + "7": { + "effect": 1.9527460486655963, + "se": 0.04159807771271733, + "t_stat": 46.943179974602636, + "p_value": 0.005, + "conf_int": [ + 1.8762899227106327, + 2.0386096629169765 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9379600572395599, + "se": 
0.027359714676020434, + "t_stat": 70.8326121155823, + "p_value": 0.005, + "conf_int": [ + 1.8889964928323424, + 1.9847938462368524 + ] + }, + "5": { + "effect": 1.9776177086894955, + "se": 0.029511266225510993, + "t_stat": 67.01229603560505, + "p_value": 0.005, + "conf_int": [ + 1.9223093399752835, + 2.0294569469461097 + ] + }, + "7": { + "effect": 1.972400906646088, + "se": 0.033466935554797926, + "t_stat": 58.93580855099589, + "p_value": 0.005, + "conf_int": [ + 1.9108155783646885, + 2.035952061830552 + ] + } + } + }, + "dr_2cov_nyt": { + "overall_att": 1.9589988131956368, + "overall_se": 0.017198565143707635, + "overall_p_value": 0.005, + "overall_ci": [ + 1.9276866483662152, + 1.9898153330698811 + ], + "group_time_effects": { + "3,2": { + "effect": -0.024967009970231643, + "se": 0.034375834476103595, + "t_stat": -0.7262953860098289, + "p_value": 0.4221105527638191, + "conf_int": [ + -0.09292087141075185, + 0.03729493527799852 + ] + }, + "3,3": { + "effect": 1.9235111894790704, + "se": 0.033354989669843384, + "t_stat": 57.6678694407793, + "p_value": 0.005, + "conf_int": [ + 1.8591496806215322, + 1.9837202453002745 + ] + }, + "3,4": { + "effect": 1.9655687085814022, + "se": 0.03317511347734599, + "t_stat": 59.248288929700685, + "p_value": 0.005, + "conf_int": [ + 1.9073070785938464, + 2.0355769912261645 + ] + }, + "3,5": { + "effect": 1.9481008688943244, + "se": 0.03299503683166252, + "t_stat": 59.04223955964487, + "p_value": 0.005, + "conf_int": [ + 1.8857955923566014, + 2.0036739509367596 + ] + }, + "3,6": { + "effect": 1.9642447505910872, + "se": 0.03550588757625699, + "t_stat": 55.32166310087091, + "p_value": 0.005, + "conf_int": [ + 1.9031798332625876, + 2.0349276126964804 + ] + }, + "3,7": { + "effect": 1.9111970606830675, + "se": 0.04215517445124514, + "t_stat": 45.33718779632796, + "p_value": 0.005, + "conf_int": [ + 1.8317540385638944, + 1.9861691912210824 + ] + }, + "3,8": { + "effect": 1.8907391460016552, + "se": 0.041141212122272515, + "t_stat": 
45.95730287144531, + "p_value": 0.005, + "conf_int": [ + 1.8101546131615982, + 1.9726885614243597 + ] + }, + "3,9": { + "effect": 1.9476575976206445, + "se": 0.04005279057361501, + "t_stat": 48.62726341229453, + "p_value": 0.005, + "conf_int": [ + 1.866739167929477, + 2.025160485606026 + ] + }, + "3,10": { + "effect": 1.9527663424053017, + "se": 0.04162937286162342, + "t_stat": 46.90837762308652, + "p_value": 0.005, + "conf_int": [ + 1.8753942052367765, + 2.0384099353518796 + ] + }, + "5,2": { + "effect": -0.006037903440633564, + "se": 0.035305026252725066, + "t_stat": -0.17102107211058992, + "p_value": 1.0, + "conf_int": [ + -0.07286530356210216, + 0.07078807949668509 + ] + }, + "5,3": { + "effect": -0.010019839744307896, + "se": 0.033778245600191145, + "t_stat": -0.29663588402149565, + "p_value": 0.6733668341708543, + "conf_int": [ + -0.08409476442485322, + 0.050528584862707955 + ] + }, + "5,4": { + "effect": 0.030251503825274532, + "se": 0.0351071416807902, + "t_stat": 0.8616908804577342, + "p_value": 0.3316582914572864, + "conf_int": [ + -0.037883155279237926, + 0.09888375281601194 + ] + }, + "5,5": { + "effect": 1.9698570376024231, + "se": 0.0329857883744562, + "t_stat": 59.71835553058532, + "p_value": 0.005, + "conf_int": [ + 1.9017919849001035, + 2.025841142548297 + ] + }, + "5,6": { + "effect": 1.9687813278492947, + "se": 0.03505019869571444, + "t_stat": 56.1703328686127, + "p_value": 0.005, + "conf_int": [ + 1.8946177010505283, + 2.0355230703402873 + ] + }, + "5,7": { + "effect": 1.9591059979468466, + "se": 0.0423820486046726, + "t_stat": 46.22490092965577, + "p_value": 0.005, + "conf_int": [ + 1.8683695171382486, + 2.037000627646724 + ] + }, + "5,8": { + "effect": 1.9583485069466597, + "se": 0.038239386119659886, + "t_stat": 51.21286468403374, + "p_value": 0.005, + "conf_int": [ + 1.87985678227721, + 2.0275965324785616 + ] + }, + "5,9": { + "effect": 1.9680728482414127, + "se": 0.040433228859158385, + "t_stat": 48.67463973992351, + "p_value": 0.005, + 
"conf_int": [ + 1.8889600469316306, + 2.0378964492524645 + ] + }, + "5,10": { + "effect": 2.041542473273583, + "se": 0.04090327871367535, + "t_stat": 49.91146278430307, + "p_value": 0.005, + "conf_int": [ + 1.9626750417759662, + 2.1220209763813966 + ] + }, + "7,2": { + "effect": 0.1090259892108355, + "se": 0.03505099943083932, + "t_stat": 3.110495876899588, + "p_value": 0.005, + "conf_int": [ + 0.03327819307450883, + 0.17530573202082425 + ] + }, + "7,3": { + "effect": -0.010588144108126162, + "se": 0.03557335307032487, + "t_stat": -0.29764256653553256, + "p_value": 0.6934673366834171, + "conf_int": [ + -0.0844244380004211, + 0.058986361668644044 + ] + }, + "7,4": { + "effect": -0.039989874998767785, + "se": 0.032333526843147026, + "t_stat": -1.236792855686991, + "p_value": 0.22110552763819097, + "conf_int": [ + -0.09882584974171368, + 0.03719313909336867 + ] + }, + "7,5": { + "effect": 0.031516709425282396, + "se": 0.03570024858040153, + "t_stat": 0.8828148452328766, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.039920074740144994, + 0.09391325324408069 + ] + }, + "7,6": { + "effect": -0.0248176132034209, + "se": 0.03867304753388797, + "t_stat": -0.6417289245610657, + "p_value": 0.4120603015075377, + "conf_int": [ + -0.1042984040074484, + 0.050068412221070585 + ] + }, + "7,7": { + "effect": 1.9629849703601747, + "se": 0.04114245866354805, + "t_stat": 47.71190235403619, + "p_value": 0.005, + "conf_int": [ + 1.887343920050067, + 2.0408960645648495 + ] + }, + "7,8": { + "effect": 1.9421658097107328, + "se": 0.03995054458035153, + "t_stat": 48.614251197615175, + "p_value": 0.005, + "conf_int": [ + 1.8678335827357593, + 2.020387694325642 + ] + }, + "7,9": { + "effect": 1.9479630002274304, + "se": 0.03996314877982817, + "t_stat": 48.74398188589898, + "p_value": 0.005, + "conf_int": [ + 1.880622292881371, + 2.0204771535651664 + ] + }, + "7,10": { + "effect": 2.036679145649886, + "se": 0.04377297071508097, + "t_stat": 46.52823677210912, + "p_value": 0.005, + 
"conf_int": [ + 1.949034268449787, + 2.1112358988175512 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.1090259892108355, + "se": 0.03505099943083932, + "t_stat": 3.110495876899588, + "p_value": 0.005, + "conf_int": [ + 0.03327819307450884, + 0.17530573202082422 + ] + }, + "-4": { + "effect": -0.010588144108126162, + "se": 0.03557335307032487, + "t_stat": -0.29764256653553256, + "p_value": 0.6934673366834171, + "conf_int": [ + -0.08442443800042101, + 0.05898636166864399 + ] + }, + "-3": { + "effect": -0.02266994246670453, + "se": 0.02350575542299432, + "t_stat": -0.964442199740062, + "p_value": 0.45226130653266333, + "conf_int": [ + -0.06533633030823635, + 0.02339000518359672 + ] + }, + "-2": { + "effect": 0.01032765334940113, + "se": 0.024477783630683345, + "t_stat": 0.42191946400143965, + "p_value": 0.7537688442211056, + "conf_int": [ + -0.047963103188348045, + 0.052336891767644146 + ] + }, + "-1": { + "effect": -0.006082300716506086, + "se": 0.020679676972488178, + "t_stat": -0.29411971592195835, + "p_value": 0.7236180904522613, + "conf_int": [ + -0.04341187930843545, + 0.028831834454777737 + ] + }, + "0": { + "effect": 1.9522506213622821, + "se": 0.01814847355830751, + "t_stat": 107.57106459064346, + "p_value": 0.005, + "conf_int": [ + 1.9187760412044523, + 1.9849973203989215 + ] + }, + "1": { + "effect": 1.9589985961409027, + "se": 0.019486525770677453, + "t_stat": 100.5309319472805, + "p_value": 0.005, + "conf_int": [ + 1.9210512194452734, + 1.9974718919237648 + ] + }, + "2": { + "effect": 1.9518097042928502, + "se": 0.0207633387266417, + "t_stat": 94.00269051086946, + "p_value": 0.005, + "conf_int": [ + 1.913417536737055, + 1.9909164553349423 + ] + }, + "3": { + "effect": 1.985960464225669, + "se": 0.022407086017329974, + "t_stat": 88.63091178789145, + "p_value": 0.005, + "conf_int": [ + 1.9465227111176464, + 2.0245238108692765 + ] + }, + "4": { + "effect": 1.940049071008396, + "se": 0.028935660828047872, + "t_stat": 67.0469937609951, + "p_value": 
0.005, + "conf_int": [ + 1.8810149133513567, + 1.9918254365459642 + ] + }, + "5": { + "effect": 1.9672388191020964, + "se": 0.02887423122372237, + "t_stat": 68.13129685980559, + "p_value": 0.005, + "conf_int": [ + 1.9131963086514905, + 2.019982112410836 + ] + }, + "6": { + "effect": 1.9476575976206445, + "se": 0.04005279057361501, + "t_stat": 48.62726341229453, + "p_value": 0.005, + "conf_int": [ + 1.866739167929477, + 2.025160485606026 + ] + }, + "7": { + "effect": 1.9527663424053017, + "se": 0.04162937286162344, + "t_stat": 46.90837762308651, + "p_value": 0.005, + "conf_int": [ + 1.8753942052367765, + 2.03840993535188 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9379732080320693, + "se": 0.027348748599256394, + "t_stat": 70.86149485043576, + "p_value": 0.005, + "conf_int": [ + 1.8890625503369387, + 1.9846384878549268 + ] + }, + "5": { + "effect": 1.9776180319767032, + "se": 0.02946287209657048, + "t_stat": 67.12237780127691, + "p_value": 0.005, + "conf_int": [ + 1.9220032490097771, + 2.0295504746249913 + ] + }, + "7": { + "effect": 1.9724482314870557, + "se": 0.03348899468270557, + "t_stat": 58.89840080824135, + "p_value": 0.005, + "conf_int": [ + 1.9113054655520512, + 2.0358791270186636 + ] + } + } + }, + "reg_2cov_nyt": { + "overall_att": 1.9589962129873684, + "overall_se": 0.017217345449195905, + "overall_p_value": 0.005, + "overall_ci": [ + 1.927777909830344, + 1.9900721069556977 + ], + "group_time_effects": { + "3,2": { + "effect": -0.024966998685347557, + "se": 0.034374930536244894, + "t_stat": -0.726314156737637, + "p_value": 0.4221105527638191, + "conf_int": [ + -0.09301416597661631, + 0.03719900116326701 + ] + }, + "3,3": { + "effect": 1.9235116026210928, + "se": 0.033363987953878314, + "t_stat": 57.652328770772705, + "p_value": 0.005, + "conf_int": [ + 1.8595032919944294, + 1.983858574870647 + ] + }, + "3,4": { + "effect": 1.9655678070201976, + "se": 0.033151877981396786, + "t_stat": 59.289787689348344, + "p_value": 0.005, + "conf_int": [ + 
1.9069132494126741, + 2.0357629896649065 + ] + }, + "3,5": { + "effect": 1.9481128201613773, + "se": 0.03297010417844233, + "t_stat": 59.087250971902016, + "p_value": 0.005, + "conf_int": [ + 1.8856578181258636, + 2.003488317118942 + ] + }, + "3,6": { + "effect": 1.9642413325003203, + "se": 0.035527120561122096, + "t_stat": 55.28850358477465, + "p_value": 0.005, + "conf_int": [ + 1.9032205100954631, + 2.0350420702889043 + ] + }, + "3,7": { + "effect": 1.9111981714382718, + "se": 0.042146666625498944, + "t_stat": 45.346366022739915, + "p_value": 0.005, + "conf_int": [ + 1.8301925434423028, + 1.9867824935722587 + ] + }, + "3,8": { + "effect": 1.890746912643064, + "se": 0.04112894043025772, + "t_stat": 45.97120404424715, + "p_value": 0.005, + "conf_int": [ + 1.8093419538451232, + 1.9737883443125592 + ] + }, + "3,9": { + "effect": 1.947651168397633, + "se": 0.04007646253864604, + "t_stat": 48.59838032160394, + "p_value": 0.005, + "conf_int": [ + 1.8669352674463777, + 2.0239577902885184 + ] + }, + "3,10": { + "effect": 1.9527553566442448, + "se": 0.04163009290833081, + "t_stat": 46.90730239166651, + "p_value": 0.005, + "conf_int": [ + 1.876068943818699, + 2.0372132442384356 + ] + }, + "5,2": { + "effect": -0.006037938339828159, + "se": 0.035325428417678245, + "t_stat": -0.17092328699986933, + "p_value": 1.0, + "conf_int": [ + -0.07268852058380058, + 0.07081992927069146 + ] + }, + "5,3": { + "effect": -0.010013107610279525, + "se": 0.033788108576754745, + "t_stat": -0.2963500483471353, + "p_value": 0.6733668341708543, + "conf_int": [ + -0.08444097019194169, + 0.05048982223198024 + ] + }, + "5,4": { + "effect": 0.030262441958152038, + "se": 0.03513594985684198, + "t_stat": 0.8612956838068537, + "p_value": 0.3316582914572864, + "conf_int": [ + -0.03784278560932994, + 0.09918304612750779 + ] + }, + "5,5": { + "effect": 1.969857008810436, + "se": 0.03298410758110572, + "t_stat": 59.72139776608141, + "p_value": 0.005, + "conf_int": [ + 1.9017988394826137, + 2.0257980073930653 
+ ] + }, + "5,6": { + "effect": 1.9687813068952833, + "se": 0.03504129532108484, + "t_stat": 56.184604160755434, + "p_value": 0.005, + "conf_int": [ + 1.8946594946300568, + 2.0354466424224444 + ] + }, + "5,7": { + "effect": 1.9591061436341544, + "se": 0.04237635351998152, + "t_stat": 46.23111667006427, + "p_value": 0.005, + "conf_int": [ + 1.8683816514911504, + 2.036844133502373 + ] + }, + "5,8": { + "effect": 1.9583487775657027, + "se": 0.038230837179403815, + "t_stat": 51.2243236624891, + "p_value": 0.005, + "conf_int": [ + 1.8798819853574242, + 2.0275098188160823 + ] + }, + "5,9": { + "effect": 1.9680734744163075, + "se": 0.04042280551313381, + "t_stat": 48.68720637850975, + "p_value": 0.005, + "conf_int": [ + 1.8889511938454833, + 2.0377588390705172 + ] + }, + "5,10": { + "effect": 2.041542859628759, + "se": 0.04090256204207493, + "t_stat": 49.91234675052141, + "p_value": 0.005, + "conf_int": [ + 1.962724149016656, + 2.121791821823951 + ] + }, + "7,2": { + "effect": 0.10902875706279118, + "se": 0.03503801424080621, + "t_stat": 3.1117276314082143, + "p_value": 0.005, + "conf_int": [ + 0.032950013880304115, + 0.17502568508840102 + ] + }, + "7,3": { + "effect": -0.010588084294212995, + "se": 0.035581083797696596, + "t_stat": -0.2975762164641661, + "p_value": 0.6834170854271356, + "conf_int": [ + -0.0844355009791085, + 0.058992609120250526 + ] + }, + "7,4": { + "effect": -0.03984265621929237, + "se": 0.032354707434724975, + "t_stat": -1.2314330549789145, + "p_value": 0.24120603015075376, + "conf_int": [ + -0.09883304925477608, + 0.03698521501439384 + ] + }, + "7,5": { + "effect": 0.03151613208282335, + "se": 0.03568695911218706, + "t_stat": 0.8831274187231217, + "p_value": 0.35175879396984927, + "conf_int": [ + -0.03970009970795312, + 0.09400214480866743 + ] + }, + "7,6": { + "effect": -0.024811280194351713, + "se": 0.03848463898512638, + "t_stat": -0.644706065813449, + "p_value": 0.4020100502512563, + "conf_int": [ + -0.10377323243881807, + 0.0475238862955143 + ] 
+ }, + "7,7": { + "effect": 1.9629363558831094, + "se": 0.04097548866714681, + "t_stat": 47.9051359662477, + "p_value": 0.005, + "conf_int": [ + 1.88728835346253, + 2.040027750833742 + ] + }, + "7,8": { + "effect": 1.9421673057644255, + "se": 0.03993612493409277, + "t_stat": 48.631841696449406, + "p_value": 0.005, + "conf_int": [ + 1.868297081138492, + 2.0208129670706434 + ] + }, + "7,9": { + "effect": 1.947942344886539, + "se": 0.03997444803606312, + "t_stat": 48.729687102350844, + "p_value": 0.005, + "conf_int": [ + 1.8806508748711863, + 2.0213528987522054 + ] + }, + "7,10": { + "effect": 2.0366982942448257, + "se": 0.04380240774919928, + "t_stat": 46.497405026372256, + "p_value": 0.005, + "conf_int": [ + 1.9494247189849796, + 2.110535209487657 + ] + } + }, + "event_study": { + "-5": { + "effect": 0.10902875706279118, + "se": 0.0350380142408062, + "t_stat": 3.1117276314082147, + "p_value": 0.005, + "conf_int": [ + 0.032950013880304115, + 0.17502568508840108 + ] + }, + "-4": { + "effect": -0.010588084294212995, + "se": 0.035581083797696596, + "t_stat": -0.2975762164641661, + "p_value": 0.6834170854271356, + "conf_int": [ + -0.08443550097910853, + 0.058992609120250505 + ] + }, + "-3": { + "effect": -0.022597842263930787, + "se": 0.023511897879760425, + "t_stat": -0.9611236991371727, + "p_value": 0.4321608040201005, + "conf_int": [ + -0.06534982518032498, + 0.023599724927229025 + ] + }, + "-2": { + "effect": 0.010330804793041881, + "se": 0.024490162937030775, + "t_stat": 0.421834873847287, + "p_value": 0.7537688442211056, + "conf_int": [ + -0.047700187700442, + 0.05272860576285881 + ] + }, + "-1": { + "effect": -0.00607649135987052, + "se": 0.020613962061688775, + "t_stat": -0.2947755187327007, + "p_value": 0.7336683417085427, + "conf_int": [ + -0.04291763038882027, + 0.0289382804948964 + ] + }, + "0": { + "effect": 1.9522348240034328, + "se": 0.01817153581990779, + "t_stat": 107.43367227467179, + "p_value": 0.005, + "conf_int": [ + 1.9186581568373569, + 
1.9858195114126422 + ] + }, + "1": { + "effect": 1.9589987803422293, + "se": 0.01948758166607304, + "t_stat": 100.52549433328373, + "p_value": 0.005, + "conf_int": [ + 1.9212719399953113, + 1.99776325318124 + ] + }, + "2": { + "effect": 1.9518069477227613, + "se": 0.020757459771082076, + "t_stat": 94.0291812797773, + "p_value": 0.005, + "conf_int": [ + 1.9132017157946304, + 1.9908956463347152 + ] + }, + "3": { + "effect": 1.9859656964683081, + "se": 0.022442874449064055, + "t_stat": 88.48981002748201, + "p_value": 0.005, + "conf_int": [ + 1.9461685980074304, + 2.024526446388967 + ] + }, + "4": { + "effect": 1.9400499359451828, + "se": 0.02893850714349226, + "t_stat": 67.04042908382938, + "p_value": 0.005, + "conf_int": [ + 1.8809443225358153, + 1.991730287211487 + ] + }, + "5": { + "effect": 1.967242841864014, + "se": 0.028899306302645814, + "t_stat": 68.0723205346942, + "p_value": 0.005, + "conf_int": [ + 1.9127788508721326, + 2.019934522626693 + ] + }, + "6": { + "effect": 1.947651168397633, + "se": 0.040076462538646024, + "t_stat": 48.598380321603955, + "p_value": 0.005, + "conf_int": [ + 1.8669352674463777, + 2.0239577902885184 + ] + }, + "7": { + "effect": 1.9527553566442448, + "se": 0.0416300929083308, + "t_stat": 46.90730239166652, + "p_value": 0.005, + "conf_int": [ + 1.876068943818699, + 2.037213244238435 + ] + } + }, + "group_effects": { + "3": { + "effect": 1.9379731464282752, + "se": 0.027354393932262182, + "t_stat": 70.84686837614781, + "p_value": 0.005, + "conf_int": [ + 1.8891430541850542, + 1.9844547989387653 + ] + }, + "5": { + "effect": 1.977618261825107, + "se": 0.029456139694155045, + "t_stat": 67.13772688338805, + "p_value": 0.005, + "conf_int": [ + 1.9219215453418508, + 2.02949447907519 + ] + }, + "7": { + "effect": 1.9724360751947247, + "se": 0.03346783119193577, + "t_stat": 58.93528217836812, + "p_value": 0.005, + "conf_int": [ + 1.912440579136104, + 2.036883663702173 + ] + } + } + } +} \ No newline at end of file diff --git 
"""
Benchmark CallawaySantAnna.fit() at multiple scales with per-phase granularity.

Usage:
    python benchmarks/speed_review/bench_callaway.py
"""

import time
import sys
import numpy as np
import pandas as pd

sys.path.insert(0, ".")
from diff_diff import CallawaySantAnna


def generate_staggered_data(n_units, n_periods=10, n_cohorts=5, seed=42):
    """Generate panel data with staggered treatment adoption.

    Parameters
    ----------
    n_units : int
        Number of panel units.
    n_periods : int, default=10
        Number of time periods (1..n_periods).
    n_cohorts : int, default=5
        Number of distinct treatment-adoption cohorts.
    seed : int, default=42
        RNG seed for reproducible data.

    Returns
    -------
    pd.DataFrame
        Long panel with columns [unit, time, outcome, first_treat];
        first_treat == 0 marks never-treated units.
    """
    rng = np.random.default_rng(seed)

    # Assign cohorts: ~20% never-treated (coded 0), rest split among cohorts
    # whose adoption periods are spread evenly over [3, n_periods - 2].
    treatment_periods = np.linspace(3, n_periods - 2, n_cohorts, dtype=int)
    cohort_assignment = rng.choice(
        [0] + list(treatment_periods),
        size=n_units,
        p=[0.2] + [0.8 / n_cohorts] * n_cohorts,
    )

    # Build rows in a fixed (unit, time) order so the RNG draw sequence —
    # and hence the generated data — is deterministic for a given seed.
    rows = []
    for i in range(n_units):
        g = cohort_assignment[i]
        for t in range(1, n_periods + 1):
            treated = 1 if (g > 0 and t >= g) else 0
            y = rng.normal(0, 1) + 2.0 * treated  # true ATT = 2.0
            rows.append((i, t, y, g))

    df = pd.DataFrame(rows, columns=["unit", "time", "outcome", "first_treat"])
    return df


def bench_fit(n_units, n_bootstrap=0, covariates=None, n_cohorts=5, n_runs=3,
              estimation_method="reg"):
    """Benchmark fit() and return the median wall-clock time over n_runs.

    Data generation happens once (outside the timed region); only
    CallawaySantAnna.fit() is timed.
    """
    df = generate_staggered_data(n_units, n_cohorts=n_cohorts)

    if covariates:
        # Covariates drawn from a separate RNG so they do not perturb outcomes.
        rng = np.random.default_rng(99)
        for cov in covariates:
            df[cov] = rng.normal(size=len(df))

    cs = CallawaySantAnna(
        n_bootstrap=n_bootstrap,
        seed=123,
        estimation_method=estimation_method,
    )

    times = []
    for _ in range(n_runs):
        start = time.perf_counter()
        cs.fit(
            df,
            outcome="outcome",
            unit="unit",
            time="time",
            first_treat="first_treat",
            covariates=covariates,
            aggregate="all",
        )
        elapsed = time.perf_counter() - start
        times.append(elapsed)

    return np.median(times)


def main():
    """Run the full benchmark suite and print one table per configuration."""
    scales = [1_000, 5_000, 10_000, 50_000]
    small_scales = scales[:3]  # bootstrap / covariate runs skip 50K (too slow)
    cov2 = ["x1", "x2"]
    cov10 = [f"x{i}" for i in range(1, 11)]

    print("=" * 72)
    print("CallawaySantAnna Benchmark Suite")
    print("=" * 72)

    # (title, scales to run, bench_fit keyword arguments)
    sections = [
        ("No covariates, no bootstrap", scales,
         dict(n_bootstrap=0, n_runs=3)),
        ("No covariates, bootstrap=999", small_scales,
         dict(n_bootstrap=999, n_runs=1)),
        ("2 covariates, reg, no bootstrap", small_scales,
         dict(n_bootstrap=0, covariates=cov2, n_runs=3)),
        ("10 covariates, reg, no bootstrap", small_scales,
         dict(n_bootstrap=0, covariates=cov10, n_runs=3)),
        ("2 covariates, dr, no bootstrap", small_scales,
         dict(n_bootstrap=0, covariates=cov2, n_runs=3, estimation_method="dr")),
        ("2 covariates, ipw, no bootstrap", small_scales,
         dict(n_bootstrap=0, covariates=cov2, n_runs=3, estimation_method="ipw")),
    ]

    for title, section_scales, kwargs in sections:
        print(f"\n--- {title} ---")
        print(f"{'Units':>10} {'Time (s)':>10}")
        for n in section_scales:
            t = bench_fit(n, **kwargs)
            print(f"{n:>10} {t:>10.4f}")

    # 50K-unit stress case with 10 covariates (single run; too slow to repeat)
    print("\n--- 10 covariates, reg, 50K units ---")
    t = bench_fit(50_000, n_bootstrap=0, covariates=cov10, n_runs=1)
    print(f"{'50000':>10} {t:>10.4f}")

    print("\nDone.")

if __name__ == "__main__":
    main()


"""
Validate that optimization changes produce identical results.

Usage:
    # Save baseline (run BEFORE code changes):
    python benchmarks/speed_review/validate_results.py --save

    # Validate (run AFTER code changes):
    python benchmarks/speed_review/validate_results.py --check
"""

import argparse
import json
import sys
import numpy as np
import pandas as pd

sys.path.insert(0, ".")
from diff_diff import CallawaySantAnna


def generate_data(n_units=10_000, seed=42, n_covariates=0):
    """Generate deterministic test data.

    Outcomes are a unit-period normal draw plus a constant treatment effect
    of 2.0 once a unit's cohort is treated. Covariates (if requested) are
    drawn from an independent RNG stream (seed + 1) so that adding
    covariates does not perturb the outcome draws.
    """
    rng = np.random.default_rng(seed)
    n_periods = 10
    treatment_periods = [3, 5, 7]

    # 25% never-treated (coded 0), 25% per cohort.
    cohort_assignment = rng.choice(
        [0] + treatment_periods,
        size=n_units,
        p=[0.25, 0.25, 0.25, 0.25],
    )

    rows = []
    for i in range(n_units):
        g = cohort_assignment[i]
        for t in range(1, n_periods + 1):
            treated = 1 if (g > 0 and t >= g) else 0
            y = rng.normal(0, 1) + 2.0 * treated
            rows.append((i, t, y, g))

    df = pd.DataFrame(rows, columns=["unit", "time", "outcome", "first_treat"])

    if n_covariates > 0:
        cov_rng = np.random.default_rng(seed + 1)
        for i in range(1, n_covariates + 1):
            df[f"x{i}"] = cov_rng.normal(size=len(df))

    return df


def _effect_to_dict(data):
    """Convert one effect entry to a JSON-serializable dict of plain floats."""
    return {
        "effect": float(data["effect"]),
        "se": float(data["se"]),
        "t_stat": float(data["t_stat"]),
        "p_value": float(data["p_value"]),
        "conf_int": [float(data["conf_int"][0]), float(data["conf_int"][1])],
    }


def run_estimator(df, estimation_method="reg", covariates=None, control_group="never_treated"):
    """Run the estimator and extract key results as plain floats for JSON I/O."""
    cs = CallawaySantAnna(
        n_bootstrap=199,
        seed=42,
        estimation_method=estimation_method,
        control_group=control_group,
    )
    results = cs.fit(
        df,
        outcome="outcome",
        unit="unit",
        time="time",
        first_treat="first_treat",
        covariates=covariates,
        aggregate="all",
    )

    out = {
        "overall_att": float(results.overall_att),
        "overall_se": float(results.overall_se),
        "overall_p_value": float(results.overall_p_value),
        "overall_ci": [float(results.overall_conf_int[0]), float(results.overall_conf_int[1])],
    }

    # Group-time effects (sorted for determinism)
    out["group_time_effects"] = {
        f"{g},{t}": _effect_to_dict(data)
        for (g, t), data in sorted(results.group_time_effects.items())
    }

    # Event study (present only when the estimator produced it)
    if results.event_study_effects:
        out["event_study"] = {
            str(e): _effect_to_dict(data)
            for e, data in sorted(results.event_study_effects.items())
        }

    # Group effects (present only when the estimator produced them)
    if results.group_effects:
        out["group_effects"] = {
            str(g_key): _effect_to_dict(data)
            for g_key, data in sorted(results.group_effects.items())
        }

    return out


SCENARIOS = [
    {"name": "reg_nocov", "method": "reg", "n_cov": 0},
    {"name": "reg_2cov", "method": "reg", "n_cov": 2},
    {"name": "reg_10cov", "method": "reg", "n_cov": 10},
    {"name": "dr_2cov", "method": "dr", "n_cov": 2},
    {"name": "ipw_2cov", "method": "ipw", "n_cov": 2},
    {"name": "ipw_2cov_nyt", "method": "ipw", "n_cov": 2, "control_group": "not_yet_treated"},
    {"name": "dr_2cov_nyt", "method": "dr", "n_cov": 2, "control_group": "not_yet_treated"},
    {"name": "reg_2cov_nyt", "method": "reg", "n_cov": 2, "control_group": "not_yet_treated"},
]


def save_baseline(path="benchmarks/speed_review/baseline_results.json"):
    """Save baseline results for all scenarios."""
    all_results = {}
    for scenario in SCENARIOS:
        name = scenario["name"]
        print(f"Running scenario: {name} ...")
        df = generate_data(n_covariates=scenario["n_cov"])
        covariates = [f"x{i}" for i in range(1, scenario["n_cov"] + 1)] if scenario["n_cov"] > 0 else None
        control_group = scenario.get("control_group", "never_treated")
        results = run_estimator(df, estimation_method=scenario["method"],
                                covariates=covariates, control_group=control_group)
        all_results[name] = results
        print(f"  Overall ATT: {results['overall_att']:.10f}")
        print(f"  N group-time effects: {len(results['group_time_effects'])}")

    with open(path, "w") as f:
        json.dump(all_results, f, indent=2)
    print(f"\nBaseline saved to {path}")


def check_results(path="benchmarks/speed_review/baseline_results.json", tol=1e-12):
    """Check current results against baseline for all scenarios.

    Exits with status 1 if any value differs from baseline beyond its
    tolerance. Point estimates are held to `tol` (relaxed to 1e-10 for
    covariate scenarios, where Cholesky vs. lstsq can differ in the last
    bits); bootstrap-derived SEs/CIs/p-values get looser tolerances.
    """
    with open(path) as f:
        all_baseline = json.load(f)

    all_failures = []

    for scenario in SCENARIOS:
        name = scenario["name"]
        if name not in all_baseline:
            print(f"  Skipping {name} (no baseline)")
            continue

        baseline = all_baseline[name]
        df = generate_data(n_covariates=scenario["n_cov"])
        covariates = [f"x{i}" for i in range(1, scenario["n_cov"] + 1)] if scenario["n_cov"] > 0 else None
        control_group = scenario.get("control_group", "never_treated")

        # Use relaxed tolerance for covariate scenarios (Cholesky vs lstsq)
        scenario_tol = 1e-10 if scenario["n_cov"] > 0 else tol
        # SE tolerance: tight for covariate scenarios, relaxed for bootstrap noise
        gt_se_tol = 1e-8 if scenario["n_cov"] > 0 else 0.01

        current = run_estimator(df, estimation_method=scenario["method"],
                                covariates=covariates, control_group=control_group)

        failures = []

        def compare(label, base_val, cur_val, t):
            # NaN == NaN counts as a match; NaN vs finite is a mismatch.
            if np.isnan(base_val) and np.isnan(cur_val):
                return
            if np.isnan(base_val) or np.isnan(cur_val):
                failures.append(f"  {label}: NaN mismatch baseline={base_val}, current={cur_val}")
                return
            diff = abs(base_val - cur_val)
            if diff > t:
                failures.append(f"  {label}: baseline={base_val:.15e}, current={cur_val:.15e}, diff={diff:.2e}")

        def compare_entry(prefix, b, c):
            # Shared per-entry comparison for group-time / event-study / group effects.
            compare(f"{prefix}.effect", b["effect"], c["effect"], scenario_tol)
            compare(f"{prefix}.se", b["se"], c["se"], gt_se_tol)
            if "t_stat" in b and "t_stat" in c:
                compare(f"{prefix}.t_stat", b["t_stat"], c["t_stat"], gt_se_tol)
            if "p_value" in b and "p_value" in c:
                compare(f"{prefix}.p_value", b["p_value"], c["p_value"], 0.02)
            if "conf_int" in b and "conf_int" in c:
                for i, side in enumerate(["lower", "upper"]):
                    compare(f"{prefix}.ci.{side}", b["conf_int"][i], c["conf_int"][i], gt_se_tol)

        compare(f"{name}/overall_att", baseline["overall_att"], current["overall_att"], scenario_tol)
        compare(f"{name}/overall_se", baseline["overall_se"], current["overall_se"], scenario_tol)
        compare(f"{name}/overall_p_value", baseline["overall_p_value"], current["overall_p_value"], 0.02)

        # Compare overall CI values
        if "overall_ci" in baseline and "overall_ci" in current:
            for i, side in enumerate(["lower", "upper"]):
                compare(f"{name}/overall_ci.{side}",
                        baseline["overall_ci"][i], current["overall_ci"][i], scenario_tol)

        for key, b in baseline["group_time_effects"].items():
            c = current["group_time_effects"].get(key, {})
            if not c:
                failures.append(f"  {name}/Missing group-time effect: {key}")
                continue
            compare_entry(f"{name}/gt[{key}]", b, c)

        # Compare event study effects/SEs if present
        if "event_study" in baseline and "event_study" in current:
            for e_key, b in baseline["event_study"].items():
                c = current["event_study"].get(e_key, {})
                if not c:
                    failures.append(f"  {name}/Missing event study effect: e={e_key}")
                    continue
                compare_entry(f"{name}/es[{e_key}]", b, c)

        # Compare group effects if present
        if "group_effects" in baseline and "group_effects" in current:
            for g_key, b in baseline["group_effects"].items():
                c = current["group_effects"].get(g_key, {})
                if not c:
                    failures.append(f"  {name}/Missing group effect: g={g_key}")
                    continue
                compare_entry(f"{name}/ge[{g_key}]", b, c)

        if failures:
            all_failures.extend(failures)
            print(f"  {name}: FAILED ({len(failures)} mismatches)")
        else:
            print(f"  {name}: PASSED ({len(current['group_time_effects'])} effects checked)")

    if all_failures:
        print("\nVALIDATION FAILED:")
        for f in all_failures:
            print(f)
        sys.exit(1)
    else:
        print("\nALL SCENARIOS PASSED")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--save", action="store_true", help="Save baseline results")
    parser.add_argument("--check", action="store_true", help="Check against baseline")
    parser.add_argument("--tol", type=float, default=1e-12, help="Tolerance for comparison")
    args = parser.parse_args()

    if args.save:
        save_baseline()
    elif args.check:
        check_results(tol=args.tol)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
def compute_effect_bootstrap_stats_batch(
    original_effects: np.ndarray,
    bootstrap_matrix: np.ndarray,
    alpha: float = 0.05,
) -> tuple:
    """
    Batch-compute bootstrap statistics for multiple effects at once.

    SEs, percentile CIs, and two-sided p-values are computed with vectorized
    numpy operations for every effect whose bootstrap draws are all finite
    (the common case). Effects with some non-finite draws fall back to the
    scalar `compute_effect_bootstrap_stats`. Effects with a non-finite point
    estimate, fewer than 50% finite bootstrap draws, or a non-positive
    bootstrap SE get NaN for all statistics (with a RuntimeWarning).

    Parameters
    ----------
    original_effects : np.ndarray
        Array of original point estimates, shape (n_effects,).
    bootstrap_matrix : np.ndarray
        Bootstrap distributions, shape (n_bootstrap, n_effects).
    alpha : float, default=0.05
        Significance level.

    Returns
    -------
    ses : np.ndarray
        Bootstrap SEs for each effect.
    ci_lowers : np.ndarray
        Lower CI bounds for each effect.
    ci_uppers : np.ndarray
        Upper CI bounds for each effect.
    p_values : np.ndarray
        Bootstrap p-values for each effect.
    """
    n_bootstrap, n_effects = bootstrap_matrix.shape
    ses = np.full(n_effects, np.nan)
    ci_lowers = np.full(n_effects, np.nan)
    ci_uppers = np.full(n_effects, np.nan)
    p_values = np.full(n_effects, np.nan)

    # Check for non-finite original effects
    valid_effects = np.isfinite(original_effects)
    if not np.any(valid_effects):
        return ses, ci_lowers, ci_uppers, p_values

    # Count valid bootstrap samples per effect
    finite_mask = np.isfinite(bootstrap_matrix)  # (n_bootstrap, n_effects)
    n_valid = finite_mask.sum(axis=0)  # (n_effects,)

    # Determine which effects have enough valid samples (>= 50% of draws)
    enough_valid = (n_valid >= n_bootstrap * 0.5) & valid_effects

    if not np.any(enough_valid):
        n_insufficient = int(np.sum(valid_effects))
        if n_insufficient > 0:
            warnings.warn(
                f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
                "Returning NaN for SE/CI/p-value.",
                RuntimeWarning,
                stacklevel=2,
            )
        return ses, ci_lowers, ci_uppers, p_values

    # Warn about subset with insufficient samples
    n_insufficient = int(np.sum(valid_effects & ~enough_valid))
    if n_insufficient > 0:
        warnings.warn(
            f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
            "Returning NaN for SE/CI/p-value.",
            RuntimeWarning,
            stacklevel=2,
        )

    # For effects with all-finite bootstraps (common case), use vectorized ops
    all_finite = (n_valid == n_bootstrap) & enough_valid
    if np.any(all_finite):
        idx = np.where(all_finite)[0]
        sub = bootstrap_matrix[:, idx]

        # Vectorized SE: std across bootstrap dimension
        batch_ses = np.std(sub, axis=0, ddof=1)

        # Vectorized percentile CI
        lower_pct = alpha / 2 * 100
        upper_pct = (1 - alpha / 2) * 100
        batch_ci = np.percentile(sub, [lower_pct, upper_pct], axis=0)

        # Vectorized two-sided p-values: tail probability on the side of zero
        # opposite the estimate's sign, doubled, clipped to [1/(B+1), 1].
        # (Replaces a per-effect Python loop that defeated the batching.)
        tail_prob = np.where(
            original_effects[idx] >= 0,
            np.mean(sub <= 0, axis=0),
            np.mean(sub >= 0, axis=0),
        )
        batch_p = np.minimum(2 * tail_prob, 1.0)
        batch_p = np.maximum(batch_p, 1 / (n_bootstrap + 1))

        # Guard: SE must be positive and finite
        se_valid = np.isfinite(batch_ses) & (batch_ses > 0)
        n_bad_se = int(np.sum(~se_valid))
        if n_bad_se > 0:
            warnings.warn(
                f"{n_bad_se} effect(s) had non-finite or zero bootstrap SE. "
                "Returning NaN for SE/CI/p-value.",
                RuntimeWarning,
                stacklevel=2,
            )
        ses[idx[se_valid]] = batch_ses[se_valid]
        ci_lowers[idx[se_valid]] = batch_ci[0][se_valid]
        ci_uppers[idx[se_valid]] = batch_ci[1][se_valid]
        p_values[idx[se_valid]] = batch_p[se_valid]

    # Handle effects with some non-finite bootstraps (rare) via scalar fallback
    partial_valid = enough_valid & ~all_finite
    if np.any(partial_valid):
        for j in np.where(partial_valid)[0]:
            se, ci, pv = compute_effect_bootstrap_stats(
                original_effects[j], bootstrap_matrix[:, j], alpha=alpha,
                context=f"effect {j}"
            )
            ses[j] = se
            ci_lowers[j] = ci[0]
            ci_uppers[j] = ci[1]
            p_values[j] = pv

    return ses, ci_lowers, ci_uppers, p_values
+ ) + # Reject duplicate (unit, time) rows dup_mask = df.duplicated(subset=[unit, time], keep=False) if dup_mask.any(): diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py index c56876ae..65d4865b 100644 --- a/diff_diff/staggered.py +++ b/diff_diff/staggered.py @@ -10,10 +10,11 @@ import numpy as np import pandas as pd +from scipy import linalg as scipy_linalg from scipy import optimize -from diff_diff.linalg import solve_ols -from diff_diff.utils import safe_inference +from diff_diff.linalg import solve_ols, _detect_rank_deficiency, _format_dropped_columns +from diff_diff.utils import safe_inference, safe_inference_batch # Import from split modules from diff_diff.staggered_results import ( @@ -433,6 +434,8 @@ def _precompute_structures( period_cov = period_data.reindex(all_units)[covariates] covariate_by_period[t] = period_cov.values # Shape: (n_units, n_covariates) + is_balanced = not np.any(np.isnan(outcome_matrix)) + return { 'all_units': all_units, 'unit_to_idx': unit_to_idx, @@ -443,6 +446,7 @@ def _precompute_structures( 'never_treated_mask': never_treated_mask, 'covariate_by_period': covariate_by_period, 'time_periods': time_periods, + 'is_balanced': is_balanced, } def _compute_att_gt_fast( @@ -451,6 +455,8 @@ def _compute_att_gt_fast( g: Any, t: Any, covariates: Optional[List[str]], + pscore_cache: Optional[Dict] = None, + cho_cache: Optional[Dict] = None, ) -> Tuple[Optional[float], float, int, int, Optional[Dict[str, Any]]]: """ Compute ATT(g,t) using pre-computed data structures (fast version). @@ -458,13 +464,11 @@ def _compute_att_gt_fast( Uses vectorized numpy operations on pre-pivoted outcome matrix instead of repeated pandas filtering. 
""" - time_periods = precomputed['time_periods'] period_to_col = precomputed['period_to_col'] outcome_matrix = precomputed['outcome_matrix'] cohort_masks = precomputed['cohort_masks'] never_treated_mask = precomputed['never_treated_mask'] unit_cohorts = precomputed['unit_cohorts'] - all_units = precomputed['all_units'] covariate_by_period = precomputed['covariate_by_period'] # Base period selection based on mode @@ -527,10 +531,6 @@ def _compute_att_gt_fast( treated_change = outcome_change[treated_valid] control_change = outcome_change[control_valid] - # Get unit IDs for influence function - treated_units = all_units[treated_valid] - control_units = all_units[control_valid] - # Get covariates if specified (from the base period) X_treated = None X_control = None @@ -550,6 +550,24 @@ def _compute_att_gt_fast( X_treated = None X_control = None + # Compute cache key for propensity score reuse + pscore_key = None + if pscore_cache is not None and X_treated is not None: + is_balanced = precomputed.get('is_balanced', False) + if is_balanced and self.control_group == "never_treated": + pscore_key = (g, base_period_val) + else: + pscore_key = (g, base_period_val, t) + + # Compute cache key for Cholesky reuse (DR outcome regression) + cho_key = None + if cho_cache is not None and X_control is not None: + is_balanced = precomputed.get('is_balanced', False) + if is_balanced and self.control_group == "never_treated": + cho_key = base_period_val + else: + cho_key = (g, base_period_val, t) + # Estimation method if self.estimation_method == "reg": att_gt, se_gt, inf_func = self._outcome_regression( @@ -559,24 +577,505 @@ def _compute_att_gt_fast( att_gt, se_gt, inf_func = self._ipw_estimation( treated_change, control_change, int(n_treated), int(n_control), - X_treated, X_control + X_treated, X_control, + pscore_cache=pscore_cache, + pscore_key=pscore_key, ) else: # doubly robust att_gt, se_gt, inf_func = self._doubly_robust( - treated_change, control_change, X_treated, X_control + 
treated_change, control_change, X_treated, X_control, + pscore_cache=pscore_cache, + pscore_key=pscore_key, + cho_cache=cho_cache, + cho_key=cho_key, ) - # Package influence function info with unit IDs for bootstrap + # Package influence function info with index arrays (positions into + # precomputed['all_units']) for O(1) downstream lookups instead of + # O(n) Python dict lookups. n_t = int(n_treated) + all_units = precomputed['all_units'] + treated_positions = np.where(treated_valid)[0] + control_positions = np.where(control_valid)[0] inf_func_info = { - 'treated_units': list(treated_units), - 'control_units': list(control_units), + 'treated_idx': treated_positions, + 'control_idx': control_positions, + 'treated_units': all_units[treated_positions], + 'control_units': all_units[control_positions], 'treated_inf': inf_func[:n_t], 'control_inf': inf_func[n_t:], } return att_gt, se_gt, int(n_treated), int(n_control), inf_func_info + def _compute_all_att_gt_vectorized( + self, + precomputed: PrecomputedData, + treatment_groups: List[Any], + time_periods: List[Any], + min_period: Any, + ) -> Tuple[Dict, Dict]: + """ + Vectorized computation of all ATT(g,t) for the no-covariates regression case. + + This inlines the simple difference-in-means path from _outcome_regression() + and eliminates per-(g,t) Python function call overhead. + + Returns + ------- + group_time_effects : dict + Mapping (g, t) -> effect dict. + influence_func_info : dict + Mapping (g, t) -> influence function info dict. 
+ """ + period_to_col = precomputed['period_to_col'] + outcome_matrix = precomputed['outcome_matrix'] + cohort_masks = precomputed['cohort_masks'] + never_treated_mask = precomputed['never_treated_mask'] + unit_cohorts = precomputed['unit_cohorts'] + + group_time_effects = {} + influence_func_info = {} + + # Collect all valid (g, t, base_col, post_col) tuples + tasks = [] + for g in treatment_groups: + if self.base_period == "universal": + universal_base = g - 1 - self.anticipation + valid_periods = [t for t in time_periods if t != universal_base] + else: + valid_periods = [ + t for t in time_periods + if t >= g - self.anticipation or t > min_period + ] + + for t in valid_periods: + # Base period selection + if self.base_period == "universal": + base_period_val = g - 1 - self.anticipation + else: + if t < g - self.anticipation: + base_period_val = t - 1 + else: + base_period_val = g - 1 - self.anticipation + + if base_period_val not in period_to_col or t not in period_to_col: + continue + + tasks.append((g, t, period_to_col[base_period_val], period_to_col[t])) + + # Process all tasks + atts = [] + ses = [] + task_keys = [] + + for g, t, base_col, post_col in tasks: + treated_mask = cohort_masks[g] + + if self.control_group == "never_treated": + control_mask = never_treated_mask + else: + control_mask = never_treated_mask | ( + (unit_cohorts > t + self.anticipation) & (unit_cohorts != g) + ) + + y_base = outcome_matrix[:, base_col] + y_post = outcome_matrix[:, post_col] + outcome_change = y_post - y_base + valid_mask = ~(np.isnan(y_base) | np.isnan(y_post)) + + treated_valid = treated_mask & valid_mask + control_valid = control_mask & valid_mask + + n_treated = np.sum(treated_valid) + n_control = np.sum(control_valid) + + if n_treated == 0 or n_control == 0: + continue + + treated_change = outcome_change[treated_valid] + control_change = outcome_change[control_valid] + + n_t = int(n_treated) + n_c = int(n_control) + + # Inline no-covariates regression (difference in 
means) + att = float(np.mean(treated_change) - np.mean(control_change)) + + var_t = float(np.var(treated_change, ddof=1)) if n_t > 1 else 0.0 + var_c = float(np.var(control_change, ddof=1)) if n_c > 1 else 0.0 + se = float(np.sqrt(var_t / n_t + var_c / n_c)) if (n_t > 0 and n_c > 0) else 0.0 + + # Influence function + inf_treated = (treated_change - np.mean(treated_change)) / n_t + inf_control = -(control_change - np.mean(control_change)) / n_c + + group_time_effects[(g, t)] = { + 'effect': att, + 'se': se, + # t_stat, p_value, conf_int filled by batch inference below + 't_stat': np.nan, + 'p_value': np.nan, + 'conf_int': (np.nan, np.nan), + 'n_treated': n_t, + 'n_control': n_c, + } + + all_units = precomputed['all_units'] + treated_positions = np.where(treated_valid)[0] + control_positions = np.where(control_valid)[0] + influence_func_info[(g, t)] = { + 'treated_idx': treated_positions, + 'control_idx': control_positions, + 'treated_units': all_units[treated_positions], + 'control_units': all_units[control_positions], + 'treated_inf': inf_treated, + 'control_inf': inf_control, + } + + atts.append(att) + ses.append(se) + task_keys.append((g, t)) + + # Batch inference for all (g,t) pairs at once + if task_keys: + t_stats, p_values, ci_lowers, ci_uppers = safe_inference_batch( + np.array(atts), np.array(ses), alpha=self.alpha + ) + for idx, key in enumerate(task_keys): + group_time_effects[key]['t_stat'] = float(t_stats[idx]) + group_time_effects[key]['p_value'] = float(p_values[idx]) + group_time_effects[key]['conf_int'] = ( + float(ci_lowers[idx]), float(ci_uppers[idx]) + ) + + return group_time_effects, influence_func_info + + def _compute_all_att_gt_covariate_reg( + self, + precomputed: PrecomputedData, + treatment_groups: List[Any], + time_periods: List[Any], + min_period: Any, + ) -> Tuple[Dict, Dict]: + """ + Optimized computation of all ATT(g,t) for the covariate regression case. 
+ + Groups (g,t) pairs by their control regression key to reuse Cholesky + factorizations of X^T X across pairs that share the same control design + matrix. + + Returns + ------- + group_time_effects : dict + Mapping (g, t) -> effect dict. + influence_func_info : dict + Mapping (g, t) -> influence function info dict. + """ + period_to_col = precomputed['period_to_col'] + outcome_matrix = precomputed['outcome_matrix'] + cohort_masks = precomputed['cohort_masks'] + never_treated_mask = precomputed['never_treated_mask'] + unit_cohorts = precomputed['unit_cohorts'] + covariate_by_period = precomputed['covariate_by_period'] + is_balanced = precomputed['is_balanced'] + + group_time_effects = {} + influence_func_info = {} + atts = [] + ses = [] + task_keys = [] + n_nan_cells = 0 + + # Collect all valid (g, t) tasks with their base periods + tasks_by_group = {} # control_key -> list of (g, t, base_period_val, base_col, post_col) + for g in treatment_groups: + if self.base_period == "universal": + universal_base = g - 1 - self.anticipation + valid_periods = [t for t in time_periods if t != universal_base] + else: + valid_periods = [ + t for t in time_periods + if t >= g - self.anticipation or t > min_period + ] + + for t in valid_periods: + if self.base_period == "universal": + base_period_val = g - 1 - self.anticipation + else: + if t < g - self.anticipation: + base_period_val = t - 1 + else: + base_period_val = g - 1 - self.anticipation + + if base_period_val not in period_to_col or t not in period_to_col: + continue + + # Determine control regression grouping key. + # For balanced panels with never_treated control, X_control depends + # only on base_period_val (control mask is time-invariant). + # For not_yet_treated, the control mask excludes cohort g, so include g. 
+ if is_balanced and self.control_group == "never_treated": + control_key = base_period_val + else: + control_key = (g, base_period_val, t) + + tasks_by_group.setdefault(control_key, []).append( + (g, t, base_period_val, period_to_col[base_period_val], period_to_col[t]) + ) + + # Process each group of tasks sharing the same control regression + for control_key, tasks in tasks_by_group.items(): + # Use the first task to build X_control (same for all in the group) + first_g, first_t, base_period_val, first_base_col, first_post_col = tasks[0] + + cov_matrix = covariate_by_period[base_period_val] + + # Build control mask (same for all tasks in this group) + if self.control_group == "never_treated": + control_mask = never_treated_mask + else: + # For not_yet_treated, control_key includes t + ref_t = first_t + control_mask = never_treated_mask | ( + (unit_cohorts > ref_t + self.anticipation) & (unit_cohorts != first_g) + ) + + # For balanced panels, valid_mask is all True so control_valid = control_mask + if is_balanced: + control_valid_base = control_mask + else: + y_base_first = outcome_matrix[:, first_base_col] + y_post_first = outcome_matrix[:, first_post_col] + valid_first = ~(np.isnan(y_base_first) | np.isnan(y_post_first)) + control_valid_base = control_mask & valid_first + + X_ctrl_raw = cov_matrix[control_valid_base] + + # Check for NaN in control covariates + ctrl_has_nan = bool(np.any(np.isnan(X_ctrl_raw))) + + # Build X_ctrl with intercept + n_c_base = int(np.sum(control_valid_base)) + if n_c_base == 0: + continue + + X_ctrl = None + cho = None + kept_cols = None + if not ctrl_has_nan: + X_ctrl = np.column_stack([np.ones(n_c_base), X_ctrl_raw]) + + # One-time rank check for this control group + rank, dropped_cols, _ = _detect_rank_deficiency(X_ctrl) + + if len(dropped_cols) > 0: + # Rank-deficient: force lstsq for both "warn" and "silent". + # Cholesky on near-singular XtX could yield unstable coefficients. 
+ if self.rank_deficient_action == "warn": + col_info = _format_dropped_columns(dropped_cols) + warnings.warn( + f"Rank-deficient covariate design (control_key={control_key}): " + f"dropped columns {col_info}. Rank {rank} < {X_ctrl.shape[1]}. " + "Using minimum-norm least-squares solution.", + UserWarning, stacklevel=2, + ) + cho = None # Force lstsq path for ALL rank-deficient cases + kept_cols = np.array([i for i in range(X_ctrl.shape[1]) + if i not in dropped_cols]) + else: + kept_cols = None # Full rank — use all columns + with np.errstate(all='ignore'): + XtX = X_ctrl.T @ X_ctrl + try: + cho = scipy_linalg.cho_factor(XtX) + except np.linalg.LinAlgError: + cho = None + + # Process each (g, t) pair in this group + for g, t, _, base_col, post_col in tasks: + treated_mask = cohort_masks[g] + + # Recompute control mask for not_yet_treated (varies by g, t) + if self.control_group == "not_yet_treated": + control_mask = never_treated_mask | ( + (unit_cohorts > t + self.anticipation) & (unit_cohorts != g) + ) + + y_base = outcome_matrix[:, base_col] + y_post = outcome_matrix[:, post_col] + outcome_change = y_post - y_base + + if is_balanced: + valid_mask_pair = np.ones(len(y_base), dtype=bool) + else: + valid_mask_pair = ~(np.isnan(y_base) | np.isnan(y_post)) + + treated_valid = treated_mask & valid_mask_pair + # For balanced + never_treated, control_valid is same as control_valid_base + if is_balanced and self.control_group == "never_treated": + control_valid = control_valid_base + else: + control_valid = control_mask & valid_mask_pair + + n_t = int(np.sum(treated_valid)) + n_c = int(np.sum(control_valid)) + + if n_t == 0 or n_c == 0: + continue + + treated_change = outcome_change[treated_valid] + control_change = outcome_change[control_valid] + + X_treated_pair = cov_matrix[treated_valid] + X_control_pair = cov_matrix[control_valid] + + # Check for NaN in this pair's covariates + if np.any(np.isnan(X_treated_pair)) or np.any(np.isnan(X_control_pair)): + # Fall back 
to unconditional (difference in means) + warnings.warn( + f"Missing values in covariates for group {g}, time {t}. " + "Falling back to unconditional estimation.", + UserWarning, + stacklevel=3, + ) + att = float(np.mean(treated_change) - np.mean(control_change)) + var_t = float(np.var(treated_change, ddof=1)) if n_t > 1 else 0.0 + var_c = float(np.var(control_change, ddof=1)) if n_c > 1 else 0.0 + se = float(np.sqrt(var_t / n_t + var_c / n_c)) + inf_treated = (treated_change - np.mean(treated_change)) / n_t + inf_control = -(control_change - np.mean(control_change)) / n_c + else: + # Build per-pair X_ctrl if control_valid differs from base + if (is_balanced and self.control_group == "never_treated" + and X_ctrl is not None): + pair_X_ctrl = X_ctrl + pair_n_c = n_c_base + else: + pair_X_ctrl = np.column_stack([np.ones(n_c), X_control_pair]) + pair_n_c = n_c + + # Solve for beta + beta = None + with np.errstate(all='ignore'): + if (cho is not None and is_balanced + and self.control_group == "never_treated"): + # Use cached Cholesky + Xty = pair_X_ctrl.T @ control_change + beta = scipy_linalg.cho_solve(cho, Xty) + else: + # Compute per-pair Cholesky or lstsq fallback + if kept_cols is not None: + # Rank-deficient: skip Cholesky, use reduced lstsq + pass + else: + pair_XtX = pair_X_ctrl.T @ pair_X_ctrl + try: + pair_cho = scipy_linalg.cho_factor(pair_XtX) + Xty = pair_X_ctrl.T @ control_change + beta = scipy_linalg.cho_solve(pair_cho, Xty) + except np.linalg.LinAlgError: + pass + + if beta is None or np.any(~np.isfinite(beta)): + if kept_cols is not None: + # Reduced solve for rank-deficient design + result = scipy_linalg.lstsq( + pair_X_ctrl[:, kept_cols], control_change, + cond=1e-07, + ) + beta = np.zeros(pair_X_ctrl.shape[1]) + beta[kept_cols] = result[0] + else: + # Full-rank lstsq fallback (Cholesky numerical failure) + result = scipy_linalg.lstsq( + pair_X_ctrl, control_change, cond=1e-07, + ) + beta = result[0] + + nan_cell = False + + if beta is None or 
np.any(~np.isfinite(beta)): + nan_cell = True + n_nan_cells += 1 + + if not nan_cell: + X_treated_w_intercept = np.column_stack([np.ones(n_t), X_treated_pair]) + with np.errstate(all='ignore'): + predicted_control = X_treated_w_intercept @ beta + treated_residuals = treated_change - predicted_control + if np.any(~np.isfinite(predicted_control)): + nan_cell = True + n_nan_cells += 1 + + if not nan_cell: + att = float(np.mean(treated_residuals)) + with np.errstate(all='ignore'): + residuals = control_change - pair_X_ctrl @ beta + if np.any(~np.isfinite(residuals)): + nan_cell = True + n_nan_cells += 1 + + if nan_cell: + att = np.nan + se = np.nan + inf_treated = np.zeros(n_t) + inf_control = np.zeros(n_c) + else: + var_t = float(np.var(treated_residuals, ddof=1)) if n_t > 1 else 0.0 + var_c = float(np.var(residuals, ddof=1)) if pair_n_c > 1 else 0.0 + se = float(np.sqrt(var_t / n_t + var_c / pair_n_c)) + inf_treated = (treated_residuals - np.mean(treated_residuals)) / n_t + inf_control = -residuals / pair_n_c + + group_time_effects[(g, t)] = { + 'effect': att, + 'se': se, + 't_stat': np.nan, + 'p_value': np.nan, + 'conf_int': (np.nan, np.nan), + 'n_treated': n_t, + 'n_control': n_c, + } + + all_units = precomputed['all_units'] + treated_positions = np.where(treated_valid)[0] + control_positions = np.where(control_valid)[0] + influence_func_info[(g, t)] = { + 'treated_idx': treated_positions, + 'control_idx': control_positions, + 'treated_units': all_units[treated_positions], + 'control_units': all_units[control_positions], + 'treated_inf': inf_treated, + 'control_inf': inf_control, + } + + atts.append(att) + ses.append(se) + task_keys.append((g, t)) + + if n_nan_cells > 0: + warnings.warn( + f"{n_nan_cells} group-time cell(s) have non-finite regression results " + "(near-singular covariates). 
These cells are preserved with NaN inference.", + UserWarning, + stacklevel=2, + ) + + # Batch inference + if task_keys: + t_stats, p_values, ci_lowers, ci_uppers = safe_inference_batch( + np.array(atts), np.array(ses), alpha=self.alpha + ) + for idx, key in enumerate(task_keys): + group_time_effects[key]['t_stat'] = float(t_stats[idx]) + group_time_effects[key]['p_value'] = float(p_values[idx]) + group_time_effects[key]['conf_int'] = ( + float(ci_lowers[idx]), float(ci_uppers[idx]) + ) + + return group_time_effects, influence_func_info + def fit( self, data: pd.DataFrame, @@ -627,6 +1126,10 @@ def fit( ValueError If required columns are missing or data validation fails. """ + # Normalize empty covariates list to None + if covariates is not None and len(covariates) == 0: + covariates = None + # Validate inputs required_cols = [outcome, unit, time, first_treat] if covariates: @@ -675,45 +1178,70 @@ def fit( ) # Compute ATT(g,t) for each group-time combination - group_time_effects = {} - influence_func_info = {} # Store influence functions for bootstrap - - # Get minimum period for determining valid pre-treatment periods min_period = min(time_periods) - for g in treatment_groups: - # Compute valid periods including pre-treatment - if self.base_period == "universal": - # Universal: all periods except the base period (which is normalized to 0) - universal_base = g - 1 - self.anticipation - valid_periods = [t for t in time_periods if t != universal_base] - else: - # Varying: post-treatment + pre-treatment where t-1 exists - valid_periods = [ - t for t in time_periods - if t >= g - self.anticipation or t > min_period - ] - - for t in valid_periods: - att_gt, se_gt, n_treat, n_ctrl, inf_info = self._compute_att_gt_fast( - precomputed, g, t, covariates + if covariates is None and self.estimation_method == "reg": + # Fast vectorized path for the common no-covariates regression case + group_time_effects, influence_func_info = ( + self._compute_all_att_gt_vectorized( + 
precomputed, treatment_groups, time_periods, min_period + ) + ) + elif (covariates is not None and self.estimation_method == "reg" + and self.rank_deficient_action != "error"): + # Optimized covariate regression path with Cholesky caching + group_time_effects, influence_func_info = ( + self._compute_all_att_gt_covariate_reg( + precomputed, treatment_groups, time_periods, min_period ) + ) + else: + # General path: IPW, DR, rank_deficient_action="error", or edge cases + group_time_effects = {} + influence_func_info = {} + + # Propensity score cache for IPW/DR with covariates + pscore_cache = {} if ( + covariates and self.estimation_method in ("ipw", "dr") + ) else None + # Cholesky cache for DR outcome regression component + cho_cache = {} if ( + covariates and self.estimation_method == "dr" + and self.rank_deficient_action != "error" + ) else None + + for g in treatment_groups: + if self.base_period == "universal": + universal_base = g - 1 - self.anticipation + valid_periods = [t for t in time_periods if t != universal_base] + else: + valid_periods = [ + t for t in time_periods + if t >= g - self.anticipation or t > min_period + ] + + for t in valid_periods: + att_gt, se_gt, n_treat, n_ctrl, inf_info = self._compute_att_gt_fast( + precomputed, g, t, covariates, + pscore_cache=pscore_cache, + cho_cache=cho_cache, + ) - if att_gt is not None: - t_stat, p_val, ci = safe_inference(att_gt, se_gt, alpha=self.alpha) + if att_gt is not None: + t_stat, p_val, ci = safe_inference(att_gt, se_gt, alpha=self.alpha) - group_time_effects[(g, t)] = { - 'effect': att_gt, - 'se': se_gt, - 't_stat': t_stat, - 'p_value': p_val, - 'conf_int': ci, - 'n_treated': n_treat, - 'n_control': n_ctrl, - } + group_time_effects[(g, t)] = { + 'effect': att_gt, + 'se': se_gt, + 't_stat': t_stat, + 'p_value': p_val, + 'conf_int': ci, + 'n_treated': n_treat, + 'n_control': n_ctrl, + } - if inf_info is not None: - influence_func_info[(g, t)] = inf_info + if inf_info is not None: + 
influence_func_info[(g, t)] = inf_info if not group_time_effects: raise ValueError( @@ -742,7 +1270,8 @@ def fit( if aggregate in ["group", "all"]: group_effects = self._aggregate_by_group( - group_time_effects, influence_func_info, treatment_groups + group_time_effects, influence_func_info, treatment_groups, + precomputed=precomputed, ) # Run bootstrap inference if requested @@ -767,44 +1296,49 @@ def fit( overall_p = bootstrap_results.overall_att_p_value overall_ci = bootstrap_results.overall_att_ci - # Update group-time effects with bootstrap SEs - for gt in group_time_effects: - if gt in bootstrap_results.group_time_ses: + # Update group-time effects with bootstrap SEs (batched) + gt_keys = [gt for gt in group_time_effects if gt in bootstrap_results.group_time_ses] + if gt_keys: + gt_effects_arr = np.array([float(group_time_effects[gt]['effect']) for gt in gt_keys]) + gt_ses_arr = np.array([float(bootstrap_results.group_time_ses[gt]) for gt in gt_keys]) + gt_t_stats, _, _, _ = safe_inference_batch(gt_effects_arr, gt_ses_arr, alpha=self.alpha) + for idx, gt in enumerate(gt_keys): group_time_effects[gt]['se'] = bootstrap_results.group_time_ses[gt] group_time_effects[gt]['conf_int'] = bootstrap_results.group_time_cis[gt] group_time_effects[gt]['p_value'] = bootstrap_results.group_time_p_values[gt] - effect = float(group_time_effects[gt]['effect']) - se = float(group_time_effects[gt]['se']) - group_time_effects[gt]['t_stat'] = safe_inference(effect, se, alpha=self.alpha)[0] + group_time_effects[gt]['t_stat'] = float(gt_t_stats[idx]) - # Update event study effects with bootstrap SEs + # Update event study effects with bootstrap SEs (batched) if (event_study_effects is not None and bootstrap_results.event_study_ses is not None and bootstrap_results.event_study_cis is not None and bootstrap_results.event_study_p_values is not None): - for e in event_study_effects: - if e in bootstrap_results.event_study_ses: + es_keys = [e for e in event_study_effects if e in 
bootstrap_results.event_study_ses] + if es_keys: + es_effects_arr = np.array([float(event_study_effects[e]['effect']) for e in es_keys]) + es_ses_arr = np.array([float(bootstrap_results.event_study_ses[e]) for e in es_keys]) + es_t_stats, _, _, _ = safe_inference_batch(es_effects_arr, es_ses_arr, alpha=self.alpha) + for idx, e in enumerate(es_keys): event_study_effects[e]['se'] = bootstrap_results.event_study_ses[e] event_study_effects[e]['conf_int'] = bootstrap_results.event_study_cis[e] - p_val = bootstrap_results.event_study_p_values[e] - event_study_effects[e]['p_value'] = p_val - effect = float(event_study_effects[e]['effect']) - se = float(event_study_effects[e]['se']) - event_study_effects[e]['t_stat'] = safe_inference(effect, se, alpha=self.alpha)[0] + event_study_effects[e]['p_value'] = bootstrap_results.event_study_p_values[e] + event_study_effects[e]['t_stat'] = float(es_t_stats[idx]) - # Update group effects with bootstrap SEs + # Update group effects with bootstrap SEs (batched) if (group_effects is not None and bootstrap_results.group_effect_ses is not None and bootstrap_results.group_effect_cis is not None and bootstrap_results.group_effect_p_values is not None): - for g in group_effects: - if g in bootstrap_results.group_effect_ses: + grp_keys = [g for g in group_effects if g in bootstrap_results.group_effect_ses] + if grp_keys: + grp_effects_arr = np.array([float(group_effects[g]['effect']) for g in grp_keys]) + grp_ses_arr = np.array([float(bootstrap_results.group_effect_ses[g]) for g in grp_keys]) + grp_t_stats, _, _, _ = safe_inference_batch(grp_effects_arr, grp_ses_arr, alpha=self.alpha) + for idx, g in enumerate(grp_keys): group_effects[g]['se'] = bootstrap_results.group_effect_ses[g] group_effects[g]['conf_int'] = bootstrap_results.group_effect_cis[g] group_effects[g]['p_value'] = bootstrap_results.group_effect_p_values[g] - effect = float(group_effects[g]['effect']) - se = float(group_effects[g]['se']) - group_effects[g]['t_stat'] = 
safe_inference(effect, se, alpha=self.alpha)[0] + group_effects[g]['t_stat'] = float(grp_t_stats[idx]) # Compute simultaneous confidence band CIs if cband is available cband_crit_value = None @@ -920,6 +1454,8 @@ def _ipw_estimation( n_control: int, X_treated: Optional[np.ndarray] = None, X_control: Optional[np.ndarray] = None, + pscore_cache: Optional[Dict] = None, + pscore_key: Optional[Any] = None, ) -> Tuple[float, float, np.ndarray]: """ Estimate ATT using inverse probability weighting. @@ -938,22 +1474,39 @@ def _ipw_estimation( if X_treated is not None and X_control is not None and X_treated.shape[1] > 0: # Covariate-adjusted IPW estimation - # Stack covariates and create treatment indicator - X_all = np.vstack([X_treated, X_control]) - D = np.concatenate([np.ones(n_t), np.zeros(n_c)]) - - # Estimate propensity scores using logistic regression - try: - _, pscore = _logistic_regression(X_all, D) - except (np.linalg.LinAlgError, ValueError): - # Fallback to unconditional if logistic regression fails - warnings.warn( - "Propensity score estimation failed. 
" - "Falling back to unconditional estimation.", - UserWarning, - stacklevel=4, - ) - pscore = np.full(len(D), n_t / (n_t + n_c)) + # Check propensity score cache + cached_pscore = None + if pscore_cache is not None and pscore_key is not None: + cached_pscore = pscore_cache.get(pscore_key) + + if cached_pscore is not None: + # Use cached propensity scores (beta coefficients) + beta_logistic = cached_pscore + X_all = np.vstack([X_treated, X_control]) + X_all_with_intercept = np.column_stack([np.ones(n_t + n_c), X_all]) + z = np.dot(X_all_with_intercept, beta_logistic) + z = np.clip(z, -500, 500) + pscore = 1 / (1 + np.exp(-z)) + else: + # Stack covariates and create treatment indicator + X_all = np.vstack([X_treated, X_control]) + D = np.concatenate([np.ones(n_t), np.zeros(n_c)]) + + # Estimate propensity scores using logistic regression + try: + beta_logistic, pscore = _logistic_regression(X_all, D) + # Cache the fitted coefficients + if pscore_cache is not None and pscore_key is not None: + pscore_cache[pscore_key] = beta_logistic + except (np.linalg.LinAlgError, ValueError): + # Fallback to unconditional if logistic regression fails + warnings.warn( + "Propensity score estimation failed. " + "Falling back to unconditional estimation.", + UserWarning, + stacklevel=4, + ) + pscore = np.full(len(D), n_t / (n_t + n_c)) # Propensity scores for treated and control pscore_treated = pscore[:n_t] @@ -1009,6 +1562,10 @@ def _doubly_robust( control_change: np.ndarray, X_treated: Optional[np.ndarray] = None, X_control: Optional[np.ndarray] = None, + pscore_cache: Optional[Dict] = None, + pscore_key: Optional[Any] = None, + cho_cache: Optional[Dict] = None, + cho_key: Optional[Any] = None, ) -> Tuple[float, float, np.ndarray]: """ Estimate ATT using doubly robust estimation. 
@@ -1032,26 +1589,77 @@ def _doubly_robust( if X_treated is not None and X_control is not None and X_treated.shape[1] > 0: # Doubly robust estimation with covariates # Step 1: Outcome regression - fit E[Delta Y | X] on control - beta, _ = _linear_regression( - X_control, control_change, - rank_deficient_action=self.rank_deficient_action, - ) + # Try Cholesky cache for outcome regression + beta = None + X_control_with_intercept = np.column_stack([np.ones(n_c), X_control]) + if cho_cache is not None and cho_key is not None: + cached_cho = cho_cache.get(cho_key) + + if cached_cho is False: + # Rank-deficient sentinel: skip Cholesky, fall through + pass + elif cached_cho is not None: + Xty = X_control_with_intercept.T @ control_change + beta = scipy_linalg.cho_solve(cached_cho, Xty) + if np.any(~np.isfinite(beta)): + beta = None + else: + # First time for this cho_key: check rank before Cholesky + rank_info = _detect_rank_deficiency(X_control_with_intercept) + if len(rank_info[1]) > 0: + cho_cache[cho_key] = False # Sentinel + else: + XtX = X_control_with_intercept.T @ X_control_with_intercept + try: + cho_factor = scipy_linalg.cho_factor(XtX) + cho_cache[cho_key] = cho_factor + Xty = X_control_with_intercept.T @ control_change + beta = scipy_linalg.cho_solve(cho_factor, Xty) + if np.any(~np.isfinite(beta)): + beta = None + except np.linalg.LinAlgError: + pass + + if beta is None: + beta, _ = _linear_regression( + X_control, control_change, + rank_deficient_action=self.rank_deficient_action, + ) + # Zero NaN coefficients for prediction only — dropped columns + # contribute 0 to the column space projection. Note: solve_ols + # deliberately uses NaN (R's lm() convention) for inference, but + # here we only need beta for prediction (m_treated, m_control). 
+ beta = np.where(np.isfinite(beta), beta, 0.0) # Predict counterfactual for both treated and control X_treated_with_intercept = np.column_stack([np.ones(n_t), X_treated]) - X_control_with_intercept = np.column_stack([np.ones(n_c), X_control]) m_treated = np.dot(X_treated_with_intercept, beta) m_control = np.dot(X_control_with_intercept, beta) # Step 2: Propensity score estimation - X_all = np.vstack([X_treated, X_control]) - D = np.concatenate([np.ones(n_t), np.zeros(n_c)]) - - try: - _, pscore = _logistic_regression(X_all, D) - except (np.linalg.LinAlgError, ValueError): - # Fallback to unconditional if logistic regression fails - pscore = np.full(len(D), n_t / (n_t + n_c)) + # Check propensity score cache + cached_pscore = None + if pscore_cache is not None and pscore_key is not None: + cached_pscore = pscore_cache.get(pscore_key) + + if cached_pscore is not None: + beta_logistic = cached_pscore + X_all = np.vstack([X_treated, X_control]) + X_all_with_intercept = np.column_stack([np.ones(n_t + n_c), X_all]) + z = np.dot(X_all_with_intercept, beta_logistic) + z = np.clip(z, -500, 500) + pscore = 1 / (1 + np.exp(-z)) + else: + X_all = np.vstack([X_treated, X_control]) + D = np.concatenate([np.ones(n_t), np.zeros(n_c)]) + + try: + beta_logistic, pscore = _logistic_regression(X_all, D) + if pscore_cache is not None and pscore_key is not None: + pscore_cache[pscore_key] = beta_logistic + except (np.linalg.LinAlgError, ValueError): + # Fallback to unconditional if logistic regression fails + pscore = np.full(len(D), n_t / (n_t + n_c)) pscore_control = pscore[n_t:] diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py index 3b0a04a0..7faf043f 100644 --- a/diff_diff/staggered_aggregation.py +++ b/diff_diff/staggered_aggregation.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from diff_diff.utils import safe_inference +from diff_diff.utils import safe_inference_batch # Type alias for pre-computed structures (defined at module 
scope for runtime access) PrecomputedData = Dict[str, Any] @@ -87,6 +87,31 @@ def _aggregate_simple( weights = np.array(weights_list, dtype=float) groups_for_gt = np.array(groups_for_gt) + # Exclude NaN effects from aggregation (R's aggte() convention) + finite_mask = np.isfinite(effects) + n_nan = int(np.sum(~finite_mask)) + if n_nan > 0: + import warnings + warnings.warn( + f"{n_nan} group-time effect(s) are NaN and excluded from overall ATT " + "aggregation. Inspect group_time_effects for details.", + UserWarning, + stacklevel=2, + ) + effects = effects[finite_mask] + weights = weights[finite_mask] + gt_pairs = [gt for gt, m in zip(gt_pairs, finite_mask) if m] + groups_for_gt = groups_for_gt[finite_mask] + + if len(effects) == 0: + import warnings + warnings.warn( + "All post-treatment effects are NaN. Cannot compute overall ATT.", + UserWarning, + stacklevel=2, + ) + return np.nan, np.nan + # Normalize weights total_weight = np.sum(weights) weights_norm = weights / total_weight @@ -107,6 +132,7 @@ def _compute_aggregated_se( gt_pairs: List[Tuple[Any, Any]], weights: np.ndarray, influence_func_info: Dict, + n_units: Optional[int] = None, ) -> float: """ Compute standard error using influence function aggregation. @@ -118,26 +144,32 @@ def _compute_aggregated_se( Var(overall) = (1/n) Σ_i [ψ_i]² This matches R's `did` package analytical SE formula. + + Parameters + ---------- + n_units : int, optional + Size of the canonical index space (len(precomputed['all_units'])). + When provided, influence function indices (treated_idx, control_idx) + index directly into this space, eliminating dict lookups. 
""" if not influence_func_info: - # Fallback if no influence functions available return 0.0 - # Build unit index mapping from all (g,t) pairs - all_units = set() - for (g, t) in gt_pairs: - if (g, t) in influence_func_info: - info = influence_func_info[(g, t)] - all_units.update(info['treated_units']) - all_units.update(info['control_units']) + if n_units is None: + # Fallback: infer size from influence function info + max_idx = 0 + for (g, t) in gt_pairs: + if (g, t) in influence_func_info: + info = influence_func_info[(g, t)] + if len(info['treated_idx']) > 0: + max_idx = max(max_idx, info['treated_idx'].max()) + if len(info['control_idx']) > 0: + max_idx = max(max_idx, info['control_idx'].max()) + n_units = max_idx + 1 - if not all_units: + if n_units == 0: return 0.0 - all_units = sorted(all_units) - n_units = len(all_units) - unit_to_idx = {u: i for i, u in enumerate(all_units)} - # Aggregate influence functions across (g,t) pairs psi_overall = np.zeros(n_units) @@ -148,15 +180,14 @@ def _compute_aggregated_se( info = influence_func_info[(g, t)] w = weights[j] - # Treated unit contributions - for i, unit_id in enumerate(info['treated_units']): - idx = unit_to_idx[unit_id] - psi_overall[idx] += w * info['treated_inf'][i] + # Vectorized influence function aggregation using index arrays + treated_idx = info['treated_idx'] + if len(treated_idx) > 0: + np.add.at(psi_overall, treated_idx, w * info['treated_inf']) - # Control unit contributions - for i, unit_id in enumerate(info['control_units']): - idx = unit_to_idx[unit_id] - psi_overall[idx] += w * info['control_inf'][i] + control_idx = info['control_idx'] + if len(control_idx) > 0: + np.add.at(psi_overall, control_idx, w * info['control_inf']) # Compute variance: Var(θ̄) = (1/n) Σᵢ ψᵢ² variance = np.sum(psi_overall ** 2) @@ -215,6 +246,7 @@ def _compute_combined_influence_function( n_units = len(all_units) unit_to_idx = {u: i for i, u in enumerate(all_units)} + # Get unique groups and their information 
unique_groups = sorted(set(groups_for_gt)) unique_groups_set = set(unique_groups) @@ -248,15 +280,14 @@ def _compute_combined_influence_function( info = influence_func_info[(g, t)] w = weights[j] - # Vectorized influence function aggregation for treated units - treated_indices = np.array([unit_to_idx[uid] for uid in info['treated_units']]) - if len(treated_indices) > 0: - np.add.at(psi_standard, treated_indices, w * info['treated_inf']) + # Vectorized influence function aggregation using precomputed index arrays + treated_idx = info['treated_idx'] + if len(treated_idx) > 0: + np.add.at(psi_standard, treated_idx, w * info['treated_inf']) - # Vectorized influence function aggregation for control units - control_indices = np.array([unit_to_idx[uid] for uid in info['control_units']]) - if len(control_indices) > 0: - np.add.at(psi_standard, control_indices, w * info['control_inf']) + control_idx = info['control_idx'] + if len(control_idx) > 0: + np.add.at(psi_standard, control_idx, w * info['control_inf']) # Build unit-group array: normalize iterator to (idx, uid) pairs unit_groups_array = np.full(n_units, -1, dtype=np.float64) @@ -383,6 +414,8 @@ def _aggregate_event_study( adjustment that accounts for uncertainty in group-size weights, matching R's did::aggte(..., type="dynamic"). 
""" + n_units = len(precomputed['all_units']) if precomputed is not None else None + # Organize effects by relative time, keeping track of (g,t) pairs effects_by_e: Dict[int, List[Tuple[Tuple[Any, Any], float, int]]] = {} @@ -401,7 +434,7 @@ def _aggregate_event_study( # Keep only groups that have effects at relative time balance_e groups_at_e = set() for (g, t), data in group_time_effects.items(): - if t - g == balance_e: + if t - g == balance_e and np.isfinite(data['effect']): groups_at_e.add(g) # Filter effects to only include balanced groups @@ -418,17 +451,29 @@ def _aggregate_event_study( )) effects_by_e = balanced_effects - # Compute aggregated effects - event_study_effects = {} - - for e, effect_list in sorted(effects_by_e.items()): + # Compute aggregated effects and SEs for all relative periods + sorted_periods = sorted(effects_by_e.items()) + agg_effects_list = [] + agg_ses_list = [] + agg_n_groups = [] + for e, effect_list in sorted_periods: gt_pairs = [x[0] for x in effect_list] effs = np.array([x[1] for x in effect_list]) ns = np.array([x[2] for x in effect_list], dtype=float) - # Weight by group size - weights = ns / np.sum(ns) + # Exclude NaN effects from this period's aggregation + finite_mask = np.isfinite(effs) + if not np.all(finite_mask): + effs = effs[finite_mask] + ns = ns[finite_mask] + gt_pairs = [gt for gt, m in zip(gt_pairs, finite_mask) if m] + if len(effs) == 0: + agg_effects_list.append(np.nan) + agg_ses_list.append(np.nan) + agg_n_groups.append(0) + continue + weights = ns / np.sum(ns) agg_effect = np.sum(weights * effs) # Compute SE with WIF adjustment (matching R's did::aggte) @@ -438,31 +483,39 @@ def _aggregate_event_study( influence_func_info, df, unit, precomputed ) - t_stat, p_val, ci = safe_inference(agg_effect, agg_se, alpha=self.alpha) + agg_effects_list.append(agg_effect) + agg_ses_list.append(agg_se) + agg_n_groups.append(len(effect_list)) + # Batch inference for all relative periods + if not agg_effects_list: + return {} + 
t_stats, p_values, ci_lowers, ci_uppers = safe_inference_batch( + np.array(agg_effects_list), np.array(agg_ses_list), alpha=self.alpha + ) + + event_study_effects = {} + for idx, (e, _) in enumerate(sorted_periods): event_study_effects[e] = { - 'effect': agg_effect, - 'se': agg_se, - 't_stat': t_stat, - 'p_value': p_val, - 'conf_int': ci, - 'n_groups': len(effect_list), + 'effect': agg_effects_list[idx], + 'se': agg_ses_list[idx], + 't_stat': float(t_stats[idx]), + 'p_value': float(p_values[idx]), + 'conf_int': (float(ci_lowers[idx]), float(ci_uppers[idx])), + 'n_groups': agg_n_groups[idx], } # Add reference period for universal base period mode (matches R did package) - # The reference period e = -1 - anticipation has effect = 0 by construction - # Only add if there are actual computed effects (guard against empty data) if getattr(self, 'base_period', 'varying') == "universal": ref_period = -1 - self.anticipation - # Only inject reference if we have at least one real effect if event_study_effects and ref_period not in event_study_effects: event_study_effects[ref_period] = { 'effect': 0.0, - 'se': np.nan, # Undefined - no data, normalization constraint - 't_stat': np.nan, # Undefined - normalization constraint + 'se': np.nan, + 't_stat': np.nan, 'p_value': np.nan, - 'conf_int': (np.nan, np.nan), # NaN propagation for undefined inference - 'n_groups': 0, # No groups contribute - fixed by construction + 'conf_int': (np.nan, np.nan), + 'n_groups': 0, } return event_study_effects @@ -472,6 +525,7 @@ def _aggregate_by_group( group_time_effects: Dict, influence_func_info: Dict, groups: List[Any], + precomputed: Optional["PrecomputedData"] = None, ) -> Dict[Any, Dict[str, Any]]: """ Aggregate effects by treatment cohort. @@ -481,11 +535,11 @@ def _aggregate_by_group( Standard errors use influence function aggregation to account for covariances across time periods within a cohort. 
""" - group_effects = {} + n_units = len(precomputed['all_units']) if precomputed is not None else None + # Collect all group aggregation data first + group_data_list = [] for g in groups: - # Get all effects for this group (post-treatment only: t >= g - anticipation) - # Keep track of (g, t) pairs for influence function aggregation g_effects = [ ((g, t), data['effect']) for (gg, t), data in group_time_effects.items() @@ -498,25 +552,41 @@ def _aggregate_by_group( gt_pairs = [x[0] for x in g_effects] effs = np.array([x[1] for x in g_effects]) - # Equal weight across time periods for a group - weights = np.ones(len(effs)) / len(effs) + # Exclude NaN effects from this group's aggregation + finite_mask = np.isfinite(effs) + if not np.all(finite_mask): + effs = effs[finite_mask] + gt_pairs = [gt for gt, m in zip(gt_pairs, finite_mask) if m] + if len(effs) == 0: + continue + weights = np.ones(len(effs)) / len(effs) agg_effect = np.sum(weights * effs) - # Compute SE using influence function aggregation agg_se = self._compute_aggregated_se( - gt_pairs, weights, influence_func_info + gt_pairs, weights, influence_func_info, n_units=n_units ) + group_data_list.append((g, agg_effect, agg_se, len(g_effects))) + + if not group_data_list: + return {} - t_stat, p_val, ci = safe_inference(agg_effect, agg_se, alpha=self.alpha) + # Batch inference + agg_effects = np.array([x[1] for x in group_data_list]) + agg_ses = np.array([x[2] for x in group_data_list]) + t_stats, p_values, ci_lowers, ci_uppers = safe_inference_batch( + agg_effects, agg_ses, alpha=self.alpha + ) + group_effects = {} + for idx, (g, agg_effect, agg_se, n_periods) in enumerate(group_data_list): group_effects[g] = { 'effect': agg_effect, 'se': agg_se, - 't_stat': t_stat, - 'p_value': p_val, - 'conf_int': ci, - 'n_periods': len(g_effects), + 't_stat': float(t_stats[idx]), + 'p_value': float(p_values[idx]), + 'conf_int': (float(ci_lowers[idx]), float(ci_uppers[idx])), + 'n_periods': n_periods, } return group_effects 
diff --git a/diff_diff/staggered_bootstrap.py b/diff_diff/staggered_bootstrap.py index 6e54dc73..de037cf1 100644 --- a/diff_diff/staggered_bootstrap.py +++ b/diff_diff/staggered_bootstrap.py @@ -18,6 +18,9 @@ from diff_diff.bootstrap_utils import ( compute_effect_bootstrap_stats as _compute_effect_bootstrap_stats_func, ) +from diff_diff.bootstrap_utils import ( + compute_effect_bootstrap_stats_batch as _compute_effect_bootstrap_stats_batch_func, +) from diff_diff.bootstrap_utils import ( compute_percentile_ci as _compute_percentile_ci_func, ) @@ -201,6 +204,15 @@ def _run_multiplier_bootstrap( ], dtype=float) post_n_treated = all_n_treated[post_treatment_mask] + # Filter out NaN ATT(g,t) cells from overall aggregation (matches analytical path) + post_effects_raw = np.array([ + group_time_effects[gt_pairs[i]]['effect'] for i in post_treatment_indices + ]) + finite_post = np.isfinite(post_effects_raw) + if not np.all(finite_post): + post_treatment_indices = post_treatment_indices[finite_post] + post_n_treated = post_n_treated[finite_post] + # Flag to skip overall ATT aggregation when no post-treatment effects # But continue bootstrap for per-effect SEs (pre-treatment effects need bootstrap SEs too) skip_overall_aggregation = False @@ -221,7 +233,7 @@ def _run_multiplier_bootstrap( if skip_overall_aggregation: original_overall = np.nan else: - original_overall = np.sum(overall_weights_post * original_atts[post_treatment_mask]) + original_overall = np.sum(overall_weights_post * original_atts[post_treatment_indices]) # Prepare event study and group aggregation info if needed event_study_info = None @@ -248,10 +260,8 @@ def _run_multiplier_bootstrap( for j, gt in enumerate(gt_pairs): info = influence_func_info[gt] - treated_idx = np.array([unit_to_idx[u] for u in info['treated_units']]) - control_idx = np.array([unit_to_idx[u] for u in info['control_units']]) - gt_treated_indices.append(treated_idx) - gt_control_indices.append(control_idx) + 
gt_treated_indices.append(info['treated_idx']) + gt_control_indices.append(info['control_idx']) gt_treated_inf.append(np.asarray(info['treated_inf'])) gt_control_inf.append(np.asarray(info['control_inf'])) @@ -296,7 +306,7 @@ def _run_multiplier_bootstrap( # Use combined IF (standard IF + WIF) for proper bootstrap post_gt_pairs = [gt_pairs[i] for i in post_treatment_indices] post_groups = np.array([gt_pairs[i][0] for i in post_treatment_indices]) - post_effects = original_atts[post_treatment_mask] + post_effects = original_atts[post_treatment_indices] overall_combined_if, _ = self._compute_combined_influence_function( post_gt_pairs, overall_weights_post, post_effects, post_groups, influence_func_info, df, unit, precomputed, @@ -335,19 +345,19 @@ def _run_multiplier_bootstrap( with np.errstate(divide='ignore', invalid='ignore', over='ignore'): bootstrap_group[g] = bootstrap_atts_gt[:, gt_indices] @ weights - # Compute bootstrap statistics for ATT(g,t) + # Batch compute bootstrap statistics for ATT(g,t) + batch_ses, batch_ci_lo, batch_ci_hi, batch_pv = ( + _compute_effect_bootstrap_stats_batch_func( + original_atts, bootstrap_atts_gt, alpha=self.alpha + ) + ) gt_ses = {} gt_cis = {} gt_p_values = {} - for j, gt in enumerate(gt_pairs): - se, ci, p_value = self._compute_effect_bootstrap_stats( - original_atts[j], bootstrap_atts_gt[:, j], - context=f"ATT(g={gt[0]}, t={gt[1]})" - ) - gt_ses[gt] = se - gt_cis[gt] = ci - gt_p_values[gt] = p_value + gt_ses[gt] = float(batch_ses[j]) + gt_cis[gt] = (float(batch_ci_lo[j]), float(batch_ci_hi[j])) + gt_p_values[gt] = float(batch_pv[j]) # Compute bootstrap statistics for overall ATT if skip_overall_aggregation: @@ -360,43 +370,39 @@ def _run_multiplier_bootstrap( context="overall ATT" ) - # Compute bootstrap statistics for event study effects + # Batch compute bootstrap statistics for event study effects event_study_ses = None event_study_cis = None event_study_p_values = None if bootstrap_event_study is not None and 
event_study_info is not None: - event_study_ses = {} - event_study_cis = {} - event_study_p_values = {} - - for e in rel_periods: - se, ci, p_value = self._compute_effect_bootstrap_stats( - event_study_info[e]['effect'], bootstrap_event_study[e], - context=f"event study (e={e})" + es_effects = np.array([event_study_info[e]['effect'] for e in rel_periods]) + es_boot_matrix = np.column_stack([bootstrap_event_study[e] for e in rel_periods]) + es_ses, es_ci_lo, es_ci_hi, es_pv = ( + _compute_effect_bootstrap_stats_batch_func( + es_effects, es_boot_matrix, alpha=self.alpha ) - event_study_ses[e] = se - event_study_cis[e] = ci - event_study_p_values[e] = p_value + ) + event_study_ses = {e: float(es_ses[i]) for i, e in enumerate(rel_periods)} + event_study_cis = {e: (float(es_ci_lo[i]), float(es_ci_hi[i])) for i, e in enumerate(rel_periods)} + event_study_p_values = {e: float(es_pv[i]) for i, e in enumerate(rel_periods)} - # Compute bootstrap statistics for group effects + # Batch compute bootstrap statistics for group effects group_effect_ses = None group_effect_cis = None group_effect_p_values = None if bootstrap_group is not None and group_agg_info is not None: - group_effect_ses = {} - group_effect_cis = {} - group_effect_p_values = {} - - for g in group_list: - se, ci, p_value = self._compute_effect_bootstrap_stats( - group_agg_info[g]['effect'], bootstrap_group[g], - context=f"group effect (g={g})" + grp_effects = np.array([group_agg_info[g]['effect'] for g in group_list]) + grp_boot_matrix = np.column_stack([bootstrap_group[g] for g in group_list]) + grp_ses, grp_ci_lo, grp_ci_hi, grp_pv = ( + _compute_effect_bootstrap_stats_batch_func( + grp_effects, grp_boot_matrix, alpha=self.alpha ) - group_effect_ses[g] = se - group_effect_cis[g] = ci - group_effect_p_values[g] = p_value + ) + group_effect_ses = {g: float(grp_ses[i]) for i, g in enumerate(group_list)} + group_effect_cis = {g: (float(grp_ci_lo[i]), float(grp_ci_hi[i])) for i, g in enumerate(group_list)} + 
group_effect_p_values = {g: float(grp_pv[i]) for i, g in enumerate(group_list)} # Compute simultaneous confidence band critical value (sup-t) cband_crit_value = None @@ -483,7 +489,7 @@ def _prepare_event_study_aggregation( if balance_e is not None: groups_at_e = set() for j, (g, t) in enumerate(gt_pairs): - if t - g == balance_e: + if t - g == balance_e and np.isfinite(group_time_effects[(g, t)]['effect']): groups_at_e.add(g) balanced_effects: Dict[int, List[Tuple[int, float, float]]] = {} @@ -506,6 +512,15 @@ def _prepare_event_study_aggregation( effects = np.array([x[1] for x in effect_list]) n_treated = np.array([x[2] for x in effect_list], dtype=float) + # Exclude NaN effects (matches analytical aggregation path) + finite_mask = np.isfinite(effects) + if not np.all(finite_mask): + indices = indices[finite_mask] + effects = effects[finite_mask] + n_treated = n_treated[finite_mask] + if len(effects) == 0: + continue + weights = n_treated / np.sum(n_treated) agg_effect = np.sum(weights * effects) @@ -556,6 +571,14 @@ def _prepare_group_aggregation( indices = np.array([x[0] for x in group_data]) effects = np.array([x[1] for x in group_data]) + # Exclude NaN effects (matches analytical aggregation path) + finite_mask = np.isfinite(effects) + if not np.all(finite_mask): + indices = indices[finite_mask] + effects = effects[finite_mask] + if len(effects) == 0: + continue + # Equal weights across time periods weights = np.ones(len(effects)) / len(effects) agg_effect = np.sum(weights * effects) diff --git a/diff_diff/utils.py b/diff_diff/utils.py index 1b4b3c60..3108c53e 100644 --- a/diff_diff/utils.py +++ b/diff_diff/utils.py @@ -29,6 +29,20 @@ _OPTIMIZATION_TOL = 1e-8 # Convergence tolerance for optimization _NUMERICAL_EPS = 1e-10 # Small constant to prevent division by zero +# Cache for critical values to avoid repeated scipy calls +_critical_value_cache: Dict[Tuple[float, Optional[int]], float] = {} + + +def _get_critical_value(alpha: float, df: Optional[int] = 
None) -> float: + """Return cached critical value for (alpha, df) pair.""" + key = (alpha, df) + if key not in _critical_value_cache: + if df is not None: + _critical_value_cache[key] = float(stats.t.ppf(1 - alpha / 2, df)) + else: + _critical_value_cache[key] = float(stats.norm.ppf(1 - alpha / 2)) + return _critical_value_cache[key] + def validate_binary(arr: np.ndarray, name: str) -> None: """ @@ -107,11 +121,7 @@ def compute_confidence_interval( tuple (lower_bound, upper_bound) of confidence interval. """ - if df is not None: - critical_value = stats.t.ppf(1 - alpha / 2, df) - else: - critical_value = stats.norm.ppf(1 - alpha / 2) - + critical_value = _get_critical_value(alpha, df) lower = estimate - critical_value * se upper = estimate + critical_value * se @@ -181,6 +191,54 @@ def safe_inference(effect, se, alpha=0.05, df=None): return t_stat, p_value, conf_int +def safe_inference_batch(effects, ses, alpha=0.05, df=None): + """Vectorized batch inference for arrays of effects and SEs. + + Parameters + ---------- + effects : np.ndarray + Array of point estimates. + ses : np.ndarray + Array of standard errors. + alpha : float, optional + Significance level (default 0.05). + df : int, optional + Degrees of freedom. If None, uses normal distribution. 
+ + Returns + ------- + t_stats : np.ndarray + p_values : np.ndarray + ci_lowers : np.ndarray + ci_uppers : np.ndarray + """ + effects = np.asarray(effects, dtype=float) + ses = np.asarray(ses, dtype=float) + n = len(effects) + + t_stats = np.full(n, np.nan) + p_values = np.full(n, np.nan) + ci_lowers = np.full(n, np.nan) + ci_uppers = np.full(n, np.nan) + + valid = np.isfinite(ses) & (ses > 0) + if not np.any(valid): + return t_stats, p_values, ci_lowers, ci_uppers + + t_stats[valid] = effects[valid] / ses[valid] + + if df is not None: + p_values[valid] = 2.0 * stats.t.sf(np.abs(t_stats[valid]), df) + else: + p_values[valid] = 2.0 * stats.norm.sf(np.abs(t_stats[valid])) + + crit = _get_critical_value(alpha, df) + ci_lowers[valid] = effects[valid] - crit * ses[valid] + ci_uppers[valid] = effects[valid] + crit * ses[valid] + + return t_stats, p_values, ci_lowers, ci_uppers + + # ============================================================================= # Wild Cluster Bootstrap # ============================================================================= diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index b7a4e57b..42548c88 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -347,6 +347,7 @@ The multiplier bootstrap uses random weights w_i with E[w]=0 and Var(w)=1: *Edge cases:* - Groups with single observation: included but may have high variance - Missing group-time cells: ATT(g,t) set to NaN + - **Note:** When `balance_e` is specified, cohorts with NaN effects at the anchor horizon are excluded from the balanced panel - Anticipation: `anticipation` parameter shifts reference period - Group aggregation includes periods t >= g - anticipation (not just t >= g) - Both analytical SE and bootstrap SE aggregation respect anticipation diff --git a/tests/test_bootstrap_utils.py b/tests/test_bootstrap_utils.py index 15b74876..097aeb11 100644 --- a/tests/test_bootstrap_utils.py +++ b/tests/test_bootstrap_utils.py @@ 
-1,9 +1,14 @@ """Tests for bootstrap utility edge cases (NaN propagation).""" +import warnings + import numpy as np import pytest -from diff_diff.bootstrap_utils import compute_effect_bootstrap_stats +from diff_diff.bootstrap_utils import ( + compute_effect_bootstrap_stats, + compute_effect_bootstrap_stats_batch, +) class TestBootstrapStatsNaNPropagation: @@ -81,3 +86,55 @@ def test_bootstrap_stats_normal_case(self): assert ci[0] < ci[1] assert np.isfinite(p_value) assert 0 < p_value <= 1 + + +class TestBatchBootstrapStatsWarnings: + """Tests for warning emission in compute_effect_bootstrap_stats_batch.""" + + def test_batch_warns_insufficient_valid_samples(self): + """Batch function should warn when >50% of bootstrap samples are NaN.""" + rng = np.random.default_rng(42) + n_bootstrap = 100 + n_effects = 3 + # Column 1 has >50% NaN -> should trigger warning + matrix = rng.normal(size=(n_bootstrap, n_effects)) + matrix[:60, 1] = np.nan # 60% NaN + + effects = np.array([1.0, 2.0, 3.0]) + with pytest.warns(RuntimeWarning, match="too few valid"): + ses, ci_lo, ci_hi, pvals = compute_effect_bootstrap_stats_batch( + effects, matrix + ) + # Effect 1 (index 1) should be NaN + assert np.isnan(ses[1]) + # Other effects should be finite + assert np.isfinite(ses[0]) + assert np.isfinite(ses[2]) + + def test_batch_warns_zero_se(self): + """Batch function should warn when bootstrap SE is zero (identical values).""" + n_bootstrap = 100 + n_effects = 2 + matrix = np.ones((n_bootstrap, n_effects)) * 5.0 # All identical -> SE=0 + + effects = np.array([5.0, 5.0]) + with pytest.warns(RuntimeWarning, match="non-finite or zero"): + ses, ci_lo, ci_hi, pvals = compute_effect_bootstrap_stats_batch( + effects, matrix + ) + assert np.isnan(ses[0]) + assert np.isnan(ses[1]) + + def test_batch_no_warning_for_normal_case(self): + """Batch function should not warn when all values are normal.""" + rng = np.random.default_rng(42) + n_bootstrap = 200 + n_effects = 3 + matrix = 
rng.normal(size=(n_bootstrap, n_effects)) + effects = np.array([0.5, -0.3, 1.0]) + + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + ses, ci_lo, ci_hi, pvals = compute_effect_bootstrap_stats_batch( + effects, matrix + ) diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py index ae6fd2f7..efad78fc 100644 --- a/tests/test_efficient_did.py +++ b/tests/test_efficient_did.py @@ -321,6 +321,13 @@ def test_pt_post_no_never_treated_raises(self): with pytest.raises(ValueError, match="never-treated"): EfficientDiD(pt_assumption="post").fit(df, "y", "unit", "time", "first_treat") + def test_nan_outcome_raises(self): + """Non-finite outcomes in a balanced panel should be rejected.""" + df = _make_simple_panel() + df.loc[df.index[0], "y"] = np.nan + with pytest.raises(ValueError, match="non-finite"): + EfficientDiD().fit(df, "y", "unit", "time", "first_treat") + def test_duplicate_unit_time_raises(self): """Duplicate (unit, time) rows should be rejected.""" df = _make_simple_panel() diff --git a/tests/test_staggered.py b/tests/test_staggered.py index a54ba997..715b88c0 100644 --- a/tests/test_staggered.py +++ b/tests/test_staggered.py @@ -885,6 +885,46 @@ def test_missing_values_in_covariates_warning(self): assert results.overall_att is not None assert results.overall_se > 0 + def test_dr_covariates_not_yet_treated(self): + """Regression test: DR + covariates with not_yet_treated control group. + + Ensures cache keys correctly include cohort g for not_yet_treated, + preventing stale Cholesky/pscore reuse across groups. 
+ """ + data = generate_staggered_data_with_covariates(seed=42, n_units=200) + + for method in ['dr', 'reg']: + cs = CallawaySantAnna( + estimation_method=method, + control_group='not_yet_treated', + ) + results = cs.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=['x1', 'x2'], + ) + + assert np.isfinite(results.overall_att), ( + f"{method}/not_yet_treated: ATT should be finite" + ) + assert results.overall_se > 0, ( + f"{method}/not_yet_treated: SE should be positive" + ) + assert len(results.group_time_effects) > 0, ( + f"{method}/not_yet_treated: should have group-time effects" + ) + # All effects should be finite + for (g, t), eff in results.group_time_effects.items(): + assert np.isfinite(eff['effect']), ( + f"{method}/not_yet_treated: effect for ({g},{t}) should be finite" + ) + assert np.isfinite(eff['se']), ( + f"{method}/not_yet_treated: SE for ({g},{t}) should be finite" + ) + def test_rank_deficient_action_error_raises(self): """Test that rank_deficient_action='error' raises ValueError on collinear data.""" data = generate_staggered_data_with_covariates(seed=42) @@ -940,6 +980,386 @@ def test_rank_deficient_action_silent_no_warning(self): assert results is not None assert results.overall_att is not None + def test_rank_deficient_action_warn_emits_warning(self): + """Test that rank_deficient_action='warn' emits rank-deficiency warning on batched path.""" + import warnings + + data = generate_staggered_data_with_covariates(seed=42) + + # Add a covariate that is perfectly collinear with x1 + data["x1_dup"] = data["x1"].copy() + + # estimation_method="reg" + rank_deficient_action="warn" routes to + # _compute_all_att_gt_covariate_reg (batched path) + cs = CallawaySantAnna( + estimation_method="reg", + rank_deficient_action="warn", + ) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + results = cs.fit( + data, + outcome='outcome', + unit='unit', + time='time', + 
first_treat='first_treat', + covariates=['x1', 'x1_dup'] + ) + + rank_warnings = [x for x in w if "rank-deficient" in str(x.message).lower() + or "Rank-deficient" in str(x.message)] + assert len(rank_warnings) > 0, ( + "Expected at least one rank-deficiency warning with collinear covariates" + ) + + # Should still produce valid results (lstsq fallback) + assert results is not None + assert results.overall_att is not None + assert results.overall_se > 0 + + def test_empty_covariates_list_behaves_like_none(self): + """covariates=[] should behave identically to covariates=None.""" + data = generate_staggered_data_with_covariates(seed=42) + + cs_none = CallawaySantAnna(n_bootstrap=0, seed=42) + results_none = cs_none.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=None, + ) + + cs_empty = CallawaySantAnna(n_bootstrap=0, seed=42) + results_empty = cs_empty.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=[], + ) + + assert results_none.overall_att == results_empty.overall_att + assert results_none.overall_se == results_empty.overall_se + assert len(results_none.group_time_effects) == len(results_empty.group_time_effects) + + def test_nan_cell_preserved_not_dropped(self): + """Non-finite regression cells should be preserved as NaN, not dropped.""" + import warnings + from unittest.mock import patch + + data = generate_staggered_data_with_covariates(seed=42, n_units=100) + + # Patch lstsq to return inf for one specific call to simulate numerical failure + original_lstsq = __import__('scipy').linalg.lstsq + call_count = [0] + + def mock_lstsq(*args, **kwargs): + call_count[0] += 1 + result = original_lstsq(*args, **kwargs) + if call_count[0] == 1: + # Poison the first lstsq result + bad_beta = np.full_like(result[0], np.inf) + return (bad_beta,) + result[1:] + return result + + # Use rank_deficient_action="warn" to ensure we go through the covariate reg path + 
# and also force lstsq fallback by using collinear covariates + data['x1_dup'] = data['x1'] + cs = CallawaySantAnna( + n_bootstrap=0, seed=42, estimation_method='reg', + rank_deficient_action='warn', + ) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with patch('scipy.linalg.lstsq', side_effect=mock_lstsq): + results = cs.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=['x1', 'x1_dup'], + ) + + # Check that NaN cells are preserved (not dropped) + nan_cells = [ + (g, t) for (g, t), eff in results.group_time_effects.items() + if np.isnan(eff['effect']) + ] + # At least one cell should have NaN effect from our mock + if call_count[0] > 0: + # Verify warning about non-finite regression results + nan_warnings = [ + x for x in w + if "non-finite regression results" in str(x.message) + ] + if nan_cells: + assert len(nan_warnings) > 0 + # NaN cells should have NaN SE too + for g, t in nan_cells: + assert np.isnan(results.group_time_effects[(g, t)]['se']) + + # Overall ATT should still be finite (NaN cells excluded from aggregation) + assert np.isfinite(results.overall_att) + + def test_nan_cell_bootstrap_aggregation_excludes_nan(self, ci_params): + """Bootstrap aggregation paths must exclude NaN ATT(g,t) cells.""" + import warnings + from unittest.mock import patch + + data = generate_staggered_data_with_covariates(seed=42, n_units=100) + + original_lstsq = __import__('scipy').linalg.lstsq + call_count = [0] + + def mock_lstsq(*args, **kwargs): + call_count[0] += 1 + result = original_lstsq(*args, **kwargs) + # Poison call #7 — corresponds to (g=3, t=3), a post-treatment cell, + # so the overall ATT bootstrap aggregation path is exercised. 
+ if call_count[0] == 7: + bad_beta = np.full_like(result[0], np.inf) + return (bad_beta,) + result[1:] + return result + + data['x1_dup'] = data['x1'] + n_boot = ci_params.bootstrap(199) + cs = CallawaySantAnna( + n_bootstrap=n_boot, seed=42, estimation_method='reg', + rank_deficient_action='warn', + ) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + with patch('scipy.linalg.lstsq', side_effect=mock_lstsq): + results = cs.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=['x1', 'x1_dup'], + aggregate='all', + ) + + # NaN cell should be preserved in group_time_effects + nan_cells = [ + (g, t) for (g, t), eff in results.group_time_effects.items() + if np.isnan(eff['effect']) + ] + assert len(nan_cells) > 0, "Expected at least one NaN cell from mock" + + # Verify poisoned cell is post-treatment so overall ATT bootstrap path is exercised + post_treatment_nan = [(g, t) for g, t in nan_cells if t >= g - cs.anticipation] + assert len(post_treatment_nan) > 0, ( + "Poisoned cell must be post-treatment to exercise overall ATT bootstrap filtering" + ) + + # Overall ATT bootstrap inference should be finite (NaN cells excluded) + assert np.isfinite(results.overall_att), "overall_att should be finite" + assert np.isfinite(results.overall_se), "overall_se should be finite" + assert np.isfinite(results.overall_p_value), "overall_p_value should be finite" + assert all(np.isfinite(x) for x in results.overall_conf_int), "overall CI should be finite" + + # Event study: valid relative times should have finite bootstrap inference + if results.event_study_effects: + for e, data_es in results.event_study_effects.items(): + if np.isfinite(data_es['effect']): + assert np.isfinite(data_es['se']), f"ES e={e} se should be finite" + assert np.isfinite(data_es['p_value']), f"ES e={e} p_value should be finite" + + # Group effects: valid groups should have finite bootstrap inference + if 
results.group_effects: + for g, data_ge in results.group_effects.items(): + if np.isfinite(data_ge['effect']): + assert np.isfinite(data_ge['se']), f"Group {g} se should be finite" + assert np.isfinite(data_ge['p_value']), f"Group {g} p_value should be finite" + + + def test_balance_e_excludes_nan_anchor_cohort(self, ci_params): + """balance_e must exclude cohorts whose anchor-horizon effect is NaN.""" + import warnings + from unittest.mock import patch + + data = generate_staggered_data_with_covariates(seed=42, n_units=100) + + original_lstsq = __import__('scipy').linalg.lstsq + call_count = [0] + + def mock_lstsq(*args, **kwargs): + call_count[0] += 1 + result = original_lstsq(*args, **kwargs) + # Poison call #7: (g=3, t=3), the anchor for cohort g=3 at e=0 + if call_count[0] == 7: + bad_beta = np.full_like(result[0], np.inf) + return (bad_beta,) + result[1:] + return result + + data['x1_dup'] = data['x1'] + n_boot = ci_params.bootstrap(199) + cs = CallawaySantAnna( + n_bootstrap=n_boot, seed=42, estimation_method='reg', + rank_deficient_action='warn', + ) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + with patch('scipy.linalg.lstsq', side_effect=mock_lstsq): + results = cs.fit( + data, + outcome='outcome', + unit='unit', + time='time', + first_treat='first_treat', + covariates=['x1', 'x1_dup'], + aggregate='event_study', + balance_e=0, + ) + + # Confirm the anchor cell is NaN and is specifically the anchor (t - g == 0) + assert np.isnan(results.group_time_effects[(3, 3)]['effect']), \ + "Mock should have poisoned (g=3, t=3)" + assert 3 - 3 == 0, "Poisoned cell must be the anchor at balance_e=0" + + # Cohort g=3 should be excluded from ALL event-study horizons + # Only g=5 and g=8 should contribute (<=2 because not all balanced + # cohorts have cells at extreme horizons) + for e, es_data in results.event_study_effects.items(): + assert es_data['n_groups'] <= 2, ( + f"Event time e={e} has n_groups={es_data['n_groups']}, " + 
"expected <=2 (cohort g=3 should be excluded due to NaN anchor)" + ) + + # Analytical effects and SEs should be finite for all horizons + for e, es_data in results.event_study_effects.items(): + assert np.isfinite(es_data['effect']), \ + f"e={e}: analytical effect should be finite" + assert np.isfinite(es_data['se']), \ + f"e={e}: analytical SE should be finite" + + # Bootstrap SEs should also be finite + if results.bootstrap_results and results.bootstrap_results.event_study_ses: + for e, se in results.bootstrap_results.event_study_ses.items(): + assert np.isfinite(se), \ + f"e={e}: bootstrap SE should be finite" + + +class TestCallawaySantAnnaRankDeficiencyPaths: + """Tests for rank-deficiency handling in DR and reg not_yet_treated paths.""" + + def test_dr_rank_deficient_action_warn_emits_warning(self): + """Test that DR path emits rank-deficiency warning with collinear covariates.""" + import warnings as warn_mod + + data = generate_staggered_data_with_covariates(seed=42) + # Near-collinear covariate: x1 + tiny noise + rng = np.random.default_rng(99) + data["x1_near"] = data["x1"] + rng.normal(scale=1e-9, size=len(data)) + + cs = CallawaySantAnna( + estimation_method="dr", + rank_deficient_action="warn", + ) + + with warn_mod.catch_warnings(record=True) as w: + warn_mod.simplefilter("always") + results = cs.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + covariates=["x1", "x1_near"], + ) + + rank_warnings = [x for x in w if "rank-deficient" in str(x.message).lower() + or "Rank-deficient" in str(x.message)] + assert len(rank_warnings) > 0, ( + "Expected at least one rank-deficiency warning from DR path" + ) + + assert results is not None + assert results.overall_att is not None + + def test_reg_nyt_rank_deficient_action_warn(self): + """Test that reg+not_yet_treated emits rank-deficiency warning with collinear covariates.""" + import warnings as warn_mod + + data = generate_staggered_data_with_covariates(seed=42) + 
data["x1_dup"] = data["x1"].copy() + + cs = CallawaySantAnna( + estimation_method="reg", + control_group="not_yet_treated", + rank_deficient_action="warn", + ) + + with warn_mod.catch_warnings(record=True) as w: + warn_mod.simplefilter("always") + results = cs.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + covariates=["x1", "x1_dup"], + ) + + rank_warnings = [x for x in w if "rank-deficient" in str(x.message).lower() + or "Rank-deficient" in str(x.message)] + assert len(rank_warnings) > 0, ( + "Expected at least one rank-deficiency warning from reg nyt path" + ) + + assert results is not None + assert results.overall_att is not None + assert results.overall_se > 0 + + def test_bootstrap_single_unit_cohort_handles_gracefully(self, ci_params): + """Test that bootstrap handles cohort with 1 treated unit without crashing.""" + # Build small dataset where one cohort has exactly 1 unit + rng = np.random.default_rng(42) + n_periods = 6 + # 15 never-treated, 14 in cohort 3, 1 in cohort 5 + cohorts = ([0] * 15) + ([3] * 14) + ([5] * 1) + n_units = len(cohorts) + + rows = [] + for i in range(n_units): + g = cohorts[i] + for t in range(1, n_periods + 1): + treated = 1 if (g > 0 and t >= g) else 0 + y = rng.normal(0, 1) + 2.0 * treated + rows.append((i, t, y, g)) + + data = pd.DataFrame(rows, columns=["unit", "time", "outcome", "first_treat"]) + + n_boot = ci_params.bootstrap(99) + cs = CallawaySantAnna(n_bootstrap=n_boot, seed=42) + + results = cs.fit( + data, + outcome="outcome", + unit="unit", + time="time", + first_treat="first_treat", + aggregate="all", + ) + + assert results is not None + assert results.overall_att is not None + # Single-unit cohort (g=5) effects should exist and have finite ATT + g5_effects = {(g, t): eff for (g, t), eff in results.group_time_effects.items() + if g == 5} + assert len(g5_effects) > 0, "Expected group-time effects for cohort g=5" + for (g, t), eff in g5_effects.items(): + assert 
np.isfinite(eff["effect"]), f"g={g},t={t}: ATT should be finite" + class TestCallawaySantAnnaBootstrap: """Tests for Callaway-Sant'Anna multiplier bootstrap inference."""