@@ -416,8 +416,10 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
416416
417417 free_oa_buffer (dev_priv );
418418
419- intel_uncore_forcewake_put (dev_priv , FORCEWAKE_ALL );
420- intel_runtime_pm_put (dev_priv );
419+ if (!dev_priv -> perf .oa .enable_rc6 ) {
420+ intel_uncore_forcewake_put (dev_priv , FORCEWAKE_ALL );
421+ intel_runtime_pm_put (dev_priv );
422+ }
421423
422424 dev_priv -> perf .oa .exclusive_stream = NULL ;
423425}
@@ -834,6 +836,119 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
834836 hrtimer_cancel (& dev_priv -> perf .oa .poll_check_timer );
835837}
836838
839+ struct drm_i915_gem_object *
840+ i915_oa_ctx_wa_obj (struct drm_i915_private * dev_priv )
841+ {
842+ struct drm_i915_gem_object * ctx_wa_obj =
843+ dev_priv -> perf .oa .ctx_wa_obj_buf [dev_priv -> perf .oa .ctx_wa_idx ];
844+
845+ if (dev_priv -> perf .oa .dirty_wa_obj ) {
846+ dev_priv -> perf .oa .dirty_wa_obj = false;
847+ dev_priv -> perf .oa .ctx_wa_idx = !dev_priv -> perf .oa .ctx_wa_idx ;
848+ }
849+
850+ return ctx_wa_obj ;
851+ }
852+
853+ static int init_ctx_wa_obj_buf (struct drm_i915_private * dev_priv )
854+ {
855+ struct intel_engine_cs * ring = & dev_priv -> ring [RCS ];
856+ struct page * page = i915_gem_object_get_page (ring -> wa_ctx .obj , 0 );
857+ uint32_t * data = kmap_atomic (page );
858+ int ret ;
859+
860+ dev_priv -> perf .oa .ctx_wa_obj_buf [0 ] =
861+ i915_gem_object_create_from_data (dev_priv -> dev , data ,
862+ PAGE_SIZE );
863+ kunmap_atomic (data );
864+
865+ if (!dev_priv -> perf .oa .ctx_wa_obj_buf [0 ]) {
866+ DRM_DEBUG_DRIVER ("failed to allocate rc6 wa bb\n" );
867+ return - ENOMEM ;
868+ }
869+
870+ ret = i915_gem_obj_ggtt_pin (dev_priv -> perf .oa .ctx_wa_obj_buf [0 ],
871+ PAGE_SIZE , 0 );
872+ if (ret ) {
873+ DRM_DEBUG_DRIVER ("failed to pin rc6 wa bb\n" );
874+
875+ mutex_lock (& dev_priv -> dev -> struct_mutex );
876+ drm_gem_object_unreference (& dev_priv -> perf .oa .ctx_wa_obj_buf [0 ]-> base );
877+ mutex_unlock (& dev_priv -> dev -> struct_mutex );
878+
879+ dev_priv -> perf .oa .ctx_wa_obj_buf [0 ] = NULL ;
880+
881+ return ret ;
882+ }
883+
884+ dev_priv -> perf .oa .ctx_wa_obj_buf [1 ] = ring -> wa_ctx .obj ;
885+
886+ return 0 ;
887+ }
888+
889+ static int init_rc6_wa_bb (struct drm_i915_private * dev_priv )
890+ {
891+ struct page * page ;
892+ uint32_t * batch ;
893+ int ret , index , i , num_regs ;
894+ struct intel_engine_cs * ring = & dev_priv -> ring [RCS ];
895+ struct drm_i915_gem_object * ctx_wa_obj ;
896+
897+ if (!dev_priv -> perf .oa .ctx_wa_obj_buf [0 ]) {
898+ ret = init_ctx_wa_obj_buf (dev_priv );
899+ if (ret )
900+ return ret ;
901+ }
902+
903+ dev_priv -> perf .oa .dirty_wa_obj = true;
904+
905+ ctx_wa_obj = dev_priv -> perf .oa .ctx_wa_obj_buf [dev_priv -> perf .oa .ctx_wa_idx ];
906+
907+ page = i915_gem_object_get_page (ctx_wa_obj , 0 );
908+ batch = kmap_atomic (page );
909+
910+ index = ring -> wa_ctx .per_ctx_rc6 .offset ;
911+
912+ batch [index ++ ] = MI_NOOP ;
913+ batch [index ++ ] = MI_LOAD_REGISTER_IMM (1 );
914+ batch [index ++ ] = GDT_CHICKEN_BITS ;
915+ batch [index ++ ] = 0xA0 ;
916+
917+ for (i = 0 ; i < dev_priv -> perf .oa .mux_regs_len ; i ++ ) {
918+ /* x <= 16 must hold with MI_LOAD_REGISTER_IMM(x) */
919+ if (i % 16 == 0 ) {
920+ num_regs = min (16 , dev_priv -> perf .oa .mux_regs_len - i );
921+ batch [index ++ ] = MI_NOOP ;
922+ batch [index ++ ] = MI_LOAD_REGISTER_IMM (num_regs );
923+ }
924+
925+ batch [index ++ ] = dev_priv -> perf .oa .mux_regs [i ].addr ;
926+ batch [index ++ ] = dev_priv -> perf .oa .mux_regs [i ].value ;
927+ }
928+
929+ batch [index ++ ] = MI_NOOP ;
930+ batch [index ++ ] = MI_LOAD_REGISTER_IMM (1 );
931+ batch [index ++ ] = GDT_CHICKEN_BITS ;
932+ batch [index ++ ] = 0x80 ;
933+
934+ for (i = 0 ; i < dev_priv -> perf .oa .b_counter_regs_len ; i ++ ) {
935+ if (i % 16 == 0 ) {
936+ num_regs = min (16 , dev_priv -> perf .oa .b_counter_regs_len - i );
937+ batch [index ++ ] = MI_NOOP ;
938+ batch [index ++ ] = MI_LOAD_REGISTER_IMM (num_regs );
939+ }
940+
941+ batch [index ++ ] = dev_priv -> perf .oa .b_counter_regs [i ].addr ;
942+ batch [index ++ ] = dev_priv -> perf .oa .b_counter_regs [i ].value ;
943+ }
944+
945+ batch [index ++ ] = MI_BATCH_BUFFER_END ;
946+
947+ kunmap_atomic (batch );
948+
949+ return 0 ;
950+ }
951+
837952static int i915_oa_stream_init (struct i915_perf_stream * stream ,
838953 struct drm_i915_perf_open_param * param ,
839954 struct perf_open_properties * props )
@@ -912,12 +1027,38 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
9121027 *
9131028 * In our case we are expected that taking pm + FORCEWAKE
9141029 * references will effectively disable RC6.
1030+ *
1031+ * For BDW+, RC6 + OA is not plagued by this issue, so we instead
1032+ * try to leave RC6 enabled. One caveat though is that we now need
1033+ * to restore the NOA MUX configuration upon exiting RC6.
9151034 */
916- intel_runtime_pm_get (dev_priv );
917- intel_uncore_forcewake_get (dev_priv , FORCEWAKE_ALL );
1035+
1036+ /* We must disable RC6 until we are able to correctly setup the RC6 WA
1037+ * BB, if requested, otherwise we could potentially lose some OA state
1038+ * which is not automatically restored as part of the OA power context */
1039+ intel_uncore_forcewake_put (dev_priv , FORCEWAKE_ALL );
1040+ intel_runtime_pm_put (dev_priv );
9181041
9191042 dev_priv -> perf .oa .ops .enable_metric_set (dev_priv );
9201043
1044+ if (props -> enable_rc6 ) {
1045+ if (IS_BROADWELL (dev_priv -> dev )) {
1046+ ret = init_rc6_wa_bb (dev_priv );
1047+ if (ret )
1048+ DRM_ERROR ("Failed to enable RC6 with OA\n" );
1049+ } else {
1050+ DRM_ERROR ("OA with RC6 enabled is not supported on this"
1051+ " platform\n" );
1052+ ret = - EINVAL ;
1053+ }
1054+
1055+ intel_uncore_forcewake_put (dev_priv , FORCEWAKE_ALL );
1056+ intel_runtime_pm_put (dev_priv );
1057+
1058+ if (ret )
1059+ return ret ;
1060+ }
1061+
9211062 stream -> destroy = i915_oa_stream_destroy ;
9221063 stream -> enable = i915_oa_stream_enable ;
9231064 stream -> disable = i915_oa_stream_disable ;
@@ -1665,6 +1806,10 @@ void i915_perf_init(struct drm_device *dev)
16651806
16661807 dev_priv -> perf .initialized = true;
16671808
1809+ dev_priv -> perf .oa .ctx_wa_obj_buf [0 ] = NULL ;
1810+ dev_priv -> perf .oa .ctx_wa_idx = 0 ;
1811+ dev_priv -> perf .oa .dirty_wa_obj = false;
1812+
16681813 return ;
16691814
16701815sysfs_error :
0 commit comments