|  | 
| 32 | 32 |     SpecificationError, | 
| 33 | 33 | ) | 
| 34 | 34 | from pandas.util._decorators import ( | 
| 35 |  | -    Appender, | 
| 36 |  | -    Substitution, | 
| 37 | 35 |     doc, | 
| 38 | 36 |     set_module, | 
| 39 | 37 | ) | 
|  | 
| 71 | 69 | from pandas.core.groupby.groupby import ( | 
| 72 | 70 |     GroupBy, | 
| 73 | 71 |     GroupByPlot, | 
| 74 |  | -    _transform_template, | 
| 75 | 72 | ) | 
| 76 | 73 | from pandas.core.indexes.api import ( | 
| 77 | 74 |     Index, | 
| @@ -675,9 +672,143 @@ def _wrap_applied_output( | 
| 675 | 672 |     """ | 
| 676 | 673 |     ) | 
| 677 | 674 | 
 | 
| 678 |  | -    @Substitution(klass="Series", example=__examples_series_doc) | 
| 679 |  | -    @Appender(_transform_template) | 
| 680 | 675 |     def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): | 
|  | 676 | +        """ | 
|  | 677 | +        Call function producing a same-indexed Series on each group. | 
|  | 678 | +
 | 
|  | 679 | +        Returns a Series having the same indexes as the original object | 
|  | 680 | +        filled with the transformed values. | 
|  | 681 | +
 | 
|  | 682 | +        Parameters | 
|  | 683 | +        ---------- | 
|  | 684 | +        func : function, str | 
|  | 685 | +            Function to apply to each group. See the Notes section below for | 
|  | 686 | +            requirements. | 
|  | 687 | +
 | 
|  | 688 | +            Accepted inputs are: | 
|  | 689 | +
 | 
|  | 690 | +            - String | 
|  | 691 | +            - Python function | 
|  | 692 | +            - Numba JIT function with ``engine='numba'`` specified. | 
|  | 693 | +
 | 
|  | 694 | +            Only passing a single function is supported with this engine. | 
|  | 695 | +            If the ``'numba'`` engine is chosen, the function must be | 
|  | 696 | +            a user defined function with ``values`` and ``index`` as the | 
|  | 697 | +            first and second arguments respectively in the function signature. | 
|  | 698 | +            Each group's index will be passed to the user defined function | 
|  | 699 | +            and optionally available for use. | 
|  | 700 | +
 | 
|  | 701 | +            If a string is chosen, then it needs to be the name | 
|  | 702 | +            of the groupby method you want to use. | 
|  | 703 | +        *args | 
|  | 704 | +            Positional arguments to pass to func. | 
|  | 705 | +        engine : str, default None | 
|  | 706 | +            * ``'cython'`` : Runs the function through C-extensions from cython. | 
|  | 707 | +            * ``'numba'`` : Runs the function through JIT compiled code from numba. | 
|  | 708 | +            * ``None`` : Defaults to ``'cython'`` or the global setting | 
|  | 709 | +              ``compute.use_numba`` | 
|  | 710 | +
 | 
|  | 711 | +        engine_kwargs : dict, default None | 
|  | 712 | +            * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` | 
|  | 713 | +            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` | 
|  | 714 | +              and ``parallel`` dictionary keys. The values must either be ``True`` or | 
|  | 715 | +              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is | 
|  | 716 | +              ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be | 
|  | 717 | +              applied to the function | 
|  | 718 | +
 | 
|  | 719 | +        **kwargs | 
|  | 720 | +            Keyword arguments to be passed into func. | 
|  | 721 | +
 | 
|  | 722 | +        Returns | 
|  | 723 | +        ------- | 
|  | 724 | +        Series | 
|  | 725 | +            Series with the same indexes as the original object filled | 
|  | 726 | +            with transformed values. | 
|  | 727 | +
 | 
|  | 728 | +        See Also | 
|  | 729 | +        -------- | 
|  | 730 | +        Series.groupby.apply : Apply function ``func`` group-wise and combine | 
|  | 731 | +            the results together. | 
|  | 732 | +        Series.groupby.aggregate : Aggregate using one or more operations. | 
|  | 733 | +        Series.transform : Call ``func`` on self producing a Series with the | 
|  | 734 | +            same axis shape as self. | 
|  | 735 | +
 | 
|  | 736 | +        Notes | 
|  | 737 | +        ----- | 
|  | 738 | +        Each group is endowed with the attribute 'name' in case you need to know | 
|  | 739 | +        which group you are working on. | 
|  | 740 | +
 | 
|  | 741 | +        The current implementation imposes three requirements on f: | 
|  | 742 | +
 | 
|  | 743 | +        * f must return a value that either has the same shape as the input | 
|  | 744 | +          subframe or can be broadcast to the shape of the input subframe. | 
|  | 745 | +          For example, if `f` returns a scalar it will be broadcast to have the | 
|  | 746 | +          same shape as the input subframe. | 
|  | 747 | +        * if this is a DataFrame, f must support application column-by-column | 
|  | 748 | +          in the subframe. If f also supports application to the entire subframe, | 
|  | 749 | +          then a fast path is used starting from the second chunk. | 
|  | 750 | +        * f must not mutate groups. Mutation is not supported and may | 
|  | 751 | +          produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. | 
|  | 752 | +
 | 
|  | 753 | +        When using ``engine='numba'``, there will be no "fall back" behavior internally. | 
|  | 754 | +        The group data and group index will be passed as numpy arrays to the JITed | 
|  | 755 | +        user defined function, and no alternative execution attempts will be tried. | 
|  | 756 | +
 | 
|  | 757 | +        .. versionchanged:: 1.3.0 | 
|  | 758 | +
 | 
|  | 759 | +            The resulting dtype will reflect the return value of the passed ``func``, | 
|  | 760 | +            see the examples below. | 
|  | 761 | +
 | 
|  | 762 | +        .. versionchanged:: 2.0.0 | 
|  | 763 | +
 | 
|  | 764 | +            When using ``.transform`` on a grouped DataFrame and the | 
|  | 765 | +            transformation function returns a DataFrame, pandas now aligns the | 
|  | 766 | +            result's index with the input's index. You can call ``.to_numpy()`` | 
|  | 767 | +            on the result of the transformation function to avoid alignment. | 
|  | 768 | +
 | 
|  | 769 | +        Examples | 
|  | 770 | +        -------- | 
|  | 771 | +        >>> ser = pd.Series( | 
|  | 772 | +        ...     [390.0, 350.0, 30.0, 20.0], | 
|  | 773 | +        ...     index=["Falcon", "Falcon", "Parrot", "Parrot"], | 
|  | 774 | +        ...     name="Max Speed", | 
|  | 775 | +        ... ) | 
|  | 776 | +        >>> grouped = ser.groupby([1, 1, 2, 2]) | 
|  | 777 | +        >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) | 
|  | 778 | +        Falcon    0.707107 | 
|  | 779 | +        Falcon   -0.707107 | 
|  | 780 | +        Parrot    0.707107 | 
|  | 781 | +        Parrot   -0.707107 | 
|  | 782 | +        Name: Max Speed, dtype: float64 | 
|  | 783 | +
 | 
|  | 784 | +        Broadcast result of the transformation | 
|  | 785 | +
 | 
|  | 786 | +        >>> grouped.transform(lambda x: x.max() - x.min()) | 
|  | 787 | +        Falcon    40.0 | 
|  | 788 | +        Falcon    40.0 | 
|  | 789 | +        Parrot    10.0 | 
|  | 790 | +        Parrot    10.0 | 
|  | 791 | +        Name: Max Speed, dtype: float64 | 
|  | 792 | +
 | 
|  | 793 | +        >>> grouped.transform("mean") | 
|  | 794 | +        Falcon    370.0 | 
|  | 795 | +        Falcon    370.0 | 
|  | 796 | +        Parrot     25.0 | 
|  | 797 | +        Parrot     25.0 | 
|  | 798 | +        Name: Max Speed, dtype: float64 | 
|  | 799 | +
 | 
|  | 800 | +        .. versionchanged:: 1.3.0 | 
|  | 801 | +
 | 
|  | 802 | +        The resulting dtype will reflect the return value of the passed ``func``, | 
|  | 803 | +        for example: | 
|  | 804 | +
 | 
|  | 805 | +        >>> grouped.transform(lambda x: x.astype(int).max()) | 
|  | 806 | +        Falcon    390 | 
|  | 807 | +        Falcon    390 | 
|  | 808 | +        Parrot     30 | 
|  | 809 | +        Parrot     30 | 
|  | 810 | +        Name: Max Speed, dtype: int64 | 
|  | 811 | +        """ | 
| 681 | 812 |         return self._transform( | 
| 682 | 813 |             func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs | 
| 683 | 814 |         ) | 
| @@ -2298,9 +2429,154 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): | 
| 2298 | 2429 |     """ | 
| 2299 | 2430 |     ) | 
| 2300 | 2431 | 
 | 
| 2301 |  | -    @Substitution(klass="DataFrame", example=__examples_dataframe_doc) | 
| 2302 |  | -    @Appender(_transform_template) | 
| 2303 | 2432 |     def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): | 
|  | 2433 | +        """ | 
|  | 2434 | +        Call function producing a same-indexed DataFrame on each group. | 
|  | 2435 | +
 | 
|  | 2436 | +        Returns a DataFrame having the same indexes as the original object | 
|  | 2437 | +        filled with the transformed values. | 
|  | 2438 | +
 | 
|  | 2439 | +        Parameters | 
|  | 2440 | +        ---------- | 
|  | 2441 | +        func : function, str | 
|  | 2442 | +            Function to apply to each group. See the Notes section below for | 
|  | 2443 | +            requirements. | 
|  | 2444 | +
 | 
|  | 2445 | +            Accepted inputs are: | 
|  | 2446 | +
 | 
|  | 2447 | +            - String | 
|  | 2448 | +            - Python function | 
|  | 2449 | +            - Numba JIT function with ``engine='numba'`` specified. | 
|  | 2450 | +
 | 
|  | 2451 | +            Only passing a single function is supported with this engine. | 
|  | 2452 | +            If the ``'numba'`` engine is chosen, the function must be | 
|  | 2453 | +            a user defined function with ``values`` and ``index`` as the | 
|  | 2454 | +            first and second arguments respectively in the function signature. | 
|  | 2455 | +            Each group's index will be passed to the user defined function | 
|  | 2456 | +            and optionally available for use. | 
|  | 2457 | +
 | 
|  | 2458 | +            If a string is chosen, then it needs to be the name | 
|  | 2459 | +            of the groupby method you want to use. | 
|  | 2460 | +        *args | 
|  | 2461 | +            Positional arguments to pass to func. | 
|  | 2462 | +        engine : str, default None | 
|  | 2463 | +            * ``'cython'`` : Runs the function through C-extensions from cython. | 
|  | 2464 | +            * ``'numba'`` : Runs the function through JIT compiled code from numba. | 
|  | 2465 | +            * ``None`` : Defaults to ``'cython'`` or the global setting | 
|  | 2466 | +              ``compute.use_numba`` | 
|  | 2467 | +
 | 
|  | 2468 | +        engine_kwargs : dict, default None | 
|  | 2469 | +            * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` | 
|  | 2470 | +            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` | 
|  | 2471 | +              and ``parallel`` dictionary keys. The values must either be ``True`` or | 
|  | 2472 | +              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is | 
|  | 2473 | +              ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be | 
|  | 2474 | +              applied to the function | 
|  | 2475 | +
 | 
|  | 2476 | +        **kwargs | 
|  | 2477 | +            Keyword arguments to be passed into func. | 
|  | 2478 | +
 | 
|  | 2479 | +        Returns | 
|  | 2480 | +        ------- | 
|  | 2481 | +        DataFrame | 
|  | 2482 | +            DataFrame with the same indexes as the original object filled | 
|  | 2483 | +            with transformed values. | 
|  | 2484 | +
 | 
|  | 2485 | +        See Also | 
|  | 2486 | +        -------- | 
|  | 2487 | +        DataFrame.groupby.apply : Apply function ``func`` group-wise and combine | 
|  | 2488 | +            the results together. | 
|  | 2489 | +        DataFrame.groupby.aggregate : Aggregate using one or more operations. | 
|  | 2490 | +        DataFrame.transform : Call ``func`` on self producing a DataFrame with the | 
|  | 2491 | +            same axis shape as self. | 
|  | 2492 | +
 | 
|  | 2493 | +        Notes | 
|  | 2494 | +        ----- | 
|  | 2495 | +        Each group is endowed with the attribute 'name' in case you need to know | 
|  | 2496 | +        which group you are working on. | 
|  | 2497 | +
 | 
|  | 2498 | +        The current implementation imposes three requirements on f: | 
|  | 2499 | +
 | 
|  | 2500 | +        * f must return a value that either has the same shape as the input | 
|  | 2501 | +          subframe or can be broadcast to the shape of the input subframe. | 
|  | 2502 | +          For example, if `f` returns a scalar it will be broadcast to have the | 
|  | 2503 | +          same shape as the input subframe. | 
|  | 2504 | +        * if this is a DataFrame, f must support application column-by-column | 
|  | 2505 | +          in the subframe. If f also supports application to the entire subframe, | 
|  | 2506 | +          then a fast path is used starting from the second chunk. | 
|  | 2507 | +        * f must not mutate groups. Mutation is not supported and may | 
|  | 2508 | +          produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. | 
|  | 2509 | +
 | 
|  | 2510 | +        When using ``engine='numba'``, there will be no "fall back" behavior internally. | 
|  | 2511 | +        The group data and group index will be passed as numpy arrays to the JITed | 
|  | 2512 | +        user defined function, and no alternative execution attempts will be tried. | 
|  | 2513 | +
 | 
|  | 2514 | +        .. versionchanged:: 1.3.0 | 
|  | 2515 | +
 | 
|  | 2516 | +            The resulting dtype will reflect the return value of the passed ``func``, | 
|  | 2517 | +            see the examples below. | 
|  | 2518 | +
 | 
|  | 2519 | +        .. versionchanged:: 2.0.0 | 
|  | 2520 | +
 | 
|  | 2521 | +            When using ``.transform`` on a grouped DataFrame and the transformation | 
|  | 2522 | +            function returns a DataFrame, pandas now aligns the result's index | 
|  | 2523 | +            with the input's index. You can call ``.to_numpy()`` on the | 
|  | 2524 | +            result of the transformation function to avoid alignment. | 
|  | 2525 | +
 | 
|  | 2526 | +        Examples | 
|  | 2527 | +        -------- | 
|  | 2528 | +        >>> df = pd.DataFrame( | 
|  | 2529 | +        ...     { | 
|  | 2530 | +        ...         "A": ["foo", "bar", "foo", "bar", "foo", "bar"], | 
|  | 2531 | +        ...         "B": ["one", "one", "two", "three", "two", "two"], | 
|  | 2532 | +        ...         "C": [1, 5, 5, 2, 5, 5], | 
|  | 2533 | +        ...         "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], | 
|  | 2534 | +        ...     } | 
|  | 2535 | +        ... ) | 
|  | 2536 | +        >>> grouped = df.groupby("A")[["C", "D"]] | 
|  | 2537 | +        >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) | 
|  | 2538 | +                  C         D | 
|  | 2539 | +        0 -1.154701 -0.577350 | 
|  | 2540 | +        1  0.577350  0.000000 | 
|  | 2541 | +        2  0.577350  1.154701 | 
|  | 2542 | +        3 -1.154701 -1.000000 | 
|  | 2543 | +        4  0.577350 -0.577350 | 
|  | 2544 | +        5  0.577350  1.000000 | 
|  | 2545 | +
 | 
|  | 2546 | +        Broadcast result of the transformation | 
|  | 2547 | +
 | 
|  | 2548 | +        >>> grouped.transform(lambda x: x.max() - x.min()) | 
|  | 2549 | +             C    D | 
|  | 2550 | +        0  4.0  6.0 | 
|  | 2551 | +        1  3.0  8.0 | 
|  | 2552 | +        2  4.0  6.0 | 
|  | 2553 | +        3  3.0  8.0 | 
|  | 2554 | +        4  4.0  6.0 | 
|  | 2555 | +        5  3.0  8.0 | 
|  | 2556 | +
 | 
|  | 2557 | +        >>> grouped.transform("mean") | 
|  | 2558 | +                  C    D | 
|  | 2559 | +        0  3.666667  4.0 | 
|  | 2560 | +        1  4.000000  5.0 | 
|  | 2561 | +        2  3.666667  4.0 | 
|  | 2562 | +        3  4.000000  5.0 | 
|  | 2563 | +        4  3.666667  4.0 | 
|  | 2564 | +        5  4.000000  5.0 | 
|  | 2565 | +
 | 
|  | 2566 | +        .. versionchanged:: 1.3.0 | 
|  | 2567 | +
 | 
|  | 2568 | +        The resulting dtype will reflect the return value of the passed ``func``, | 
|  | 2569 | +        for example: | 
|  | 2570 | +
 | 
|  | 2571 | +        >>> grouped.transform(lambda x: x.astype(int).max()) | 
|  | 2572 | +           C  D | 
|  | 2573 | +        0  5  8 | 
|  | 2574 | +        1  5  9 | 
|  | 2575 | +        2  5  8 | 
|  | 2576 | +        3  5  9 | 
|  | 2577 | +        4  5  8 | 
|  | 2578 | +        5  5  9 | 
|  | 2579 | +        """ | 
| 2304 | 2580 |         return self._transform( | 
| 2305 | 2581 |             func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs | 
| 2306 | 2582 |         ) | 
|  | 
0 commit comments