Source code for gs_quant.timeseries.technicals

# Copyright 2018 Goldman Sachs.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# Marquee Plot Service will attempt to make public functions (not prefixed with _) from this module available.
# Such functions should be fully documented: docstrings should describe parameters and the return value, and provide
# a 1-line description. Type annotations should be provided for parameters.
import pandas as pd
import statsmodels.tsa.seasonal

from gs_quant.timeseries import diff, annualize, returns
from .statistics import *

"""
Technicals library is for technical analysis functions on timeseries, including moving averages,
volatility indicators and and other numerical operations which are finance-oriented for analyzing
statistical properties of trading activity, such as price movement and volume changes
"""


class Seasonality(Enum):
    MONTH = 'month'
    QUARTER = 'quarter'


class SeasonalModel(Enum):
    ADDITIVE = 'additive'
    MULTIPLICATIVE = 'multiplicative'


class Frequency(Enum):
    WEEK = 'week'
    MONTH = 'month'
    QUARTER = 'quarter'
    YEAR = 'year'


[docs]@plot_function
def moving_average(x: pd.Series, w: Union[Window, int, str] = Window(None, 0)) -> pd.Series:
    """
    Moving average over specified window

    :param x: time series of prices
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value.  If w is a string, it should be a relative date like '1m', '1d', etc.
              Window size defaults to length of series.
    :return: date-based time series of return

    **Usage**

    Simple arithmetic moving average over the specified window (number of observations). Shorter windows will be more
    reactive to changes in the asset price, but more volatile. Larger windows will be smoother but less reactive to
    near term changes in asset prices.

    :math:`R_t = \\frac{\\sum_{i=t-w+1}^{t} X_t}{N}`

    where N is the number of observations in each rolling window, :math:`w`. If window is not provided, computes
    rolling mean over the full series

    Equivalent to ``mean``

    **Examples**

    Generate price series with 100 observations starting from today's date:

    >>> prices = generate_series(100)
    >>> moving_average(prices, 22)

    **See also**

    :func:`mean`

    """
    w = normalize_window(x, w)
    return apply_ramp(mean(x, Window(w.w, 0)), w)


[docs]@plot_function
def bollinger_bands(x: pd.Series, w: Union[Window, int, str] = Window(None, 0), k: float = 2) -> pd.DataFrame:
    """
    Bollinger bands with given window and width

    :param x: time series of prices
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value.  If w is a string, it should be a relative date like '1m', '1d', etc.
              Window size defaults to length of series.
    :param k: band width in standard deviations (default: 2)
    :return: date-based time series of return

    **Usage**

    Standard deviation bands around the moving average of asset price level. Bollinger bands can be used to determine
    a range around the price level which responds to local volatility changes. Returns two series,
    upper, :math:`u_t` and lower, :math:`l_t`

    :math:`u_t = \\bar{X_t} + k\sigma_t`

    :math:`l_t = \\bar{X_t} - k\sigma_t`

    where :math:`\\bar{X_t}` is the moving average over specified window, and :math:`\\sigma_t` is the rolling
    standard deviation over the specified window

    See `Bollinger Bands <https://en.wikipedia.org/wiki/Bollinger_Bands>`_ for more information

    **Examples**

    Compute bollinger bands around :math:`20` day moving average at :math:`2` standard deviations:

    >>> prices = generate_series(100)
    >>> bollinger_bands(prices, 20, 2)

    **See also**

    :func:`moving_average` :func:`std`
    """
    w = normalize_window(x, w)
    avg = moving_average(x, w)
    sigma_t = std(x, w)

    upper = avg + k * sigma_t
    lower = avg - k * sigma_t

    return pd.concat([lower, upper], axis=1)


[docs]@plot_function
def smoothed_moving_average(x: pd.Series, w: Union[Window, int, str] = Window(None, 0)) -> pd.Series:
    """
    Smoothed moving average over specified window

    :param x: time series of prices
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value.  If w is a string, it should be a relative date like '1m', '1d', etc.
              Window size defaults to length of series.
    :return: date-based time series of return

    **Usage**

    A modified moving average (MMA), running moving average (RMA), or smoothed moving average (SMMA) is defined as:

    :math:`P_{MM,today} = \\frac{(N-1)P_{MM,yesterday} + P_today}{N}`

    where N is the number of observations in each rolling window, :math:`w`. If window is not provided, computes
    rolling mean over the full series

    See `Modified moving average <https://en.wikipedia.org/wiki/Moving_average#Modified_moving_average>`_ for more
    information


    **Examples**

    Generate price series with 100 observations starting from today's date:

    >>> prices = generate_series(100)
    >>> smoothed_moving_average(prices, 22)

    **See also**

    :func:`mean` :func:'moving_average'

    """
    w = normalize_window(x, w)
    window_size = w.w
    ramp = w.r
    means = apply_ramp(mean(x, Window(window_size, 0)), w)
    if means.size < 1:
        return pd.Series(dtype=float)
    initial_moving_average = means[0]
    if (isinstance(ramp, int) and ramp > 0) or isinstance(ramp, pd.DateOffset):
        x = apply_ramp(x, w)

    smoothed_moving_averages = x.copy()
    smoothed_moving_averages *= 0
    smoothed_moving_averages[0] = initial_moving_average
    for i in range(1, len(x)):
        if isinstance(window_size, int):
            window_num_elem = window_size
        else:
            window_num_elem = len(x[(x.index > (x.index[i] - window_size).date()) & (x.index <= x.index[i])])
        smoothed_moving_averages[i] = ((window_num_elem - 1) * smoothed_moving_averages[i - 1] + x[i]) / window_num_elem
    return smoothed_moving_averages


[docs]@plot_function
def relative_strength_index(x: pd.Series, w: Union[Window, int, str] = 14) -> pd.DataFrame:
    """
    Relative Strength Index

    :param x: time series of prices
    :param w: Window or int: size of window and ramp up to use. e.g. Window(22, 10) where 22 is the window size
              and 10 the ramp up value.  If w is a string, it should be a relative date like '1m', '1d', etc.
              Window size defaults to length of series.
    :return: date-based time series of RSI

    **Usage**

    The RSI computes momentum as the ratio of higher closes to lower closes: stocks which have had more or stronger
    positive changes have a higher RSI than stocks which have had more or stronger negative changes.

    See `RSI <https://en.wikipedia.org/wiki/Relative_strength_index>`_ for more information

    **Examples**

    Compute relative strength index over a :math:`14` day window:

    >>> prices = generate_series(100)
    >>> relative_strength_index(prices, 14)

    **See also**

    :func:`moving_average` :func:`std` :func:`smoothed_moving_average`
    """
    w = normalize_window(x, w)
    one_period_change = diff(x, 1)[1:]
    gains = one_period_change.copy()
    losses = one_period_change.copy()
    gains[gains < 0] = 0
    losses[losses > 0] = 0
    losses[losses < 0] *= -1

    moving_avg_gains = smoothed_moving_average(gains, w)
    moving_avg_losses = smoothed_moving_average(losses, w)

    rsi_len = len(moving_avg_gains)
    rsi = moving_avg_gains.copy()
    rsi *= 0

    for index in range(0, rsi_len):
        if moving_avg_losses[index] == 0:
            rsi[index] = 100
        else:
            relative_strength = moving_avg_gains[index] / moving_avg_losses[index]
            rsi[index] = 100 - (100 / (1 + relative_strength))

    return rsi


[docs]@plot_function
def exponential_moving_average(x: pd.Series, beta: float = 0.75) -> pd.Series:
    """
    Exponentially weighted moving average

    :param x: time series of prices
    :param beta: how much to weigh the previous observations in the time series, thus controlling how much importance we
        place on the (more distant) past. Must be between 0 (inclusive) and 1 (exclusive)
    :return: date-based time series of return

    **Usage**

    The exponential(ly weighted) moving average (EMA) of a series [:math:`X_0`, :math:`X_1`, :math:`X_2`, ...],
    is defined as:

    :math:`Y_0 = X_0`

    :math:`Y_t = \\beta \\cdot Y_{t-1} + (1 - \\beta) \\cdot X_t`

    where :math:`\\beta` is the weight we place on the previous average.

    See `Exponential moving average <https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average>`_ for
    more information

    **Examples**

    Generate price series with 100 observations starting from today's date:

    >>> prices = generate_series(100)
    >>> exponential_moving_average(prices, 0.9)

    **See also**

    :func:`mean` :func:`moving_average` :func:`smoothed_moving_average`

    """
    return x.ewm(alpha=1 - beta, adjust=False).mean()


[docs]@plot_function
def macd(x: pd.Series, m: int = 12, n: int = 26, s: int = 1) -> pd.Series:
    """
    Moving average convergence divergence (MACD).

    Moving average convergence divergence (MACD) is a trend-following momentum indicator that shows the relationship
    between two moving averages of a timeseries. It is the result of subtracting the exponential moving average of `x`
    with a period of :math:`m` from the exponential moving average of :math:`x` with a period of :math:`n`.

    Optionally, specify :math:`s` to apply an exponential moving average to the resulting series with a period of
    :math:`s` (default 1, equivalent to no exponential moving average).

    :param x: time series
    :param m: period of first, short exponential moving average (default 12)
    :param n: period of second, long exponential moving average (default 26)
    :param s: optional smoothing parameter (default 1)
    :return: date-based time series of return

    **Usage**

    The exponential(ly weighted) moving average (EMA) of a series [:math:`X_0`, :math:`X_1`, :math:`X_2`, ...],
    is defined as:

    :math:`Y_0 = X_0`

    :math:`Y_t = \\beta \cdot Y_{t-1} + (1 - \\beta) \cdot X_t`

    where :math:`\\beta = \\frac{2}{\\text{period} + 1}` is the weight we place on the previous average.

    The MACD of a series is defined as :math:`\\text{EMA}(\\text{EMA}(X, M) - \\text{EMA}(X, N), S)`

    **Examples**

    Generate price series with 100 observations starting from today's date:

    >>> prices = generate_series(100)
    >>> macd(prices, 12, 26)

    **See also**

    :func:`exponential_moving_average` :func:`moving_average` :func:`smoothed_moving_average`

    """
    a = x.ewm(adjust=False, span=m).mean()
    b = x.ewm(adjust=False, span=n).mean()
    return subtract(a, b).ewm(adjust=False, span=s).mean()


[docs]@plot_function
def exponential_volatility(x: pd.Series, beta: float = 0.75) -> pd.Series:
    """
    Exponentially weighted volatility

    :param x: time series of prices
    :param beta: how much to weigh the previous price in the time series, thus controlling how much importance we
                  place on the (more distant) past. Must be between 0 (inclusive) and 1 (exclusive)
    :return: date-based time series of exponential volatility of the input series

    **Usage**

    Calculates the exponentially weighted standard deviation of the return of the input series, and annualizes the
    standard deviation

    **Examples**

    Generate price series and compute exponentially weighted standard deviation of returns

    >>> prices = generate_series(100)
    >>> exponential_volatility(prices, 0.9)

    The above is equivalent to

    >>> annualize(exponential_std(returns(prices), 0.9)) * 100

    **See also**

    :func:`volatility` :func:`exponential_std` :func:`exponential_spread_volatility`

    """
    return annualize(exponential_std(returns(x), beta)).mul(100)


[docs]@plot_function
def exponential_spread_volatility(x: pd.Series, beta: float = 0.75) -> pd.Series:
    """
    Exponentially weighted spread volatility

    :param x: time series of prices
    :param beta: how much to weigh the previous price in the time series, thus controlling how much importance we
                  place on the (more distant) past. Must be between 0 (inclusive) and 1 (exclusive)
    :return: date-based time series of exponential spread volatility of the input series

    **Usage**

    Exponentially weights the daily differences of the input series, calculates the annualized standard deviation

    **Examples**

    Generate price series and compute exponentially weighted standard deviation of returns

    >>> prices = generate_series(100)
    >>> exponential_volatility(prices, 0.9)

    The above is equivalent to

    >>> annualize(exponential_std(diff(prices, 1), 0.9))

    **See also**

    :func:`volatility` :func:`exponential_std` :func:`exponential_volatility`

    """
    return annualize(exponential_std(diff(x, 1), beta))


def _freq_to_period(x: pd.Series, freq: Frequency = Frequency.YEAR):
    """
    Given input series x with a DateTimeIndex and a desired temporal frequency (period), returns x with all NaNs
    forward-filled (according to x's index's DateTime frequency) and the number of data points in a period.

    freq should be the length of time in which x's cycles repeat. For example: yearly retail sales cycle, yearly
    temperature fluctuation cycle.

    For example: 1) If x is a daily series and freq = YEARLY, then there are 365 data points in a period; 2) If x is
    a monthly series and freq = QUARTERLY, then there are 3 data points in a period.

    Freq parameter only applies when data frequency is:
    'B' and frequency == Weekly --> period = 5
    'B' and frequency == Monthly --> convert to 'D' and period = 30
    'D' and frequency == Weekly --> period = 7
    'D' and frequency == Monthly --> period = 30
    'M' and frequency == Quarterly --> Period = 3
    'W' and frequency == Quarterly --> period = 13
    """
    if not isinstance(x.index, pd.DatetimeIndex):
        raise MqValueError("Series must have a pandas.DateTimeIndex.")
    pfreq = getattr(getattr(x, 'index', None), 'inferred_freq', None)
    period = None if pfreq is None else statsmodels.tsa.seasonal.freq_to_period(pfreq)
    if period in [7, None]:  # daily
        x = x.asfreq('D', method='ffill')
        if freq == Frequency.YEAR:
            return x, 365
        elif freq == Frequency.QUARTER:
            return x, 91
        elif freq == Frequency.MONTH:
            return x, 30
        else:
            return x, 7
    elif period == 5:  # business day
        if freq == Frequency.YEAR:
            return x.asfreq('D', method='ffill'), 365
        if freq == Frequency.QUARTER:
            return x.asfreq('D', method='ffill'), 91
        elif freq == Frequency.MONTH:
            return x.asfreq('D', method='ffill'), 30
        else:  # freq == Frequency.WEEKLY:
            return x.asfreq('B', method='ffill'), 5
    elif period == 52:  # weekly frequency
        x = x.asfreq('W', method='ffill')
        if freq == Frequency.YEAR:
            return x, period
        elif freq == Frequency.QUARTER:
            return x, 13
        elif freq == Frequency.MONTH:
            return x, 4
        else:
            raise MqValueError(f'Frequency {freq.value} not compatible with series with frequency {pfreq}.')
    elif period == 12:  # monthly frequency
        x = x.asfreq('M', method='ffill')
        if freq == Frequency.YEAR:
            return x, period
        elif freq == Frequency.QUARTER:
            return x, 3
        else:
            raise MqValueError(f'Frequency {freq.value} not compatible with series with frequency {pfreq}.')
    return x, period


def _seasonal_decompose(x: pd.Series, method: SeasonalModel = SeasonalModel.ADDITIVE,
                        freq: Frequency = Frequency.YEAR):
    x, period = _freq_to_period(x, freq)
    if x.shape[0] < 2 * period:
        # Replace ValueError in seasonal_decompose with more descriptive error
        raise MqValueError(f"Series must have two complete cycles to be analyzed. Series has only {x.shape[0]} dpts.")
    decompose_obj = statsmodels.tsa.seasonal.seasonal_decompose(x, period=period, model=method.value)
    return decompose_obj


[docs]@plot_function
def seasonally_adjusted(x: pd.Series, method: SeasonalModel = SeasonalModel.ADDITIVE,
                        freq: Frequency = Frequency.YEAR) -> pd.Series:
    """
    Seasonally adjusted series

    :param x: time series with at least two years worth of data.
    :param method: 'additive' or 'multiplicative'. Type of seasonal model to use. 'multiplicative' is appropriate when
        the magnitude of the series's values affect the magnitude of seasonal swings; 'additive' is appropriate
        when seasonal swings' sizes are independent of the series's values.
    :param freq: 'year', 'quarter', 'month', or 'week'. Period in which full cycle occurs (i.e. the "period" of
        a wave).
    :return: date-based time series of seasonally-adjusted input series.

    **Usage**

    Uses a centered moving average and convolution to decompose the input series into seasonal, trend, and
    residual components. This function returns the series with the seasonal component removed.

    If using the default additive model:

    :math:`Y_t = X_t - S_t`

    If using the multiplicative model:

    :math:`Y_t = X_t / S_t`

    **Examples**

    Generate price series and compute seasonally-adjusted series.

    >>> prices = generate_series(1000)
    >>> seasonally_adjusted(prices)

    **See also**

    :func:`trend`

    """
    decompose_obj = _seasonal_decompose(x, method, freq)
    if method == SeasonalModel.ADDITIVE:
        return decompose_obj.trend + decompose_obj.resid
    else:
        return decompose_obj.trend * decompose_obj.resid


[docs]@plot_function
def trend(x: pd.Series, method: SeasonalModel = SeasonalModel.ADDITIVE, freq: Frequency = Frequency.YEAR) -> \
        pd.Series:
    """
    Trend of series with seasonality and residuals removed.

    :param x: time series with at least two years worth of data.
    :param method: 'additive' or 'multiplicative'. Type of seasonal model to use. 'multiplicative' is appropriate when
        the magnitude of the series's values affect the magnitude of seasonal swings; 'additive' is appropriate
        when seasonal swings' sizes are independent of the series's values.
    :param freq: 'year', 'quarter', 'month', or 'week'. Period in which full cycle occurs (i.e. the "period" of
        a wave).
    :return: date-based time series with trend of input series.

    **Usage**

    Uses a centered moving average and convolution to decompose the input series into seasonal, trend, and
    residual components. This function returns the trend component.

    If using the default additive model:

    :math:`Y_t = X_t - S_t - R_t`

    If using the multiplicative model:

    :math:`Y_t = X_t / (S_t * R_t)`

    **Examples**

    Generate price series and compute its trend.

    >>> prices = generate_series(1000)
    >>> trend(prices)

    **See also**

    :func:`seasonally_adjusted`

    """
    decompose_obj = _seasonal_decompose(x, method, freq)
    return decompose_obj.trend