⇦ Back

This type of regression analysis was first published in Passing & Bablok (1983)1. As the Wikipedia article states:

Passing–Bablok regression is a statistical method for non-parametric regression analysis suitable for method comparison studies.

Essentially, when two different methods are being used to take the same measurement a line-of-best-fit can fitted to the points. The Passing-Bablok method does this by:

The median gradient and median y-intercept create the line-of-best-fit. It has an associated confidence interval which can be interpreted as follows:

Let’s use the following hypothetical example dataset which comes from Giavarina (2015)2. Imagine that a set of objects were each measured twice - once using ‘Method A’ and once using ‘Method B’ - giving the two lists of measurements below:

import pandas as pd

df = pd.DataFrame({
    'Method A': [
        1.0, 5.0, 10.0, 20.0, 50.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0,
        150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 450.0, 500.0, 550.0, 600.0,
        650.0, 700.0, 750.0, 800.0, 850.0, 900.0, 950.0, 1000.0
    ],
    'Method B': [
        8.0, 16.0, 30.0, 24.0, 39.0, 54.0, 40.0, 68.0, 72.0, 62.0, 122.0, 80.0,
        181.0, 259.0, 275.0, 380.0, 320.0, 434.0, 479.0, 587.0, 626.0, 648.0,
        738.0, 766.0, 793.0, 851.0, 871.0, 957.0, 1001.0, 960.0
    ]
})

First, let’s visualise this data:

import matplotlib.pyplot as plt

# Options
x = 5  # Want figures to be A5
plt.rc('figure', figsize=[46.82 * .5**(.5 * x), 33.11 * .5**(.5 * x)])
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.rc('text.latex', preamble=r'\usepackage{textgreek}')

ax = plt.axes()
ax.set_title('The Raw Data')
ax.set_xlabel('Method A')
ax.set_ylabel('Method B')
# Scatter plot
ax.scatter(df['Method A'], df['Method B'], c='k', s=20, alpha=0.6, marker='o')
# Get axis limits
left, right = plt.xlim()
bottom, top = plt.ylim()
# Change axis limits
ax.set_xlim(0, right)
ax.set_ylim(0, top)
# Reference line
label = 'Reference line'
ax.plot([left, right], [left, right], c='grey', ls='--', label=label)
# Set aspect ratio
ax.set_aspect('equal')
# Legend
ax.legend(frameon=False)
# Show
plt.show()

Next, take each pair of points on this scatter plot and calculate the gradient of the straight line that connects them:

#
# Calculate the gradients of the lines between each pair of points
#
method1 = df['Method A']
method2 = df['Method B']
n_points = len(method1)
# sv is a list of the gradients between of each pair of points
sv = []
# k is the number of gradients less than -1
k = 0
for i in range(n_points - 1):
    for j in range(i + 1, n_points):
        dy = method2[j] - method2[i]
        dx = method1[j] - method1[i]
        # Ignore gradients that are vertical (ie the x values of the points
        # are the same)
        if dx != 0:
            gradient = dy / dx
        elif dy < 0:
            gradient = -1.e+23
        elif dy > 0:
            gradient = 1.e+23
        else:
            gradient = None
        if gradient is not None:
            sv.append(gradient)
            k += (gradient < -1)
# Sort the gradients into ascending order
sv.sort()

Find the estimated gradient of the line-of-best-fit from the list of all between-point gradients and calculate the confidence interval of this value:

import math

#
# Find the estimated gradient and confidence limits
#
m0 = (len(sv) - 1) / 2
if m0 == int(m0):
    # If odd
    gradient_est = sv[k + int(m0)]
else:
    # If even
    gradient_est = 0.5 * (sv[k + int(m0 - 0.5)] + sv[k + int(m0 + 0.5)])
# Calculate the index of the upper and lower confidence bounds
w = 1.96
ci = w * math.sqrt((n_points * (n_points - 1) * (2 * n_points + 5)) / 18)
n_gradients = len(sv)
m1 = int(round((n_gradients - ci) / 2))
m2 = n_gradients - m1 - 1
# Calculate the lower and upper bounds of the gradient
(gradient_lb, gradient_ub) = (sv[k + m1], sv[k + m2])

Finally, get the estimated y-intercept of the line-of-best-fit from the list of all y-intercepts:

import numpy as np


def calc_intercept(method1, method2, gradient):
    """Calculate intercept given points and a gradient."""
    temp = []
    for i in range(len(method1)):
        temp.append(method2[i] - gradient * method1[i])
    return np.median(temp)


# Calculate the intercept as the median of all the intercepts of all the
# lines connecting each pair of points
int_est = calc_intercept(method1, method2, gradient_est)
int_ub = calc_intercept(method1, method2, gradient_lb)
int_lb = calc_intercept(method1, method2, gradient_ub)

This yields a result:

print(f'Gradient = {gradient_est:4.2f} ({gradient_ub:4.2f} - {gradient_lb:4.2f})')
print(f'Y-intercept = {int_est:4.2f} ({int_ub:4.2f} - {int_lb:4.2f})')
## Gradient = 1.06 (1.09 - 1.02)
## y-intercept = 7.19 (19.79 - -0.15)

We can again visualise this, with the line-of-best-fit and its confidence interval added in:

#
# Plot
#
ax = plt.axes()
ax.set_title('Passing-Bablok Regression')
ax.set_xlabel('Method A')
ax.set_ylabel('Method B')
# Scatter plot
ax.scatter(df['Method A'], df['Method B'], c='k', s=20, alpha=0.6, marker='o')
# Get axis limits
left, right = plt.xlim()
bottom, top = plt.ylim()
# Change axis limits
ax.set_xlim(0, right)
ax.set_ylim(0, top)
# Reference line
label = 'Reference line'
ax.plot([left, right], [left, right], c='grey', ls='--', label=label)
# Passing-Bablok regression line
x = np.array([left, right])
y = gradient_est * x + int_est
ax.plot(x, y, label=f'{gradient_est:4.2f}x + {int_est:4.2f}')
# Passing-Bablok regression line - confidence intervals
x = np.array([left, right])
y_lb = gradient_lb * x + int_lb
y_ub = gradient_ub * x + int_ub
label = f'Upper CI: {gradient_ub:4.2f}x + {int_ub:4.2f}'
ax.plot(x, y_ub, c='tab:blue', alpha=0.2, label=label)
label = f'Lower CI: {gradient_lb:4.2f}x + {int_lb:4.2f}'
ax.plot(x, y_lb, c='tab:blue', alpha=0.2, label=label)
ax.fill_between(x, y_ub, y_lb, alpha=0.2)
# Set aspect ratio
ax.set_aspect('equal')
# Legend
ax.legend(frameon=False)
# Show
plt.show()

If we are doing multiple Passing-Bablok analyses it will be more convenient in the long run to create a function for it. This looks like this:

def passing_bablok(method1, method2):
    """Perform Passing-Bablok analysis."""
    #
    # Calculate the gradients of the lines between each pair of points
    #
    n_points = len(method1)
    # sv is a list of the gradients between of each pair of points
    sv = []
    # k is the number of gradients less than -1
    k = 0
    for i in range(n_points - 1):
        for j in range(i + 1, n_points):
            dy = method2[j] - method2[i]
            dx = method1[j] - method1[i]
            # Ignore gradients that are vertical (ie the x values of the points
            # are the same)
            if dx != 0:
                gradient = dy / dx
            elif dy < 0:
                gradient = -1.e+23
            elif dy > 0:
                gradient = 1.e+23
            else:
                gradient = None
            if gradient is not None:
                sv.append(gradient)
                k += (gradient < -1)
    # Sort the gradients into ascending order
    sv.sort()

    #
    # Find the estimated gradient and confidence limits
    #
    m0 = (len(sv) - 1) / 2
    if m0 == int(m0):
        # If odd
        gradient_est = sv[k + int(m0)]
    else:
        # If even
        gradient_est = 0.5 * (sv[k + int(m0 - 0.5)] + sv[k + int(m0 + 0.5)])
    # Calculate the index of the upper and lower confidence bounds
    w = 1.96
    ci = w * math.sqrt((n_points * (n_points - 1) * (2 * n_points + 5)) / 18)
    n_gradients = len(sv)
    m1 = int(round((n_gradients - ci) / 2))
    m2 = n_gradients - m1 - 1
    # Calculate the lower and upper bounds of the gradient
    (gradient_lb, gradient_ub) = (sv[k + m1], sv[k + m2])

    def calc_intercept(method1, method2, gradient):
        """Calculate intercept given points and a gradient."""
        temp = []
        for i in range(len(method1)):
            temp.append(method2[i] - gradient * method1[i])
        return np.median(temp)

    # Calculate the intercept as the median of all the intercepts of all the
    # lines connecting each pair of points
    int_est = calc_intercept(method1, method2, gradient_est)
    int_ub = calc_intercept(method1, method2, gradient_lb)
    int_lb = calc_intercept(method1, method2, gradient_ub)

    return (gradient_est, gradient_lb, gradient_ub), (int_est, int_lb, int_ub)

Here it is all put together:

df = pd.DataFrame({
    'Method A': [
        1.0, 5.0, 10.0, 20.0, 50.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0,
        150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 450.0, 500.0, 550.0, 600.0,
        650.0, 700.0, 750.0, 800.0, 850.0, 900.0, 950.0, 1000.0
    ],
    'Method B': [
        8.0, 16.0, 30.0, 24.0, 39.0, 54.0, 40.0, 68.0, 72.0, 62.0, 122.0, 80.0,
        181.0, 259.0, 275.0, 380.0, 320.0, 434.0, 479.0, 587.0, 626.0, 648.0,
        738.0, 766.0, 793.0, 851.0, 871.0, 957.0, 1001.0, 960.0
    ]
})

beta, alpha = passing_bablok(df['Method A'], df['Method B'])

#
# Plot
#
ax = plt.axes()
ax.set_title('Passing-Bablok Regression')
ax.set_xlabel('Method A')
ax.set_ylabel('Method B')
# Scatter plot
ax.scatter(df['Method A'], df['Method B'], c='k', s=20, alpha=0.6, marker='o')
# Get axis limits
left, right = plt.xlim()
bottom, top = plt.ylim()
# Change axis limits
ax.set_xlim(0, right)
ax.set_ylim(0, top)
# Reference line
label = 'Reference line'
ax.plot([left, right], [left, right], c='grey', ls='--', label=label)
# Passing-Bablok regression line
x = np.array([left, right])
y = beta[0] * x + alpha[0]
ax.plot(x, y, label=f'{beta[0]:4.2f}x + {alpha[0]:4.2f}')
# Passing-Bablok regression line - confidence intervals
x = np.array([left, right])
y_lb = beta[1] * x + alpha[1]
y_ub = beta[2] * x + alpha[2]
label = f'Upper CI: {beta[2]:4.2f}x + {alpha[2]:4.2f}'
ax.plot(x, y_ub, c='tab:blue', alpha=0.2, label=label)
label = f'Lower CI: {beta[1]:4.2f}x + {alpha[1]:4.2f}'
ax.plot(x, y_lb, c='tab:blue', alpha=0.2, label=label)
ax.fill_between(x, y_ub, y_lb, alpha=0.2)
# Set aspect ratio
ax.set_aspect('equal')
# Legend
ax.legend(frameon=False)
# Show
plt.show()

References

1Passing H, Bablok W (1983). “A new biometrical procedure for testing the equality of measurements from two different analytical methods. Application of linear regression procedures for method comparison studies in Clinical Chemistry, Part I”. Journal of Clinical Chemistry and Clinical Biochemistry. 21 (11): 709–20. doi: 10.1515/cclm.1983.21.11.709. PMID: 6655447
2Giavarina D (2015). “Understanding Bland Altman analysis”. Biochemia Medica. 25 (2):141-151. doi: 10.11613/BM.2015.015.

⇦ Back