import sys
sys.path.insert(0, '..')

import pandas as pd
import matplotlib.pyplot as plt

from pubmatrix import (
    pubmatrix,
    pubmatrix_from_file,
    plot_pubmatrix_heatmap,
    pubmatrix_heatmap,
)

# NCBI API key passed to every query below via ``api_key=``.
# Leave as None to query without a key, or replace it with your own key
# (e.g. API_KEY = "YOUR_KEY_HERE") to increase the rate limit.
API_KEY = None

# Column terms (A) and row terms (B) of the co-occurrence matrix.
A = ["WNT1", "WNT2", "CTNNB1"]
B = ["obesity", "diabetes", "cancer"]

# One query is issued per (A, B) pair: 3 x 3 = 9 here.
result = pubmatrix(A=A, B=B, api_key=API_KEY)
result

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:04<00:00,  1.85query/s]

# Larger 7 x 7 example: WNT genes as the A (column) terms against
# obesity genes as the B (row) terms, i.e. 49 queries.
wnt_genes     = ["WNT1", "WNT2", "WNT3A", "WNT5A", "WNT7B", "CTNNB1", "DVL1"]
obesity_genes = ["LEPR", "ADIPOQ", "PPARG", "TNF", "IL6", "ADRB2", "INSR"]

result_wnt = pubmatrix(A=wnt_genes, B=obesity_genes, api_key=API_KEY)
result_wnt

Querying NCBI: 100%|█████████████████████████████████████| 49/49 [00:19<00:00,  2.55query/s]

# Same term lists as the basic example, but with database="pmc" instead of
# the default database.
result_pmc = pubmatrix(A=A, B=B, database="pmc", api_key=API_KEY)
result_pmc

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.58query/s]

# Side-by-side comparison: print each matrix under its database label.
for heading, table in (("PubMed:", result), ("\nPMC:", result_pmc)):
    print(heading)
    print(table)

PubMed:
          WNT1  WNT2  CTNNB1
obesity     63     6      90
diabetes   118    18     267
cancer    1278   297    8363

PMC:
           WNT1  WNT2  CTNNB1
obesity    2765  1013    4904
diabetes   4396  1610    7073
cancer    12695  5089   29362

# Run the same 3 x 3 query over two consecutive publication-year windows.
early_window, late_window = [2000, 2010], [2011, 2024]
result_2000_2010 = pubmatrix(A=A, B=B, daterange=early_window, api_key=API_KEY)
result_2011_2024 = pubmatrix(A=A, B=B, daterange=late_window, api_key=API_KEY)

# Print each window's matrix under its own heading.
for heading, table in (
    ("2000–2010:", result_2000_2010),
    ("\n2011–2024:", result_2011_2024),
):
    print(heading)
    print(table)

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.59query/s]
Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.45query/s]

2000–2010:
          WNT1  WNT2  CTNNB1
obesity      1     0       5
diabetes    11     4      41
cancer     361    82    2100

2011–2024:
          WNT1  WNT2  CTNNB1
obesity     60     6      79
diabetes   103    12     201
cancer     768   169    5351

# Same query, additionally exported to disk via outfile/export_format;
# this run logged "Saved CSV to output.csv".
pubmatrix(A=A, B=B, outfile="output", export_format="csv", api_key=API_KEY)

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.58query/s]

Saved CSV to output.csv

# Same query exported with export_format="ods";
# this run logged "Saved ODS to output.ods".
pubmatrix(A=A, B=B, outfile="output", export_format="ods", api_key=API_KEY)

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.56query/s]

Saved ODS to output.ods

# n_tries=5: presumably the number of attempts per query when a network
# failure occurs (confirm against the pubmatrix documentation).
result_retry = pubmatrix(A=A, B=B, n_tries=5, api_key=API_KEY)
result_retry

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.57query/s]

# Contents of the temporary terms file: three terms, a "#" line, then three
# more terms, one entry per line.
sample_terms = "WNT1\nWNT2\nCTNNB1\n#\nobesity\ndiabetes\ncancer\n"

# Pin the encoding so the file's bytes do not depend on the platform's
# default locale encoding.
with open("sample_terms.txt", "w", encoding="utf-8") as f:
    f.write(sample_terms)

# Build the matrix from the terms stored in sample_terms.txt rather than
# from in-memory lists (presumably the "#" line separates the A terms from
# the B terms; confirm against the pubmatrix_from_file documentation).
result_file = pubmatrix_from_file("sample_terms.txt", api_key=API_KEY)
result_file

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.55query/s]

# With optional arguments: the file-based entry point is called here with
# the same daterange and api_key keywords used in the direct pubmatrix()
# calls.
result_file_dated = pubmatrix_from_file(
    "sample_terms.txt",
    daterange=[2015, 2024],
    api_key=API_KEY,
)
result_file_dated

Querying NCBI: 100%|███████████████████████████████████████| 9/9 [00:03<00:00,  2.41query/s]

import os

# Remove the temporary terms file. Tolerate it already being gone so this
# cell can be re-run without raising FileNotFoundError.
try:
    os.remove("sample_terms.txt")
except FileNotFoundError:
    pass

# Quick heatmap of the basic-usage matrix with no options supplied; the
# resulting Axes in this run was titled "PubMatrix Results".
pubmatrix_heatmap(result)

<Axes: title={'center': 'PubMatrix Results'}>

# Fully specified call: custom title, clustering enabled on rows and
# columns, per-cell numbers shown, and an explicit figure width/height
# (units not shown here; presumably inches, to be confirmed).
plot_pubmatrix_heatmap(
    result,
    title="WNT Genes × Disease Co-occurrence",
    cluster_rows=True,
    cluster_cols=True,
    show_numbers=True,
    width=8,
    height=5,
)

<Axes: title={'center': 'WNT Genes × Disease Co-occurrence'}>

# Same matrix with clustering switched off for both rows and columns.
plot_pubmatrix_heatmap(
    result,
    title="No clustering",
    cluster_rows=False,
    cluster_cols=False,
)

<Axes: title={'center': 'No clustering'}>

# Same matrix with the per-cell numbers hidden (show_numbers=False).
plot_pubmatrix_heatmap(
    result,
    title="No cell annotations",
    show_numbers=False,
)

<Axes: title={'center': 'No cell annotations'}>

# Custom colour palette: three hex colours running from light to dark blue.
plot_pubmatrix_heatmap(
    result,
    title="Blue gradient",
    color_palette=["#deebf7", "#9ecae1", "#3182bd"],
)

<Axes: title={'center': 'Blue gradient'}>

# Custom colour palette: three hex colours running from light to dark green.
plot_pubmatrix_heatmap(
    result,
    title="Green gradient",
    color_palette=["#e5f5e0", "#a1d99b", "#31a354"],
)

<Axes: title={'center': 'Green gradient'}>

# Passing filename= also writes the figure to disk; this run logged
# "Saved heatmap to heatmap_full.png".
plot_pubmatrix_heatmap(
    result,
    title="Saved heatmap",
    filename="heatmap_full.png",
    width=8,
    height=5,
)

Saved heatmap to heatmap_full.png

<Axes: title={'center': 'Saved heatmap'}>

# Totals along each axis of the matrix: (heading, axis passed to .sum()).
summaries = (
    ("Column sums (total co-occurrences per A term):", 0),
    ("Row sums (total co-occurrences per B term):", 1),
)
for position, (heading, matrix_axis) in enumerate(summaries):
    if position:
        print()  # blank line between the two summaries
    print(heading)
    print(result.sum(axis=matrix_axis))

Column sums (total co-occurrences per A term):
WNT1      1459
WNT2       321
CTNNB1    8720
dtype: int64

Row sums (total co-occurrences per B term):
obesity      159
diabetes     403
cancer      9938
dtype: int64

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One bar chart per matrix axis: (axis passed to .sum(), bar colour, title).
panels = (
    (0, "#de2d26", "Co-occurrences per column term (A)"),
    (1, "#3182bd", "Co-occurrences per row term (B)"),
)
totals_by_axis = {}
for ax, (matrix_axis, bar_color, panel_title) in zip(axes, panels):
    # Sum along the axis and order the bars from largest to smallest.
    totals = result.sum(axis=matrix_axis).sort_values(ascending=False)
    totals_by_axis[matrix_axis] = totals
    ax.bar(totals.index, totals.values, color=bar_color)
    ax.set_title(panel_title)
    ax.set_ylabel("Total publication count")
    ax.tick_params(axis="x", rotation=45)

# Keep the per-axis totals available under their original names.
col_totals, row_totals = totals_by_axis[0], totals_by_axis[1]

plt.tight_layout()
plt.show()

# Reuse results computed above: subtract the 2000–2010 matrix from the
# 2011–2024 matrix (later window minus earlier window).
diff = result_2011_2024 - result_2000_2010
print("Absolute change in co-occurrence counts (2011–2024 vs 2000–2010):")
diff

Absolute change in co-occurrence counts (2011–2024 vs 2000–2010):

# Heatmap of the change matrix, with clustering disabled on both axes, a
# three-colour custom palette and an explicit figure width/height.
plot_pubmatrix_heatmap(
    diff,
    title="Change in co-occurrences: 2011–2024 vs 2000–2010",
    color_palette=["#f7f7f7", "#fc8d59", "#d73027"],
    cluster_rows=False,
    cluster_cols=False,
    width=7,
    height=4,
)

<Axes: title={'center': 'Change in co-occurrences: 2011–2024 vs 2000–2010'}>

# Write the basic-usage matrix to CSV through the result object's own
# to_csv() method, without using pubmatrix's outfile/export_format options.
result.to_csv("my_results.csv")
print("Saved.")

Saved.

PubMatrixPython — full reference notebook

Setup

NCBI API key

`pubmatrix()` — core query function

Basic usage

Larger matrix — 7 × 7 WNT × obesity genes

`database` parameter

`daterange` parameter

Export to CSV

Export to ODS

`n_tries` — retry on network failure

`pubmatrix_from_file()` — load terms from a text file

Heatmap visualisation

`pubmatrix_heatmap()` — quick plot with defaults

`plot_pubmatrix_heatmap()` — full control

Clustering disabled

Numbers hidden

Custom colour palette

Save to PNG

Working with the result DataFrame

Summary statistics

Bar charts — co-occurrences per term

Temporal trend — comparing two date windows

Save results to CSV manually

	WNT1	WNT2	WNT3A	WNT5A	WNT7B	CTNNB1	DVL1
LEPR	6	0	0	2	0	4	0
ADIPOQ	2	0	0	6	0	9	0
PPARG	2	3	7	5	1	26	0
TNF	83	4	110	123	6	216	3
IL6	75	7	87	143	9	151	3
ADRB2	1	0	0	1	0	0	0
INSR	1	1	1	1	0	4	0

PubMatrixPython — full reference notebook

Setup

NCBI API key

pubmatrix() — core query function

Basic usage

Larger matrix — 7 × 7 WNT × obesity genes

database parameter

daterange parameter

Export to CSV

Export to ODS

n_tries — retry on network failure

pubmatrix_from_file() — load terms from a text file

Heatmap visualisation

pubmatrix_heatmap() — quick plot with defaults

plot_pubmatrix_heatmap() — full control

Clustering disabled

Numbers hidden

Custom colour palette

Save to PNG

Working with the result DataFrame

Summary statistics

Bar charts — co-occurrences per term

Temporal trend — comparing two date windows

Save results to CSV manually

`pubmatrix()` — core query function

`database` parameter

`daterange` parameter

`n_tries` — retry on network failure

`pubmatrix_from_file()` — load terms from a text file

`pubmatrix_heatmap()` — quick plot with defaults

`plot_pubmatrix_heatmap()` — full control