Python: Create a multi-page PDF file#

Description

After generating a plot with Matplotlib, it is easy to write the figure to a PDF file with the pyplot.savefig() method. However, if we want to create a PDF file in which several plots are saved on different pages, we have to take a different approach.

Content

  • Create PDF object

  • Add metadata to PDF object

  • Generate multiple plots

  • Save the plots to PDF object

  • Write PDF file

Software requirements

  • Python 3

  • os

  • datetime

  • xarray

  • matplotlib

  • cartopy

Example script#

create_pdf_multipage_example.py

#!/usr/bin/env python
# coding: utf-8
#------------------------------------------------------------------------------
# Create multi-page PDF files
# 
#------------------------------------------------------------------------------
# 2025 copyright DKRZ licensed under CC BY-NC-SA 4.0
#                (https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en)
#------------------------------------------------------------------------------
# 
# In this example, we explain how to create a multi-page PDF document from 
# different plots. To do this, we use the class `PdfPages` from Matplotlib's PDF 
# backend. We also show how to add some more metadata to the PDF file.
# 
# Matplotlib presets the following metadata: 'Creator', 'Producer', 'CreationDate'
# 
#------------------------------------------------------------------------------
import os
import datetime
import xarray as xr
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import cartopy.crs as ccrs

#------------------------------------------------------------------------------
# Example data
#
# Open example data set.
#------------------------------------------------------------------------------
# Build the input path from the user's home directory. os.path.expanduser('~')
# also works when the HOME environment variable is not set (e.g. on Windows),
# where os.environ['HOME'] would raise a KeyError.
ds = xr.open_dataset(os.path.join(os.path.expanduser('~'),
                                  'data', 'rectilinear_grid_2D.nc'))

# Choose variables and color maps. Both lists are index-aligned: cmaps[i] is
# the color map used to plot vars[i].
# NOTE: the name `vars` shadows the Python builtin vars(); it is kept because
# the examples further down in this script refer to it.
vars  = ['tsurf', 'precip', 'slp']
cmaps = ['RdYlBu_r', 'GnBu', 'PiYG']

# Use LaTeX when text, e.g. units, contains a math formula.
LATEX = False

# With LaTeX enabled, replace the units of precip by a typeset fraction.
if LATEX:
    ds.precip.attrs['units'] = r'$\frac{kg}{m^{2}s}$'

#------------------------------------------------------------------------------
# Page size
# 
# Define the figure size in inches, here we want to use DIN A4 in portrait or landscape mode.
#------------------------------------------------------------------------------
# DIN A4 (210 mm x 297 mm) as (width, height) in inches, portrait orientation.
figsize_a4p = (8.27, 11.69)
# Landscape orientation is the portrait size with width and height swapped.
figsize_a4l = figsize_a4p[::-1]

#------------------------------------------------------------------------------
# PDF file
# 
# Set the PDF output file name.
#------------------------------------------------------------------------------
pdf_filename = 'multiple_pdf_pages.pdf'

# Additional metadata to be stored in the PDF file's information dictionary.
#
# https://matplotlib.org/stable/api/backend_pdf_api.html#matplotlib.backends.backend_pdf.PdfPages
#
# Standard keys:
#     'Title', 'Author', 'Subject', 'Keywords', 'Creator',
#     'Producer', 'CreationDate', 'ModDate', and 'Trapped'.
#
# Keys that Matplotlib fills in by default:
#     'Creator', 'Producer' and 'CreationDate'.
#
# A predefined key can be removed by setting it to None.
pdf_metadata = dict(
    Title='Example PDF creation: Plot pages showing tsurf, precip, and slp',
    Author='DKRZ',
    Subject='Create a 3 pages PDF file',
    Keywords='Matplotlib PDF multipage Author Title Subject ModDate',
    # modification date: the current local date and time
    ModDate=datetime.datetime.today(),
)

#------------------------------------------------------------------------------
# Example 1
# 
# Generate one plot per page of each variable's first time step.
# 
# Set the metadata for the PDF object directly when creating it with PdfPages.
#------------------------------------------------------------------------------
with PdfPages(pdf_filename, metadata=pdf_metadata) as pdf:

    #-- render text with LaTeX only if requested
    plt.rcParams['text.usetex'] = LATEX

    #-- pages are counted from 1 so that the log output matches the page
    #-- number written onto each page
    for page, (var, cmap) in enumerate(zip(vars, cmaps), start=1):
        print(f'-- page {page} :  {var}')

        #-- select the first time step of the variable
        data = ds[var].isel(time=0)

        fig, ax = plt.subplots(figsize=figsize_a4l,
                               subplot_kw=dict(projection=ccrs.PlateCarree()))
        ax.coastlines()
        ax.gridlines(draw_labels=True)
        ax.set_title(ds[var].attrs['long_name'])

        plot = ax.contourf(data.lon, data.lat, data, levels=20, cmap=cmap,
                           transform=ccrs.PlateCarree())

        #-- attach the colorbar explicitly to the axes it belongs to
        cbar = plt.colorbar(plot, ax=ax, orientation='horizontal',
                            pad=0.05, shrink=0.7)
        cbar.set_label(ds[var].attrs['units'])

        #-- put more white space around each plot
        plt.subplots_adjust(left=0.15, right=0.85, top=0.85, bottom=0.15)

        #-- add page numbers, centered at the bottom of the page
        fig.text(0.5, 0.05, f'- {page} -', ha='center')

        #-- append the figure as a new page, then free the figure
        pdf.savefig(fig)
        #plt.show()
        plt.close(fig)

    #-- no explicit pdf.close() here: the with statement closes the PDF file
    #-- when the block is left

#------------------------------------------------------------------------------
# Example 2
# 
# For each variable generate 4 plots showing the data for 4 different time steps.
# 
# Set PDF output file name.
#------------------------------------------------------------------------------
pdf_filename = 'multiple_pdf_pages_2.pdf'

# Create the plots and write the resulting figures to the PDF file.
# The with statement closes the PDF file when the block is left.
with PdfPages(pdf_filename, metadata=pdf_metadata) as pdf:

    #-- render text with LaTeX only if requested
    plt.rcParams['text.usetex'] = LATEX

    #-- one page per variable; pages are counted from 1 so that the log output
    #-- matches the page number written onto each page
    for page, (var, cmap) in enumerate(zip(vars, cmaps), start=1):
        print(f'-- page {page} :  {var}')

        data = ds[var]
        long_name = data.attrs['long_name']
        units = data.attrs['units']

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=figsize_a4l,
                                 subplot_kw=dict(projection=ccrs.PlateCarree()))

        #-- one panel per time step: panel t shows time index t (0, 1, 2, 3)
        for t, ax in enumerate(axes.flat):
            time_str = ds.time.isel(time=t).dt.strftime('%Y-%m-%d %H:%M').data

            ax.coastlines()
            gl = ax.gridlines(draw_labels=True)
            #-- never draw top labels; panels 0 and 2 (left column) keep only
            #-- their left labels, panels 1 and 3 (right column) only their
            #-- right labels
            gl.top_labels = False
            if t in (0, 2):
                gl.right_labels = False
            else:
                gl.left_labels = False

            ax.set_title(long_name + f'- time: {time_str}')

            plot = ax.contourf(data.lon, data.lat, data.isel(time=t),
                               levels=20, cmap=cmap,
                               transform=ccrs.PlateCarree())

            #-- attach each colorbar explicitly to its own panel
            cbar = plt.colorbar(plot, ax=ax, orientation='horizontal',
                                pad=0.1, shrink=0.9)
            cbar.set_label(units)

        #-- put more white space around each plot
        plt.subplots_adjust(left=0.15, right=0.85, top=0.85, bottom=0.15)

        #-- add page numbers, centered at the bottom of the page
        fig.text(0.5, 0.05, f'- {page} -', ha='center')

        #-- append the figure as a new page, then free the figure
        pdf.savefig(fig)
        #plt.show()
        plt.close(fig)

#------------------------------------------------------------------------------
# Example 3
# 
# Same as example 2, but instead of passing the metadata when the PDF object 
# is created, the metadata is added after the plots have been generated.
# 
# Set PDF output file name.
#------------------------------------------------------------------------------
pdf_filename = 'multiple_pdf_pages_3.pdf'

# Generate the plots and write the results to a PDF file.
# No metadata is passed here; it is added to the PDF object further down.
with PdfPages(pdf_filename) as pdf:

    #-- render text with LaTeX only if requested
    plt.rcParams['text.usetex'] = LATEX

    #-- one page per variable; pages are counted from 1 so that the log output
    #-- matches the page number written onto each page
    for page, (var, cmap) in enumerate(zip(vars, cmaps), start=1):
        print(f'-- page {page} :  {var}')

        data = ds[var]
        long_name = data.attrs['long_name']
        units = data.attrs['units']

        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=figsize_a4l,
                                 subplot_kw=dict(projection=ccrs.PlateCarree()))

        #-- one panel per time step: panel t shows time index t (0, 1, 2, 3)
        for t, ax in enumerate(axes.flat):
            time_str = ds.time.isel(time=t).dt.strftime('%Y-%m-%d %H:%M').data

            ax.coastlines()
            gl = ax.gridlines(draw_labels=True)
            #-- never draw top labels; panels 0 and 2 (left column) keep only
            #-- their left labels, panels 1 and 3 (right column) only their
            #-- right labels
            gl.top_labels = False
            if t in (0, 2):
                gl.right_labels = False
            else:
                gl.left_labels = False

            ax.set_title(long_name + f'- time: {time_str}')

            plot = ax.contourf(data.lon, data.lat, data.isel(time=t),
                               levels=20, cmap=cmap,
                               transform=ccrs.PlateCarree())

            #-- attach each colorbar explicitly to its own panel
            cbar = plt.colorbar(plot, ax=ax, orientation='horizontal',
                                pad=0.1, shrink=0.9)
            cbar.set_label(units)

        #-- put more white space around each plot
        plt.subplots_adjust(left=0.15, right=0.85, top=0.85, bottom=0.15)

        #-- add page numbers, centered at the bottom of the page
        fig.text(0.5, 0.05, f'- {page} -', ha='center')

        #-- append the figure as a new page, then free the figure
        pdf.savefig(fig)
        #plt.show()
        plt.close(fig)

    #-- get pre-defined metadata (a modifiable dictionary)
    md = pdf.infodict()

    #-- add metadata; this has to happen inside the with block, i.e. before
    #-- the PDF file is closed
    md.update(pdf_metadata)
    print(md)

    #-- no explicit pdf.close() here: the with statement closes the PDF file
    #-- when the block is left

Plot result#

image0