PyVista Examples Benchmarks

The following benchmarks compare zvtk against VTK's native XML format in terms of file size, write time, and read time, using datasets from pyvista.examples.

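Datasets tested include every pyvista.examples download_* dataset that loads as one of the following PyVista types: MultiBlock, PolyData, ImageData, UnstructuredGrid, RectilinearGrid, or StructuredGrid.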

File Size Comparison

File size comparison: ZVTK vs VTK XML

File size: ZVTK vs. VTK XML (default compression)

Most datasets are smaller on disk with zvtk than with VTK XML. Points below the dashed unity line mark datasets whose zvtk file is smaller than its VTK XML counterpart, indicating reduced disk usage with Zstandard compression.

Write Time Comparison

Write performance comparison: ZVTK vs VTK XML

Write performance: ZVTK vs. VTK XML (default compression)

Write operations with zvtk are several times faster across all dataset types. Larger datasets benefit more from multi-threaded compression.

Read Time Comparison

Read performance comparison: ZVTK vs VTK XML

Read performance: ZVTK vs. VTK XML (default compression)

Reading zvtk files is significantly faster than VTK XML.

Note

Performance gains persist even without multi-threading.

Top 10 Largest Datasets: Speedup and Compression Ratios

Read/write speedup and compression ratios for top 10 largest datasets

Read/Write Speedup and Compression Ratios for Top 10 pyvista.examples Datasets

The left panel shows read/write speedups: all top datasets achieve multiple-fold speed increases when using zvtk. The right panel shows compression ratios (log scale): zvtk achieves higher compression than VTK XML in all cases while retaining full dataset fidelity.

Benchmark Script

The benchmarks were executed using the following Python script:

"""Benchmarks using PyVista examples."""

from __future__ import annotations

from pathlib import Path
import shutil
import time

import matplotlib.pyplot as plt
import pandas as pd
import pyvista as pv
from pyvista import examples
import seaborn as sns
from tqdm import tqdm

import zvtk

# Scratch directory that receives the ``.zvtk`` files written by the benchmark
# loop. Only the leaf directory is created here (no ``parents=True``), so
# ``/tmp`` itself must already exist.
tmp_dir = Path("/tmp/zvtk_test")
tmp_dir.mkdir(exist_ok=True)

# PyVista types accepted by the ``isinstance`` filter in the benchmark loop;
# anything an example function returns that is not one of these is skipped.
allowed = (
    pv.MultiBlock,
    pv.PolyData,
    pv.ImageData,
    pv.UnstructuredGrid,
    pv.RectilinearGrid,
    pv.ExplicitStructuredGrid,
    pv.PointSet,
    pv.StructuredGrid,
)


def get_dir_size(path: Path) -> int:
    """Return the combined size in bytes of every regular file below ``path``.

    The directory tree is walked recursively. Directories themselves do not
    contribute to the total, and a ``path`` that contains no files yields 0.

    Parameters
    ----------
    path : Path
        Root directory to measure. A plain string is accepted as well because
        the value is wrapped in ``Path`` before use.

    Returns
    -------
    int
        Sum of ``st_size`` over all regular files found under ``path``.

    """
    root = Path(path)
    return sum(entry.stat().st_size for entry in root.rglob("*") if entry.is_file())


def timed_read_write(ds: pv.DataSet | pv.MultiBlock, n_try: int = 5) -> tuple[float, float, int]:
    """Time saving and re-reading ``ds`` through ``ds.save`` / ``pv.read``.

    The dataset is written ``n_try`` times to ``/tmp/dataset<suffix>`` and then
    read back ``n_try`` times; the reported times are per-call averages.

    Parameters
    ----------
    ds : pv.DataSet | pv.MultiBlock
        Dataset to benchmark. Its type selects the file extension.
    n_try : int, default: 5
        Number of repetitions used for each of the write and read timings.
        Must be at least 1.

    Returns
    -------
    tuple[float, float, int]
        ``(write_time, read_time, file_size)`` where the times are average
        seconds per call and ``file_size`` is in bytes.

    Raises
    ------
    ValueError
        If ``n_try`` is smaller than 1.
    TypeError
        If ``ds`` is not one of the types with a known file extension.

    """
    if n_try < 1:
        raise ValueError(f"n_try must be at least 1, got {n_try}")

    # Checked in order; the first matching type decides the extension.
    suffix_by_type = (
        (pv.MultiBlock, ".vtm"),
        (pv.PolyData, ".vtp"),
        (pv.ImageData, ".vti"),
        (pv.UnstructuredGrid, ".vtu"),
        (pv.RectilinearGrid, ".vtr"),
        (pv.StructuredGrid, ".vts"),
    )
    suffix = next((ext for cls, ext in suffix_by_type if isinstance(ds, cls)), None)
    if suffix is None:
        raise TypeError(f"Unsupported type {type(ds)}")

    filename = Path(f"/tmp/dataset{suffix}")
    # Directory measured for MultiBlock output below. Presumably the ``.vtm``
    # writer stores the individual blocks here -- confirm against the writer.
    # It is removed up front so leftovers from an earlier dataset are not
    # counted in this dataset's size.
    mblock_path = Path("/tmp/dataset")
    if mblock_path.exists():
        shutil.rmtree(mblock_path)

    # perf_counter is monotonic and high resolution, unlike time.time(), which
    # can jump when the system clock is adjusted.
    tstart = time.perf_counter()
    for _ in range(n_try):
        ds.save(filename)
    write_time = (time.perf_counter() - tstart) / n_try

    tstart = time.perf_counter()
    for _ in range(n_try):
        pv.read(filename)
    read_time = (time.perf_counter() - tstart) / n_try

    if isinstance(ds, pv.MultiBlock):
        vtk_fs = get_dir_size(mblock_path)
    else:
        vtk_fs = filename.stat().st_size

    return write_time, read_time, vtk_fs


# Benchmark every ``download_*`` example that yields a supported dataset and
# collect one row of size/timing measurements per dataset.
results = []
n_try_zvtk = 10  # repetitions for each zvtk write and read timing
for name in tqdm(dir(examples)):
    # Only ``download_*`` attributes are considered; names starting with
    # ``download_file`` are excluded.
    if not name.startswith("download_") or name.startswith("download_file"):
        continue

    func = getattr(examples, name)
    if not callable(func):
        continue

    try:
        ds = func()  # get dataset
    except Exception as err:
        print(f"skipping '{name}' due to:\n\n{err}")
        # Without this, ``ds`` would be undefined (first iteration) or still
        # hold the previous dataset, which would then be benchmarked again
        # under this example's name.
        continue

    if not isinstance(ds, allowed):
        continue

    # unfair comparison non-native XYZ type
    if isinstance(ds, pv.PointSet):
        continue

    # read/write using VTK's native XML format
    vtk_write_time, vtk_read_time, vtk_fs = timed_read_write(ds)

    # read/write using zvtk using standard settings:
    # - 4 threads
    # - default compression
    out_file = tmp_dir / f"{name}.zvtk"
    tstart = time.perf_counter()
    for _ in range(n_try_zvtk):
        writer = zvtk.Writer(ds, out_file)
        writer.write(n_threads=4)
    zvtk_write_time = (time.perf_counter() - tstart) / n_try_zvtk

    # Measured after the timed loop so the stat() call is not charged to the
    # write time.
    zvtk_fs = out_file.stat().st_size

    tstart = time.perf_counter()
    for _ in range(n_try_zvtk):
        reader = zvtk.Reader(out_file)
        reader.read()
    zvtk_read_time = (time.perf_counter() - tstart) / n_try_zvtk

    results.append(
        {
            "name": name,
            "ds_type": type(ds).__name__,
            "raw_nbytes": reader.nbytes,
            "zvtk_nbytes": zvtk_fs,
            "zvtk_write_time": zvtk_write_time,
            "zvtk_read_time": zvtk_read_time,
            "vtk_nbytes": vtk_fs,
            "vtk_write_time": vtk_write_time,
            "vtk_read_time": vtk_read_time,
        }
    )


# One row per benchmarked dataset, extended with derived metrics:
# - speedups are VTK time divided by zvtk time (above 1 means zvtk is faster)
# - compression ratios are on-disk bytes divided by ``raw_nbytes``
df = pd.DataFrame(results).assign(
    write_speedup=lambda d: d["vtk_write_time"] / d["zvtk_write_time"],
    read_speedup=lambda d: d["vtk_read_time"] / d["zvtk_read_time"],
    compression_ratio_zvtk=lambda d: d["zvtk_nbytes"] / d["raw_nbytes"],
    compression_ratio_vtk=lambda d: d["vtk_nbytes"] / d["raw_nbytes"],
)

# Column layout of the summary table: identification, sizes, then timings.
column_order = [
    "name",
    "ds_type",
    "raw_nbytes",
    "vtk_nbytes",
    "zvtk_nbytes",
    "compression_ratio_zvtk",
    "compression_ratio_vtk",
    "vtk_write_time",
    "zvtk_write_time",
    "write_speedup",
    "vtk_read_time",
    "zvtk_read_time",
    "read_speedup",
]

# Largest datasets (by ``raw_nbytes``) first.
summary = df.loc[:, column_order].sort_values(by="raw_nbytes", ascending=False)


###############################################################################
# Scatter plots of ZVTK against VTK XML (one point per dataset)

sns.set(style="whitegrid")

summary["vtk_MB"] = summary["vtk_nbytes"] / 1e6
summary["zvtk_MB"] = summary["zvtk_nbytes"] / 1e6


def _scatter_vs_vtk(
    data: pd.DataFrame, *, x: str, y: str, xlabel: str, ylabel: str, title: str
) -> None:
    """Show a scatter plot of column ``y`` against column ``x`` with a unity line.

    Points are coloured by the ``ds_type`` column. The dashed unity line spans
    the combined range of both columns, so it covers every point in the plot
    rather than only the range of the x values.

    Parameters
    ----------
    data : pd.DataFrame
        Table holding ``x``, ``y`` and ``ds_type`` columns.
    x, y : str
        Names of the columns plotted on the horizontal and vertical axes.
    xlabel, ylabel : str
        Axis labels.
    title : str
        Figure title.

    """
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=data, x=x, y=y, hue="ds_type")

    lo = min(data[x].min(), data[y].min())
    hi = max(data[x].max(), data[y].max())
    plt.plot([lo, hi], [lo, hi], "k--", lw=1)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.tight_layout()
    plt.show()


###############################################################################
# 1. File size comparison
_scatter_vs_vtk(
    summary,
    x="vtk_MB",
    y="zvtk_MB",
    xlabel="VTK XML file size (MB)",
    ylabel="ZVTK file size (MB)",
    title="File size: ZVTK vs. VTK XML (default compression)",
)

###############################################################################
# 2. Write time comparison
_scatter_vs_vtk(
    summary,
    x="vtk_write_time",
    y="zvtk_write_time",
    xlabel="VTK write time (s)",
    ylabel="ZVTK write time (s)",
    title="Write performance: ZVTK vs. VTK XML (default compression)",
)

###############################################################################
# 3. Read time comparison
_scatter_vs_vtk(
    summary,
    x="vtk_read_time",
    y="zvtk_read_time",
    xlabel="VTK read time (s)",
    ylabel="ZVTK read time (s)",
    title="Read performance: ZVTK vs. VTK XML (default compression)",
)


###############################################################################
# 4. Ratios summary (bar plot)
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns

# Keep only the ten rows with the largest ``raw_nbytes``.
top10 = summary.nlargest(10, "raw_nbytes").copy()

# The stored ratios are on-disk bytes / raw bytes; invert them so that a
# larger bar means stronger compression.
top10["compression_ratio_vtk"] = 1 / top10["compression_ratio_vtk"]
top10["compression_ratio_zvtk"] = 1 / top10["compression_ratio_zvtk"]

# Category label for the x axis: "<raw size in MB> MB | <dataset type>".
label_col = "DataSet Size and Type"
size_mb_text = (top10["raw_nbytes"] / 1e6).round(1).astype(str)
top10[label_col] = size_mb_text + " MB | " + top10["ds_type"]

palette = sns.color_palette()

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
speedup_ax, ratio_ax = axes
plt.suptitle("Top 10 pyvista.examples Datasets by Size")

# Left panel: read and write speedups side by side for each dataset.
speedup_long = top10.melt(
    id_vars=[label_col],
    value_vars=["write_speedup", "read_speedup"],
    var_name="metric",
    value_name="value",
)
sns.barplot(
    data=speedup_long,
    x=label_col,
    y="value",
    hue="metric",
    palette=palette[:2],
    ax=speedup_ax,
)
speedup_ax.set_title("Read/Write Speedup: zvtk vs VTK XML")
speedup_ax.tick_params(axis="x", rotation=90)
speedup_ax.set_ylabel("Read/Write Speedup: zvtk vs VTK XML")

# Right panel: inverted compression ratios of both formats on a log axis.
ratio_long = top10.melt(
    id_vars=[label_col],
    value_vars=["compression_ratio_vtk", "compression_ratio_zvtk"],
    var_name="metric",
    value_name="value",
)
sns.barplot(
    data=ratio_long,
    x=label_col,
    y="value",
    hue="metric",
    palette=palette[2:4],
    ax=ratio_ax,
)
ratio_ax.set_yscale("log")
# Print plain numbers on the log axis instead of powers of ten.
ratio_ax.yaxis.set_major_formatter(mticker.ScalarFormatter())
ratio_ax.set_title("Compression Ratios: zvtk vs VTK XML")
ratio_ax.tick_params(axis="x", rotation=90)
ratio_ax.set_ylabel("Compression Ratios (higher is better)")

plt.tight_layout()
plt.show()