""" Indie games study: analysis pipeline. Reads the count of games released on Steam per calendar year and forecasts new releases through 2030 with a DAMPED linear-trend model (Holt, damped_trend=True), which captures the post-2017 deceleration rather than extrapolating the early explosion. Output: ../../src/data/indie-games/clean.json Run: cd analysis .venv/bin/python indie-games/pipeline.py Source: SteamDB, "Steam new releases by year" (count of apps of type 'game' first released each calendar year). Figures are SteamDB-reported counts as of the snapshot date in meta.asOf and are periodically revised. https://steamdb.info/stats/releases/ """ from pathlib import Path import pandas as pd import sys sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from _lib import holt_forecast, write_clean HERE = Path(__file__).parent RAW = HERE / "raw" / "steam-releases-by-year.csv" OUT = HERE.parent.parent / "src" / "data" / "indie-games" / "clean.json" FORECAST_TO = 2030 SNAPSHOT = "2025 snapshot of SteamDB release counts" def main() -> None: df = pd.read_csv(RAW).sort_values("year").reset_index(drop=True) years = df["year"].to_numpy() releases = df["releases"].to_numpy(dtype=float) latest_year = int(years[-1]) first_year = int(years[0]) # Damped linear trend (Holt). Captures the post-2017 slowdown instead of # extrapolating the early explosion to implausible numbers. fc = holt_forecast( releases, latest_year, FORECAST_TO, damped=True, ndigits=None, nonneg=True, scale=1.0, ) mean = fc["mean"] lower = fc["lower"] upper = fc["upper"] # Early-explosion growth rate, for narrative context (2010 to 2017 peak ramp). growth_factor = round(releases[-1] / releases[0]) n_years = latest_year - first_year cagr_pct = round(((releases[-1] / releases[0]) ** (1 / n_years) - 1) * 100, 1) landmark_years = [2010, 2013, 2016, 2019, 2022, 2024] lookup = dict(zip(years.tolist(), releases.tolist())) base = lookup[2010] landmarks = [ { "label": str(y), "value": int(lookup[y]), "note": "baseline" if y == 2010 else f"{round(lookup[y] / base)}× 2010", } for y in landmark_years if y in lookup ] payload = { "meta": { "source": "SteamDB, Steam new releases by year", "sourceUrl": "https://steamdb.info/stats/releases/", "item": "Games first released on Steam per calendar year", "asOf": SNAPSHOT, "unitNote": "Counts of games released on Steam, not indie-only, but indie titles drive the surge.", "method": "Damped linear-trend (Holt, damped_trend=True); 80% band from residual std scaled by sqrt(horizon).", }, "headline": { "latestYear": latest_year, "latestReleases": int(releases[-1]), "sinceYear": first_year, "growthFactor": growth_factor, "cagrPct": cagr_pct, "forecastYear": FORECAST_TO, "forecastReleases": int(round(mean[-1])), }, "releases": [{"year": int(y), "value": int(v)} for y, v in zip(years, releases)], "forecast": fc, "landmarks": landmarks, } write_clean(OUT, payload) print(f"wrote {OUT.relative_to(HERE.parent.parent)}") print(f" {latest_year}: {int(releases[-1])} releases ({growth_factor}× {first_year}); " f"~{cagr_pct}%/yr CAGR") print(f" forecast {FORECAST_TO}: {int(round(mean[-1]))} " f"(80% {int(round(lower[-1]))}–{int(round(upper[-1]))})") if __name__ == "__main__": main()