"""
Wolves study: analysis pipeline.

Reads two transcribed datasets on Europe's wolf recovery and writes a single
cleaned JSON the static site consumes:

  1. Germany's wolf TERRITORY count per DBBW monitoring year
     (raw/germany-wolf-territories.csv). Each year is the start year of a
     monitoring year that runs May to April, so 2022 denotes the 2022/23
     monitoring year. The series climbs from a single confirmed pair around
     2000 to roughly 184 territories in 2022/23, the figure widely reported
     by the DBBW.
  2. Wolf POPULATION estimates by European country
     (raw/europe-wolf-population.csv), transcribed from Large Carnivore
     Initiative for Europe (LCIE) reporting around 2022. These are estimates
     and in the original sources are given as ranges, so they are presented
     here as rounded midpoint figures and labelled "est.".

The German territory series is forecast to 2030 with a DAMPED linear-trend
model (Holt, damped_trend=True), which is appropriate because recovery slows
as suitable habitat fills rather than continuing to accelerate.

Output: ../../src/data/wolves/clean.json

Run:
    cd analysis
    .venv/bin/python wolves/pipeline.py

Sources:
    DBBW (Dokumentations- und Beratungsstelle des Bundes zum Thema Wolf), the
        German federal documentation and advice centre on wolves. Wolf
        territory counts per monitoring year.
        https://www.dbb-wolf.de/
    LCIE (Large Carnivore Initiative for Europe). Wolf population estimates
        by country, around 2022.
        https://www.lcie.org/
"""

from pathlib import Path
import sys

import pandas as pd

# Make the sibling `_lib` module (one directory above this file) importable
# when the script is run directly.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from _lib import holt_forecast, write_clean

# Resolve the script location so every derived path is absolute and does not
# depend on the current working directory (a relative `__file__` would make
# `HERE.parent.parent` collapse to "."). This matches the resolved path used
# for the sys.path bootstrap above.
HERE = Path(__file__).resolve().parent
RAW_DE = HERE / "raw" / "germany-wolf-territories.csv"
RAW_EU = HERE / "raw" / "europe-wolf-population.csv"
OUT = HERE.parent.parent / "src" / "data" / "wolves" / "clean.json"

# Last calendar year passed to the forecast helper.
FORECAST_TO = 2030

AS_OF = (
    "Germany territory counts: DBBW monitoring years up to 2022/23. "
    "European population: transcribed LCIE estimates, around 2022 (ranges)."
)


def germany_series() -> pd.Series:
    """Load the German territory counts as a Series indexed by year.

    Reads ``RAW_DE``, which must provide ``year`` and ``territories``
    columns. Both are cast to ``int`` and the rows are ordered by ascending
    year.

    Returns:
        The ``territories`` column indexed by ``year``.
    """
    df = pd.read_csv(RAW_DE)
    # Cast before sorting so the ordering is numeric even if the CSV column
    # was parsed as text.
    df["year"] = df["year"].astype(int)
    df["territories"] = df["territories"].astype(int)
    df = df.sort_values("year").reset_index(drop=True)
    return df.set_index("year")["territories"]


def europe_bars() -> list[dict]:
    """Build the per-country bar entries, largest estimate first.

    Reads ``RAW_EU``, which must provide ``country`` and ``wolves`` columns.

    Returns:
        One dict per CSV row with keys ``label`` (country as ``str``),
        ``value`` (wolves as ``int``) and ``note`` (always ``"est."``),
        sorted by ``value`` in descending order.
    """
    df = pd.read_csv(RAW_EU)
    # A stable sort keeps countries with identical estimates in CSV order, so
    # the generated JSON is reproducible from run to run.
    df = df.sort_values("wolves", ascending=False, kind="mergesort")
    df = df.reset_index(drop=True)
    return [
        {"label": str(r.country), "value": int(r.wolves), "note": "est."}
        for r in df.itertuples()
    ]


def main() -> None:
    """Run the pipeline: load both datasets, forecast, write ``OUT``.

    Raises:
        ValueError: If the Germany CSV contains no data rows.
    """
    de = germany_series()
    if de.empty:
        # Fail with a message naming the file instead of a bare IndexError
        # from the index lookups below.
        raise ValueError(f"no data rows found in {RAW_DE}")

    # The series is sorted by year, so the ends of the index are the first
    # and the most recent monitoring years.
    latest_year = int(de.index[-1])
    first_year = int(de.index[0])
    germany_latest = int(de.loc[latest_year])

    bars = europe_bars()
    europe_total = int(sum(b["value"] for b in bars))

    # `holt_forecast` is a project helper from `_lib`; the result is indexed
    # below by the keys "mean", "lower" and "upper".
    fc = holt_forecast(
        de.to_numpy(dtype=float),
        latest_year,
        FORECAST_TO,
        damped=True,
        ndigits=None,
        nonneg=True,
        scale=1.0,
    )
    # Last forecast value; reported in the headline and in the summary print.
    forecast_final = fc["mean"][-1]

    payload = {
        "meta": {
            "source": (
                "DBBW (German federal documentation centre on wolves) for German "
                "territory counts; LCIE (Large Carnivore Initiative for Europe) for "
                "European population estimates."
            ),
            "sourceUrl": "https://www.dbb-wolf.de/",
            "item": (
                "Germany: wolf territories per DBBW monitoring year. "
                "Europe: wolf population estimates by country (LCIE)."
            ),
            "asOf": AS_OF,
            "unitNote": (
                "German figures are confirmed wolf territories, not individual "
                "animals. European population figures are transcribed LCIE estimates "
                "and are given as ranges in the original sources; values shown here are "
                "rounded and labelled 'est.'."
            ),
            "method": (
                "Damped linear-trend (Holt, damped_trend=True) on the German territory "
                "series; 80% band from residual std scaled by sqrt(horizon)."
            ),
        },
        "headline": {
            "germanyLatestYear": latest_year,
            "germanyTerritories": germany_latest,
            "germanyFirstYear": first_year,
            "europeTotal": europe_total,
            "europeCountries": len(bars),
            "forecastYear": FORECAST_TO,
            "forecastTerritories": forecast_final,
        },
        "series": [
            {"year": int(yr), "value": int(v)} for yr, v in de.items()
        ],
        "forecast": fc,
        "bars": bars,
    }

    write_clean(OUT, payload)

    # Short run summary on stdout.
    print(f"wrote {OUT.relative_to(HERE.parent.parent)}")
    print(
        # A monitoring year spans two calendar years, e.g. 2022 -> "2022/23".
        f" Germany {latest_year}/{(latest_year + 1) % 100:02d}: "
        f"{germany_latest} territories (from {de.loc[first_year]} in {first_year})"
    )
    print(
        f" forecast {FORECAST_TO}: {forecast_final} "
        f"(80% {fc['lower'][-1]}-{fc['upper'][-1]})"
    )
    print(f" Europe total (top {len(bars)} countries, est.): {europe_total} wolves")


if __name__ == "__main__":
    main()