""" Oceans study: analysis pipeline. Reads raw FAO wild-capture fisheries data (via Our World in Data), cleans it, derives the world wild-catch trend + top capture nations, and forecasts world capture through 2030 with a Holt DAMPED-trend model. The world wild catch is a famously flat series: it climbed steeply until the late 1980s and has bounced around ~90 million tonnes ever since. A damped trend (damped_trend=True) fits a plateauing series far better than the undamped Holt linear trend used in the coffee study; this is a deliberately different model for a deliberately different shape. Output: ../../src/data/oceans/clean.json (consumed by the static site) Run: cd analysis .venv/bin/python oceans/pipeline.py Source: Our World in Data — "Capture fishery production", based on FAOSTAT (FAO of the UN). Element: Capture fisheries production (tonnes, live weight). Column: er_fsh_capt_mt. https://ourworldindata.org/grapher/capture-fishery-production """ import re from pathlib import Path import pandas as pd import sys sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from _lib import holt_forecast, write_clean HERE = Path(__file__).parent RAW = HERE / "raw" / "capture-fishery-production.csv" OUT = HERE.parent.parent / "src" / "data" / "oceans" / "clean.json" FORECAST_TO = 2030 CAPTURE_COL = "er_fsh_capt_mt" # Non-country entities in the OWID/FAO file: the World aggregate, FAO/World Bank # regional groupings, income groups, and the EU bloc. Matched by suffix or exact # name so real countries that merely contain a region word (for example # "Central African Republic", "South Africa") are NOT excluded. 
AGGREGATE_PATTERNS = (
    re.compile(r"\(FAO\)$"),  # names ending in "(FAO)"
    re.compile(r"\(WB\)$"),  # names ending in "(WB)"
    re.compile(r"income countries$", re.IGNORECASE),  # income groups
    re.compile(r"^World$"),  # the World aggregate, exact name only
    re.compile(r"^European Union"),  # anything starting with "European Union"
)


def is_country(name: str) -> bool:
    """Return True when *name* matches none of ``AGGREGATE_PATTERNS``."""
    return not any(p.search(name) for p in AGGREGATE_PATTERNS)


def load() -> pd.DataFrame:
    """Read the raw CSV at ``RAW`` and return the cleaned long table.

    The capture column (``CAPTURE_COL``) is renamed to ``tonnes``, only the
    ``entity``, ``code``, ``year`` and ``tonnes`` columns are kept, rows with
    a missing ``tonnes`` value are dropped and ``year`` is cast to int.

    Raises:
        KeyError: if any of the four expected columns is absent from the CSV.
    """
    df = pd.read_csv(RAW)
    df = df.rename(columns={CAPTURE_COL: "tonnes"})
    df = df[["entity", "code", "year", "tonnes"]].dropna(subset=["tonnes"])
    df["year"] = df["year"].astype(int)
    return df


def world_series(df: pd.DataFrame) -> pd.Series:
    """Return the "World" rows of *df* as a ``tonnes`` series indexed by year.

    The result is sorted by year. NOTE(review): the forecast step is fed the
    bare values, so it presumably expects one row per consecutive year;
    nothing here enforces that.
    """
    return df[df["entity"] == "World"].set_index("year")["tonnes"].sort_index()


def top_capture_nations(
    df: pd.DataFrame, year: int, world_total: float, limit: int = 10
) -> list:
    """Rank the countries in *df* by capture for *year*.

    Args:
        df: table with ``entity``, ``year`` and ``tonnes`` columns.
        year: the year whose rows are ranked.
        world_total: world capture in tonnes, the denominator of each share.
        limit: maximum number of entries to return.

    Returns:
        A list of ``{"label", "value", "note"}`` dicts, largest first, where
        ``value`` is in million tonnes and ``note`` is the share of
        *world_total*. Empty when no country has a row for *year*.
    """
    # astype(bool) keeps the mask boolean even when ``df`` has no rows.
    country_rows = df["entity"].map(is_country).astype(bool)
    latest = df[(df["year"] == year) & country_rows]
    top = (
        latest.sort_values("tonnes", ascending=False)
        .head(limit)[["entity", "tonnes"]]
        .reset_index(drop=True)
    )
    return [
        {
            "label": r.entity,
            "value": round(r.tonnes / 1e6, 2),
            "note": f"{round(100 * r.tonnes / world_latest_guard(world_total), 1)}% of world",
        }
        for r in top.itertuples()
    ]


def world_latest_guard(world_total: float) -> float:
    """Return *world_total* unchanged; raise if it cannot be a denominator.

    Raises:
        ValueError: if *world_total* is zero.
    """
    if world_total == 0:
        raise ValueError("world total is zero; cannot compute shares of world")
    return world_total


def main() -> None:
    """Run the pipeline: load, summarise, forecast, write ``OUT``, report.

    Raises:
        ValueError: if the raw file contains no "World" rows with data.
    """
    df = load()
    world = world_series(df)
    if world.empty:
        # Fail with a message that names the problem instead of the bare
        # IndexError that ``world.index[-1]`` would raise.
        raise ValueError(f'no "World" rows with data found in {RAW}')

    latest_year = int(world.index[-1])
    first_year = int(world.index[0])
    world_latest = float(world.loc[latest_year])
    peak_year = int(world.idxmax())

    # --- Top capture nations, latest world year (shares vs the world total) ---
    bars = top_capture_nations(df, latest_year, world_latest)

    fc = holt_forecast(
        world.values,
        latest_year,
        FORECAST_TO,
        damped=True,
        ndigits=3,
        nonneg=True,
        scale=1e6,
    )

    payload = {
        "meta": {
            "source": "Our World in Data — Capture fishery production (FAOSTAT, FAO of the UN)",
            "sourceUrl": "https://ourworldindata.org/grapher/capture-fishery-production",
            "item": "Capture fisheries production (wild catch, tonnes live weight)",
            "asOf": f"{latest_year} (latest full year in the FAOSTAT release)",
            "unitNote": "Values shown in million tonnes of wild capture (live weight).",
            "method": "Holt damped-trend exponential smoothing; 80% band from residual std scaled by sqrt(horizon).",
        },
        "headline": {
            "latestYear": latest_year,
            "worldMilTonnes": round(world_latest / 1e6, 1),
            "peakYear": peak_year,
            "peakMilTonnes": round(float(world.loc[peak_year]) / 1e6, 1),
            "firstYear": first_year,
            "firstMilTonnes": round(float(world.loc[first_year]) / 1e6, 1),
            "forecastYear": FORECAST_TO,
            "forecastMilTonnes": fc["mean"][-1],
        },
        "series": [
            {"year": int(yr), "value": round(float(v) / 1e6, 3)}
            for yr, v in world.items()
        ],
        "forecast": fc,
        "bars": bars,
    }

    write_clean(OUT, payload)
    print(f"wrote {OUT.relative_to(HERE.parent.parent)}")
    print(
        f" world {latest_year}: {payload['headline']['worldMilTonnes']} Mt "
        f"(peak {peak_year}: {payload['headline']['peakMilTonnes']} Mt) "
        f"-> forecast {FORECAST_TO}: {fc['mean'][-1]} Mt"
    )
    if bars:
        print(f" top nation: {bars[0]['label']} ({bars[0]['note']})")
    else:
        # The file is already written; do not crash on the summary line.
        print(f" top nation: n/a (no country rows for {latest_year})")


if __name__ == "__main__":
    main()