""" China's green sanctuaries study: analysis pipeline. Reads the raw World Bank "Forest area (% of land area)" series for China (indicator AG.LND.FRST.ZS, sourced from FAO), cleans it into a continuous yearly series, derives milestone-decade bars, and forecasts forest cover percent through 2030 with a Holt linear-trend model. Output: ../../src/data/china-green/clean.json (consumed by the static site) Run: cd analysis .venv/bin/python china-green/pipeline.py Source: World Bank, "Forest area (% of land area)" (AG.LND.FRST.ZS) for China, data sourced from the Food and Agriculture Organization (FAO). https://data.worldbank.org/indicator/AG.LND.FRST.ZS?locations=CN """ import json from pathlib import Path import pandas as pd import sys sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from _lib import holt_forecast, write_clean HERE = Path(__file__).parent RAW = HERE / "raw" / "china-forest-area.json" OUT = HERE.parent.parent / "src" / "data" / "china-green" / "clean.json" FORECAST_TO = 2030 MILESTONE_YEARS = (1990, 2000, 2010, 2020) def load() -> pd.Series: """Parse the World Bank JSON array [metadata, data[]] into a yearly series.""" payload = json.loads(RAW.read_text()) rows = payload[1] records = [ (int(r["date"]), float(r["value"])) for r in rows if r["value"] is not None ] df = pd.DataFrame(records, columns=["year", "value"]).sort_values("year") return df.set_index("year")["value"] def nearest_year(series: pd.Series, target: int) -> int: return int(min(series.index, key=lambda y: abs(y - target))) def main() -> None: series = load() first_year = int(series.index[0]) latest_year = int(series.index[-1]) first_pct = round(float(series.loc[first_year]), 2) forest_pct = round(float(series.loc[latest_year]), 2) fc = holt_forecast(series.values, latest_year, FORECAST_TO, ndigits=2, scale=1.0) # --- Milestone-decade bars --- base_year = nearest_year(series, MILESTONE_YEARS[0]) base_pct = float(series.loc[base_year]) bars = [] for target in MILESTONE_YEARS: yr = nearest_year(series, target) pct = round(float(series.loc[yr]), 2) if yr == base_year: note = "baseline" else: note = f"+{round(pct - base_pct, 1)} pts since {base_year}" bars.append({"label": str(yr), "value": pct, "note": note}) payload = { "meta": { "source": "World Bank — Forest area (% of land area), China (data sourced from FAO)", "sourceUrl": "https://data.worldbank.org/indicator/AG.LND.FRST.ZS?locations=CN", "item": "Forest area as a percent of land area, China (AG.LND.FRST.ZS)", "asOf": f"{latest_year} (latest year in the World Bank release)", "unitNote": "Values shown as percent of total land area under forest.", "method": "Holt linear-trend exponential smoothing; 80% band from residual std scaled by sqrt(horizon).", }, "headline": { "latestYear": latest_year, "forestPct": forest_pct, "firstYear": first_year, "firstPct": first_pct, "forecastYear": FORECAST_TO, "forecastPct": fc["mean"][-1], "gainPts": round(forest_pct - first_pct, 1), }, "series": [ {"year": int(yr), "value": round(float(v), 2)} for yr, v in series.items() ], "forecast": fc, "bars": bars, } write_clean(OUT, payload) print(f"wrote {OUT.relative_to(HERE.parent.parent)}") print( f" China {first_year}: {first_pct}% -> {latest_year}: {forest_pct}% " f"(+{payload['headline']['gainPts']} pts)" ) print( f" forecast {FORECAST_TO}: {fc['mean'][-1]}% " f"(80% {fc['lower'][-1]}–{fc['upper'][-1]})" ) if __name__ == "__main__": main()