""" Pine & timber study: analysis pipeline. Reads raw FAOSTAT Forestry data (Roundwood, Production, m3), cleans it, derives the global trend and producer concentration, and forecasts world roundwood production through 2030 with a Holt linear-trend model. Output: ../../src/data/pine/clean.json (consumed by the static site) Run: cd analysis .venv/bin/python pine/pipeline.py Source: FAOSTAT Forestry (FAO of the UN). Item: Roundwood (item code 1861). Element: Production (element code 5516). Unit: m3. Bulk file "Forestry_E_All_Data_(Normalized)", downloaded 2026-06-22. https://www.fao.org/faostat/en/#data/FO Note on the data: this CSV was DOWNLOADED from the FAOSTAT bulk endpoint (bulks-faostat.fao.org), not transcribed by hand. "Roundwood" is the broadest FAOSTAT wood category and includes both industrial roundwood and woodfuel. """ from pathlib import Path import pandas as pd import sys sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from _lib import holt_forecast, write_clean HERE = Path(__file__).parent RAW = HERE / "raw" / "roundwood-production-faostat.csv" OUT = HERE.parent.parent / "src" / "data" / "pine" / "clean.json" FORECAST_TO = 2030 # FAOSTAT Area Codes >= 5000 are regional / income-group aggregates. # Area code 351 is "China" (an aggregate of mainland + Hong Kong + Macao + # Taiwan); we keep "China, mainland" (41) instead to avoid double counting. 
# FAOSTAT area-code conventions used to tell countries from aggregates.
AGG_MIN_CODE = 5000  # codes at or above this are treated as aggregates
CHINA_AGG_CODE = 351  # "China" aggregate; excluded so "China, mainland" is kept
WORLD_CODE = 5000  # area code whose rows form the world series


def load() -> pd.DataFrame:
    """Read the raw FAOSTAT CSV and return a tidy frame.

    Returns:
        A DataFrame with the columns ``area_code`` (int), ``entity``,
        ``year`` (int) and ``m3``. Rows with a missing value are dropped.

    Raises:
        KeyError: if one of the expected FAOSTAT columns is absent.
    """
    renames = {
        "Area Code": "area_code",
        "Area": "entity",
        "Year": "year",
        "Value": "m3",
    }
    df = pd.read_csv(RAW).rename(columns=renames)
    df = df[list(renames.values())].dropna(subset=["m3"])
    df["year"] = df["year"].astype(int)
    df["area_code"] = df["area_code"].astype(int)
    return df


def is_country(area_code: int) -> bool:
    """Return True for area codes below the aggregate range, except 351."""
    return area_code < AGG_MIN_CODE and area_code != CHINA_AGG_CODE


def world_series(df: pd.DataFrame) -> pd.Series:
    """Return the ``m3`` values of the World rows, indexed by ascending year."""
    w = df[df["area_code"] == WORLD_CODE].set_index("year")["m3"].sort_index()
    return w


def _world_or_raise(df: pd.DataFrame) -> pd.Series:
    """Return the world series, failing loudly if it is empty or ambiguous."""
    world = world_series(df)
    if world.empty:
        raise ValueError(
            f"no rows for the World aggregate (area code {WORLD_CODE}) "
            "in the input data"
        )
    if not world.index.is_unique:
        # More than one World value per year would make ``world.loc[year]``
        # return a Series and silently distort the trend and the forecast.
        dupes = world.index[world.index.duplicated()].unique().tolist()
        raise ValueError(
            f"multiple World rows for year(s) {dupes}; "
            "expected exactly one value per year"
        )
    return world


def _top_producers(df: pd.DataFrame, year: int, limit: int = 10) -> pd.DataFrame:
    """Return the *limit* largest country rows (``entity``, ``m3``) for *year*."""
    in_year = df[(df["year"] == year) & (df["area_code"].map(is_country))]
    return (
        in_year.sort_values("m3", ascending=False)
        .head(limit)[["entity", "m3"]]
        .reset_index(drop=True)
    )


def _share_pct(m3: float, world_total: float) -> float:
    """Return *m3* as a percentage of *world_total*, rounded to one decimal."""
    return round(100 * m3 / world_total, 1)


def _producer_bars(top: pd.DataFrame, world_total: float) -> list:
    """Turn the top-producer rows into the bar-chart records of the payload."""
    return [
        {
            # Display the mainland series under the short country name.
            "label": r.entity.replace("China, mainland", "China"),
            "value": round(r.m3 / 1e6, 1),
            "note": f"{_share_pct(r.m3, world_total)}%",
        }
        for r in top.itertuples()
    ]


def main() -> None:
    """Run the pipeline: load, summarise, forecast, write JSON, print a recap.

    Raises:
        ValueError: if the data has no (or duplicated) World rows, or no
            country rows for the latest World year.
    """
    df = load()
    world = _world_or_raise(df)
    first_year = int(world.index[0])
    latest_year = int(world.index[-1])
    world_latest = float(world.loc[latest_year])

    # --- Top producing countries, latest year ---
    top = _top_producers(df, latest_year)
    if top.empty:
        raise ValueError(f"no country rows for {latest_year} in {RAW}")
    bars = _producer_bars(top, world_latest)

    fc = holt_forecast(world.values, latest_year, FORECAST_TO, ndigits=1, scale=1e6)

    payload = {
        "meta": {
            "source": "FAOSTAT Forestry (FAO of the UN), Roundwood, Production",
            "sourceUrl": "https://www.fao.org/faostat/en/#data/FO",
            "item": "Roundwood (item 1861), Production, m3",
            "asOf": f"{latest_year} (FAOSTAT bulk file downloaded 2026-06-22)",
            "unitNote": "Values shown in million cubic metres (M m3) of roundwood.",
            "method": "Holt linear-trend exponential smoothing; 80% band from residual std scaled by sqrt(horizon).",
        },
        "headline": {
            "latestYear": latest_year,
            "worldMilM3": round(world_latest / 1e6, 0),
            "growthSince": first_year,
            "growthFactor": round(world_latest / float(world.loc[first_year]), 1),
            "topProducer": bars[0]["label"],
            "topProducerSharePct": _share_pct(float(top["m3"].iloc[0]), world_latest),
            "forecastYear": FORECAST_TO,
            # Last element of the forecast mean; presumably the FORECAST_TO
            # value -- confirm against _lib.holt_forecast.
            "forecastMilM3": fc["mean"][-1],
        },
        "series": [
            {"year": int(yr), "value": round(float(v) / 1e6, 1)}
            for yr, v in world.items()
        ],
        "forecast": fc,
        "bars": bars,
    }
    write_clean(OUT, payload)

    print(f"wrote {OUT.relative_to(HERE.parent.parent)}")
    print(
        f"  world {latest_year}: {payload['headline']['worldMilM3']} M m3 "
        f"-> forecast {FORECAST_TO}: {fc['mean'][-1]} M m3"
    )
    print(
        f"  top producer: {bars[0]['label']} ({bars[0]['note']}); "
        f"growth x{payload['headline']['growthFactor']} since {first_year}"
    )


if __name__ == "__main__":
    main()