Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

POF Forecast

(C) Copyright 2025 - ECMWF and individual contributors.

This software is licensed under the terms of the Apache Licence Version 2.0 which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. In applying this licence, ECMWF does not waive the privileges and immunities granted to it by virtue of its status as an intergovernmental organisation nor does it submit to any jurisdiction.

POF Forecast

Importing all the necessary Python libraries

import xarray as xr
import numpy as np
import pandas as pd
import joblib
from pathlib import Path
import cftime
from xarray.coding.times import CFDatetimeCoder
import os
import cdsapi
import zipfile

Configuration Options

Here we define the paths for files and the dates we will use for our forecast.

We also load the model we created in the Trainer notebook

# --- Folder structure ---------------------------------------------------
base_path = Path("./data/")
os.makedirs("./outputs/", exist_ok=True)

# --- Forecast dates -----------------------------------------------------
year = 2019
years = [year]
months = [12]
# Zero-padded strings, the form expected by the CDS/XDS request API.
cds_months = [format(m, "02d") for m in months]
cds_days = [format(d, "02d") for d in range(1, 32)]

# --- Destination for the predictions ------------------------------------
output_file = f"./outputs/POF_prediction_{year}_{months[0]:02d}.nc"

# --- Trained model (produced by the Trainer notebook) --------------------
model = joblib.load("./data/POF_model.joblib")

# Service endpoints. key is left as None here — presumably the credentials
# are picked up from the user's local CDS configuration; confirm.
cds_kwargs = dict(url='https://cds.climate.copernicus.eu/api', key=None)

xds_kwargs = dict(url='https://xds-preprod.ecmwf.int/api', key=None)
# Download the ERA5-Land driver variables from the CDS, then derive wind
# speed and relative humidity from the raw fields. Each file is skipped if
# it already exists from a previous run.
ERA5_VARIABLES = [
    ("T2M", "2m_temperature"),
    ("D2M", "2m_dewpoint_temperature"),
    ("10U", "10m_u_component_of_wind"),
    ("10V", "10m_v_component_of_wind"),
    ("P", "total_precipitation"),
]

for year in years:
    for month in cds_months:
        for shortname, var in ERA5_VARIABLES:
            target = f"data/{shortname}_{year}_{month}.nc"
            request = {
                "variable": [
                    var,
                ],
                "year": year,
                "month": month,
                "day": cds_days,
                "time": [
                    "00:00"
                ],
                "data_format": "netcdf",
                "download_format": "unarchived"
            }
            client = cdsapi.Client(**cds_kwargs)
            if not os.path.exists(target):
                client.retrieve("reanalysis-era5-land", request, target=target)

        # Wind speed product from the u/v components.
        # NOTE(review): this is u10**2 + v10**2 with no square root, i.e. the
        # *squared* wind speed — presumably the model was trained on this same
        # quantity; confirm against the Trainer notebook before changing it.
        if not os.path.exists(f"data/WS_{year}_{month}.nc"):
            with xr.open_dataset(f"data/10U_{year}_{month}.nc") as dsu, \
                xr.open_dataset(f"data/10V_{year}_{month}.nc") as dsv:
                wind = (dsu.u10 ** 2 + dsv.v10 ** 2).rename("ws")
                wind.to_netcdf(f"data/WS_{year}_{month}.nc")
                del wind

        # Relative humidity via the Magnus formula: saturation vapour
        # pressure from the 2m temperature, actual vapour pressure from the
        # 2m dew point, RH = 100 * e / es.
        if not os.path.exists(f"data/RH_{year}_{month}.nc"):
            with xr.open_dataset(f"data/T2M_{year}_{month}.nc") as dst, \
                xr.open_dataset(f"data/D2M_{year}_{month}.nc") as dsd:
                t_c = dst.t2m - 273.15
                d_c = dsd.d2m - 273.15
                es = 10 ** (7.5 * t_c / (237.7 + t_c)) * 6.11
                e = 10 ** (7.5 * d_c / (237.7 + d_c)) * 6.11
                rel_hum = ((e / es) * 100).rename("rh")
                rel_hum.to_netcdf(f"data/RH_{year}_{month}.nc")
                del rel_hum

This section will download data from the XDS-Preprod.

The data is large and so may take some time to download.

# Reference grid for regridding: the first ERA5-Land temperature file,
# sorted to ascending coordinates and renamed to the (time, lat, lon)
# convention used by the biomass products below.
lat_lon_data = (
    xr.open_dataset(f"data/T2M_{years[0]}_{cds_months[0]}.nc")
    .sortby("latitude")
    .sortby("longitude")
    .rename(valid_time="time", latitude='lat', longitude='lon')
    .drop_vars(['expver', 'number'])
)

# Download the fuel/biomass maps from the XDS pre-production service,
# unpack them, and regrid each onto the ERA5-Land reference grid. Products
# whose regridded file already exists are skipped.
BIOMASS_VARIABLES = [
    ("DFMC_MAP", "dead_fuel_moisture_content_group"),
    ("LFMC_MAP", "live_fuel_moisture_content_group"),
    ("FUEL_MAP", "fuel_group"),
]

for year in years:
    for month in cds_months:
        for shortname, var in BIOMASS_VARIABLES:
            request = {
                "variable": [
                    var,
                ],
                "version": ["1"],
                "year": [
                    f'{year}'
                ],
                "month": [
                    month
                ]
            }

            client = cdsapi.Client(**xds_kwargs)
            regridded = f"data/{shortname}_{year}_{month}_R.nc"
            if os.path.exists(regridded):
                continue

            archive = f"data/{shortname}_{year}_{month}.zip"
            client.retrieve("derived-fire-fuel-biomass", request, target=archive)

            # The product arrives zipped; unpack it next to the archive.
            with zipfile.ZipFile(archive, 'r') as zip_ref:
                zip_ref.extractall("data/")

            print(f"Regridding {shortname} for {year}-{month}")
            with xr.open_dataset(f"data/{shortname}_{year}_{month}.nc") as ds_temp:
                ds_temp.interp_like(lat_lon_data, method="linear").to_netcdf(regridded)
2026-02-18 11:54:29,743 INFO Request ID is 936974c5-114c-4049-8cd3-4f48a9b130f8
2026-02-18 11:54:29,855 INFO status has been updated to accepted
2026-02-18 11:54:30,990 INFO status has been updated to running
2026-02-18 11:54:32,567 INFO status has been updated to successful
                                                                                           
Regridding DFMC_MAP for 2019-12
2026-02-18 12:15:27,282 INFO Request ID is 2fe4b605-3d27-4a7c-81ff-80b9a4e576dd
2026-02-18 12:15:27,395 INFO status has been updated to running
2026-02-18 12:15:28,536 INFO status has been updated to successful
                                                                                          
Regridding LFMC_MAP for 2019-12
2026-02-18 12:26:31,715 INFO Request ID is ca1a8c9c-a164-41bf-af0a-a1455994ca8b
2026-02-18 12:26:31,840 INFO status has been updated to accepted
2026-02-18 12:26:32,974 INFO status has been updated to running
2026-02-18 12:26:34,558 INFO status has been updated to successful
                                                                                            
Regridding FUEL_MAP for 2019-12

Load Static Data

We will load the static CLIMATE data files (population density and road density)

# Open the static climate files: population density and road density.
# (Removed the unused `time_coder = CFDatetimeCoder(use_cftime=True)` local —
# it was never referenced anywhere in this notebook.)
PO = xr.open_dataset(base_path / "CLIMATE/POP_2020.nc")
RD = xr.open_dataset(base_path / "CLIMATE/road_density_2015_agg_r.nc")

# Extract the raw numpy arrays used as static model features; PO is also
# reused later for its latitude/longitude coordinates.
PO_arr = PO.population_density.values
RD_arr = RD.road_length.values

Load Dynamic Data

Here we open the data files we retrieved from the XDS and the CDS as well as the Active Fire Maps

We iterate through each day within the dataset and store the results into an output NetCDF file.

# Build the daily fire-probability forecast: for each (year, month) open the
# dynamic inputs, assemble the feature table per day, run the trained model,
# and write the gridded probabilities to a NetCDF file.
for year in years:
    for month in months:
        # One output file per (year, month). Fix: the original wrote every
        # iteration to the single module-level `output_file`, which would
        # silently overwrite it if several years/months were configured.
        # For the configured single year/month this path is identical.
        out_path = f"./outputs/POF_prediction_{year}_{month:02d}.nc"

        # Paths of the dynamic input files ("_R" = regridded onto the
        # ERA5-Land grid in the download step above).
        ds_paths = {
            "AF": f"ACTIVE_FIRE_MAP_{year}_{month:02d}_R.nc",
            "FU": f"FUEL_MAP_{year}_{month:02d}_R.nc",
            "DF": f"DFMC_MAP_{year}_{month:02d}_R.nc",
            "LF": f"LFMC_MAP_{year}_{month:02d}_R.nc",
            "PR": f"P_{year}_{month:02d}.nc",
            "T2": f"T2M_{year}_{month:02d}.nc",
            "RH": f"RH_{year}_{month:02d}.nc",
            "WS": f"WS_{year}_{month:02d}.nc",
        }
        # Open all dynamic datasets
        with xr.open_dataset(base_path / ds_paths["FU"]) as FU, \
            xr.open_dataset(base_path / ds_paths["DF"]) as DF, \
            xr.open_dataset(base_path / ds_paths["LF"]) as LF, \
            xr.open_dataset(base_path / ds_paths["PR"]) as PR, \
            xr.open_dataset(base_path / ds_paths["T2"]) as T2, \
            xr.open_dataset(base_path / ds_paths["RH"]) as RH, \
            xr.open_dataset(base_path / ds_paths["WS"]) as WS, \
            xr.open_dataset(base_path / ds_paths["AF"]) as AF:

            # The active-fire map is only used here to count the timesteps.
            n_days = len(AF.ACTIVE_FIRE)
            all_grids = []

            for i in range(n_days):
                # Extract arrays for timestep i
                FU_LL = FU.Live_Leaf[i].values
                FU_LW = FU.Live_Wood[i].values
                FU_DF = FU.Dead_Foliage[i].values
                FU_DW = FU.Dead_Wood[i].values
                DF_ = DF.DFMC_Foliage[i].values
                DW_ = DF.DFMC_Wood[i].values
                LF_ = LF.LFMC[i].values
                PR_ = PR.tp[i].values
                T2_ = T2.t2m[i].values
                RH_ = RH.rh[i].values
                WS_ = WS.ws[i].values

                # Only predict where there is fuel to burn: total fuel > 0.
                ft = FU_LL + FU_LW + FU_DF + FU_DW
                mask = ft > 0

                # Flatten the masked cells into the feature table; the key
                # order must match what the model was trained on.
                feature_arrays = {
                    "PR": PR_[mask],
                    "T2": T2_[mask],
                    "RH": RH_[mask],
                    "WS": WS_[mask],
                    "FU_LL": FU_LL[mask],
                    "FU_LW": FU_LW[mask],
                    "FU_DF": FU_DF[mask],
                    "FU_DW": FU_DW[mask],
                    "DF": DF_[mask],
                    "DW": DW_[mask],
                    "LF": LF_[mask],
                    "PO": PO_arr[mask],
                    "RD": RD_arr[mask],
                }
                X_pred = pd.DataFrame(feature_arrays)

                # Probability of the positive (fire) class.
                y_proba = model.predict_proba(X_pred)[:, 1]

                # Scatter the predictions back onto the full grid; cells
                # outside the fuel mask stay NaN.
                fire_prob_grid = np.full(ft.shape, np.nan, dtype=float)
                fire_prob_grid[mask] = y_proba
                all_grids.append(fire_prob_grid)

            # Stack all timesteps into 3D array (time, lat, lon)
            fire_prob_array = np.stack(all_grids, axis=0)

            # -----------------------------
            # SAVE TO NETCDF
            # -----------------------------
            # Julian-calendar daily timestamps: timestep i -> day i+1 of the
            # month (assumes one timestep per day starting on the 1st).
            time = [cftime.DatetimeJulian(year, month, day + 1) for day in range(n_days)]

            ds_out = xr.Dataset(
                {"fire_probability": (["time", "latitude", "longitude"], fire_prob_array)},
                coords={
                    "time": time,
                    "latitude": PO.latitude,
                    "longitude": PO.longitude,
                }
            )
            # Fix: plain `if` statement instead of a conditional expression
            # abused as a statement (`os.remove(...) if ... else None`).
            if os.path.exists(out_path):
                os.remove(out_path)
            ds_out.to_netcdf(out_path)
            print(f"✅ Prediction saved → {out_path}")

✅ Prediction saved → ./outputs/POF_prediction_2019_12.nc