Data Cleaning

Data Cleaning

# Import packages
import pandas as pd
from tools import utils
# Read the raw CSV export
data = pd.read_csv("data/Algerian_forest_fires_dataset_UPDATE.csv")
# Remove every whitespace character from the values of the one named column
region_col = "Bejaia Region Dataset "
data[region_col] = data[region_col].str.replace(r'\s', '', regex=True)
# Turn the index into regular columns so every field is addressable by position
data = data.reset_index()
# Specify column names: the first row, with whitespace stripped from each entry
first_row = data.iloc[0]
column_names = first_row.str.replace(r'\s', '', regex=True).values
# Index labels of the rows whose last field is the literal "Classes"
is_header_row = data[region_col] == "Classes"
header_index = data[is_header_row].index
# Put cleaned data together
# NOTE(review): restructure_data lives in tools.utils and is not visible here;
# presumably it uses the header rows to split and label the frame -- confirm.
cleaned = utils.restructure_data(data, header_index, column_names)
print(f"Cleaned data have shape {cleaned.shape}")
cleaned.head()
Cleaned data have shape (243, 15)
day month year Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI Classes Region
1 01 06 2012 29 57 18 0 65.7 3.4 7.6 1.3 3.4 0.5 notfire Bejaia
2 02 06 2012 29 61 13 1.3 64.4 4.1 7.6 1 3.9 0.4 notfire Bejaia
3 03 06 2012 26 82 22 13.1 47.1 2.5 7.1 0.3 2.7 0.1 notfire Bejaia
4 04 06 2012 25 89 13 2.5 28.6 1.3 6.9 0 1.7 0 notfire Bejaia
5 05 06 2012 27 77 16 0 64.8 3 14.2 1.2 3.9 0.5 notfire Bejaia
# Write the cleaned frame to disk; to_csv keeps its defaults, so the index
# is written as the first column of the output file.
cleaned_path = "data/Algerian_forest_fires_dataset_CLEANED.csv"
cleaned.to_csv(cleaned_path)