# Source code for acept.uhp_csv_io

"""Module for writing and reading .csv files for Urban Heat Pro (UHP).

UHP expects .csv files in one of the following two formats:

.. code-block:: text

    information;on;the;file;content;...
    column1;column2;column3;column4;column5;...
    value1;value2;value3;value4;value5;...
    ...

.. code-block:: text

    column1;column2;column3;column4;column5;...
    unit1;unit2;unit3;unit4;unit5;...
    value1;value2;value3;value4;value5;...
    ...

Use this module to:
    - Write a DataFrame to a .csv file in the UHP format.
    - Read a .csv file in the UHP format and return a DataFrame.
    - Prepare the input .csv file with the buildings for UHP from a GeoDataFrame.
    - Save a GeoDataFrame with the buildings or a BBD query result to a .csv file in the format expected by UHP.

For buildings the format of the .csv file is:

.. code-block:: text

    field1;field2;field3;field4;field5;...
    value1;value2;value3;value4;value5;...
    ...

Buildings have the following required columns:
    - bid: ID of the building
    - area: The size of the area in m2
    - use: Use type of the building as a number from 0 to 3
    - free_walls: Number of free walls of the building
    - lat: Latitude in degrees
    - lon: Longitude in degrees
    - dist2hp: Distance to the heat pump in m
These additional columns are optional:
    - year_class: TABULAR construction year class of the building as a number
    - size_class: TABULAR size class of the building as a number
    - floors: Number of floors
    - occupants: Number of occupants
    - dwellings: Number of dwellings
    - ref_level_roof: Refurbishment level of the roof
    - ref_level_wall: Refurbishment level of the walls
    - ref_level_floor: Refurbishment level of the floors
    - ref_level_window: Refurbishment level of the windows
The refurbishment level is a number from 1 to 3:
    - 1 = National minimum requirement
    - 2 = Improved standard
    - 3 = Ambitious standard

"""

import os
from typing import Any

import geopandas as gpd
import pandas as pd
from pandas import DataFrame

from acept import acept_utils
from acept.acept_constants import TEMP_PATH
from acept.buildings_information import calculate_missing_uhp_building_fields
from acept.uhp_input_formatting import map_building_use_types_to_numbers, map_building_types_to_numeric_size_class, \
    map_construction_year_to_tabular_construction_year_class, map_tabular_construction_year_class_to_numbers, \
    map_refurbishment_levels_to_uhp_format



# [docs]
def write_geopandas_to_uhp_csv(filepath: str, values_df: pd.DataFrame, first_row_header: list,
                               second_row_info: list | None = None, sep: str = ";") -> str:
    """Write a DataFrame to a .csv file in the format expected by UHP.

    The file consists of a header row, an optional second row with additional information on the
    data (e.g. units), followed by the data rows. An existing file at ``filepath`` is overwritten.

    :param filepath: Path where to save the .csv file.
    :param values_df: DataFrame to save. The column names in the DataFrame are not written to the file.
        The order of the columns has to be the same as in the header rows.
    :param first_row_header: List of column names to be written to the first line of the file.
    :param second_row_info: Optional list of information for each column written to the second line of the file.
        ``None`` or an empty list means that no second row is written. Default: None.
    :param sep: Column separator in the .csv file. Default: ';'.
    :return: Path to the saved file.
    """
    # make sure the target directory exists
    # (acept_utils.uppath(filepath, 1) is presumably the parent directory of filepath)
    os.makedirs(acept_utils.uppath(filepath, 1), exist_ok=True)

    # The header is written as the column names of a small helper frame; the optional info row is
    # the only data row of that frame. Without an info row the frame is empty and only the header
    # line ends up in the file.
    info_rows = [second_row_info] if second_row_info else []
    column_info_df = pd.DataFrame(info_rows, columns=first_row_header)
    column_info_df.to_csv(filepath, mode='w', sep=sep, index=False, columns=first_row_header, header=True)

    # append the values without their own header so that the file keeps the UHP layout
    values_df.to_csv(filepath, mode='a', sep=sep, index=False, header=False)
    return filepath




# [docs]
def read_uhp_csv_to_dataframe(filepath: str, header_row: int | tuple[int, list] = 0, ignore_index: bool = True,
                              additional_info: int | None = -1, sep: str = ";") -> tuple[DataFrame, list[Any]]:
    """Reads a .csv file in the format expected by UHP.

    Reads a .csv file with a header row, an optional second row with additional information on the data (e.g. units),
    and returns a DataFrame with the data and a list of additional information if requested.
    The column names are optionally renamed.

    :param filepath: Path to the .csv file.
    :param header_row: Row number of the header row, or a tuple with the row number and a list of new column names.
        Default: 0.
    :param ignore_index: If True, the index is ignored. Default: True
    :param additional_info: Optional Integer indicating the row number of the row with additional information.
        If negative, the additional information is ignored. If None, no additional information exists. Default: -1.
    :param sep: Column seperator in the .csv file. Default: ';'.
    :raises ValueError: If header_row is not an int or a tuple of the form (int, list)
    :return: DataFrame with the data, and a list of additional information if requested.
    """
    if type(header_row) is int:
        df = pd.read_csv(filepath, sep=sep, header=header_row,
                         skiprows=[abs(additional_info)] if additional_info is not None else None,
                         index_col=False if ignore_index else None)
    elif type(header_row) is tuple and type(header_row[0]) is int and type(header_row[1]) is list:
        df = pd.read_csv(filepath, sep=sep,
                         skiprows=[header_row[0], abs(additional_info)] if additional_info is not None else [
                             header_row[0]],
                         names=header_row[1],
                         index_col=False if ignore_index else None)
    else:
        raise ValueError("header_row must be an int or a tuple of (int, list) is: " + str(type(header_row)))
    if additional_info < 0:
        additional_info_val = []
    else:
        additional_info_val = pd.read_csv(filepath, sep=sep, header=additional_info,
                                          index_col=False if ignore_index else None).columns.to_list()
    return df, additional_info_val




# [docs]
def prepare_buildings_for_uhp_csv(area_id: str | int, buildings: gpd.GeoDataFrame, debug: bool = True) -> str:
    """Turn a buildings GeoDataFrame into the input CSV for UrbanHeatPro.

    The missing fields are calculated first, then the required columns are verified and the values are
    mapped to the UHP format. The result is written to a .csv file in the format used by UrbanHeatPro
    in the respective /temp directory.

    :param area_id: The area ID e.g. PLZ. It is converted to a string for the file name.
    :param buildings: The GeoDataFrame of buildings.
    :param debug: Whether to enable debug mode and print debug messages. Default: True.
    :raises ValueError: If the required column names are not in the buildings GeoDataFrame
    :return: The path to the saved UHP CSV file.
    """
    # the helper returns a pair, only the buildings are needed here
    buildings, _unused = calculate_missing_uhp_building_fields(buildings, debug)

    required_columns = {'bid', 'area', 'use', 'free_walls', 'lat', 'lon', 'dist2hp'}
    if not required_columns <= set(buildings.columns.to_list()):
        raise ValueError("Something is wrong with the input buildings. The required column names are not in "
                         "buildings.columns")

    # Add the optional fields and map the values to the UHP format. The return values are not used,
    # so the mappers presumably modify ``buildings`` in place; the order of the calls is kept.
    uhp_mappers = (
        map_building_use_types_to_numbers,
        map_building_types_to_numeric_size_class,
        map_construction_year_to_tabular_construction_year_class,
        map_tabular_construction_year_class_to_numbers,
        map_refurbishment_levels_to_uhp_format,
    )
    for apply_mapping in uhp_mappers:
        apply_mapping(buildings)

    csv_path = save_buildings_to_temp_uhp_csv(str(area_id), buildings, "All", debug)
    return csv_path




# [docs]
def save_buildings_to_temp_uhp_csv(plz_or_area_id: str, result_gdf: gpd.GeoDataFrame, building_use: str = "All",
                                   debug: bool = True) -> str:
    """
    Save the buildings or a BBD query result to a .csv file in the format used by UrbanHeatPro in the /temp directory.

    The file is written to ``TEMP_PATH/PLZ_<id>/buildings_<id>.csv``, or to
    ``TEMP_PATH/PLZ_<id>/buildings_<id>_<building_use>.csv`` if ``building_use`` is not 'All'.
    The required columns are always written. Optional columns are written only if they are present in
    ``result_gdf``. The column order in the file is fixed and independent of the order in ``result_gdf``.

    :param plz_or_area_id: The queried PLZ or the ID of the area the buildings belong to.
    :param result_gdf: GeoDataFrame with all buildings to save.
    :param building_use: Use type of the buildings in the BBD query, default: 'All' selects all use types.
        Possible: 'All', 'Residential', 'Industrial', 'Commercial', 'Public', 'Non-Residential'.
        Only used to build the file name.
    :param debug: Whether to enable debug mode and print debug messages. Default: True.
    :raises ValueError: if the required column names are not in the result_gdf
    :return: File path to the CSV file with the buildings.
    """
    # target file, the use type is part of the name unless all use types are selected
    name_suffix = "" if building_use == "All" else f"_{building_use}"
    combined_filepath = os.path.join(TEMP_PATH, f"PLZ_{plz_or_area_id}",
                                     f"buildings_{plz_or_area_id}{name_suffix}.csv")
    # recursively create output directory
    os.makedirs(acept_utils.uppath(combined_filepath, 1), exist_ok=True)

    # header of the csv file
    # note: all column names are lowercase in the (PLZ) modified BBD
    required_columns = ['bid', 'area', 'use', 'free_walls', 'lat', 'lon', 'dist2hp']
    optional_columns = ['year_class', 'size_class', 'floors', 'dwellings', 'occupants',
                        'ref_level_roof', 'ref_level_wall', 'ref_level_floor', 'ref_level_window']

    available_columns = result_gdf.columns.to_list()
    if debug:
        # diagnostic output only, therefore tied to the debug flag
        print(available_columns)
    if not set(required_columns).issubset(available_columns):
        raise ValueError("Something is wrong with the input result_gdf. The required column names are not in "
                         "result_gdf.columns")

    # Optional columns may be absent: select only what exists so that a missing optional column does
    # not raise a KeyError. Header and body use the same list, which keeps them aligned.
    column_names = required_columns + [name for name in optional_columns if name in available_columns]
    body_df = result_gdf[column_names]
    write_geopandas_to_uhp_csv(combined_filepath, body_df, column_names)
    if debug:
        print('  buildings or query BBD result as UHP input .csv file saved at: ' + combined_filepath)
    return combined_filepath