Skip to content

Module extract_load.brocolib_extract_load.processing

View Source
import pandas as pd

def transform(dataframe, numeric_col, date_col, date_format_str, keep_col=None):
  '''
  Transpose a dataframe and coerce selected columns to numeric / datetime.

  Steps, in order:

    - check that every numeric/date column is listed in keep_col
      (skipped when keep_col is None)

    - transpose the dataframe

    - check that every requested column exists after the transpose

    - restrict the dataframe to keep_col (when keep_col is None or empty,
      all columns are kept)

    - convert numeric_col with pandas.to_numeric

    - convert date_col with pandas.to_datetime using date_format_str

    - reset and drop the index

  Parameters:

    dataframe (pandas.DataFrame): A dataframe; the column names below refer
      to its columns AFTER transposition (i.e. its original index labels)

    numeric_col (list): A list of column names containing numbers

    date_col (list): A list of column names containing dates

    date_format_str (str): A datetime formatting string

    keep_col (list): A list of column names to keep; None keeps every column

  Returns:

    dataframe (pandas.DataFrame): Transformed Dataframe

  Raises:

    ValueError: if a numeric/date column is not listed in keep_col, or if a
      requested column is missing from the transposed dataframe
  '''
  typed_cols = numeric_col + date_col

  # keep_col defaults to None: only validate against it (and concatenate it)
  # when the caller actually supplied a list, otherwise `None + list` and
  # `col not in None` would raise TypeError.
  if keep_col is not None:
    for col in typed_cols:
      if col not in keep_col:
        raise ValueError(f'{col} not in cols provided in keep_col')
    required_cols = keep_col + typed_cols
  else:
    required_cols = typed_cols

  dataframe = dataframe.transpose()

  for col in required_cols:
    if col not in dataframe.columns:
      raise ValueError(f'{col} not in dataframe')

  if keep_col:
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a selection (avoids SettingWithCopyWarning).
    dataframe = dataframe[keep_col].copy()

  # Skip the conversions entirely when there is nothing to convert.
  if numeric_col:
    dataframe[numeric_col] = dataframe[numeric_col].apply(pd.to_numeric)

  if date_col:
    dataframe[date_col] = dataframe[date_col].apply(pd.to_datetime, format=date_format_str)

  return dataframe.reset_index(drop=True)

Functions

transform

def transform(
    dataframe,
    numeric_col,
    date_col,
    date_format_str,
    keep_col=None
)

Function to transform dataframe

  • transpose dataframe
  • reset and drop the index of dataframe
  • set numeric columns to numeric
  • set date column to date

Parameters:

Name Type Description Default
dataframe pandas.DataFrame A dataframe required
numeric_col list A list of column names containing numbers required
date_col list A list of column names containing dates required
date_format_str str A datetime formatting string required
keep_col list A list of column names to keep None

Returns:

Type Description
pandas.DataFrame Transformed dataframe
View Source
def transform(dataframe, numeric_col, date_col, date_format_str, keep_col=None):
  '''
  Transpose a dataframe and coerce selected columns to numeric / datetime.

  Steps, in order:

    - check that every numeric/date column is listed in keep_col
      (skipped when keep_col is None)

    - transpose the dataframe

    - check that every requested column exists after the transpose

    - restrict the dataframe to keep_col (when keep_col is None or empty,
      all columns are kept)

    - convert numeric_col with pandas.to_numeric

    - convert date_col with pandas.to_datetime using date_format_str

    - reset and drop the index

  Parameters:

    dataframe (pandas.DataFrame): A dataframe; the column names below refer
      to its columns AFTER transposition (i.e. its original index labels)

    numeric_col (list): A list of column names containing numbers

    date_col (list): A list of column names containing dates

    date_format_str (str): A datetime formatting string

    keep_col (list): A list of column names to keep; None keeps every column

  Returns:

    dataframe (pandas.DataFrame): Transformed Dataframe

  Raises:

    ValueError: if a numeric/date column is not listed in keep_col, or if a
      requested column is missing from the transposed dataframe
  '''
  typed_cols = numeric_col + date_col

  # keep_col defaults to None: only validate against it (and concatenate it)
  # when the caller actually supplied a list, otherwise `None + list` and
  # `col not in None` would raise TypeError.
  if keep_col is not None:
    for col in typed_cols:
      if col not in keep_col:
        raise ValueError(f'{col} not in cols provided in keep_col')
    required_cols = keep_col + typed_cols
  else:
    required_cols = typed_cols

  dataframe = dataframe.transpose()

  for col in required_cols:
    if col not in dataframe.columns:
      raise ValueError(f'{col} not in dataframe')

  if keep_col:
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a selection (avoids SettingWithCopyWarning).
    dataframe = dataframe[keep_col].copy()

  # Skip the conversions entirely when there is nothing to convert.
  if numeric_col:
    dataframe[numeric_col] = dataframe[numeric_col].apply(pd.to_numeric)

  if date_col:
    dataframe[date_col] = dataframe[date_col].apply(pd.to_datetime, format=date_format_str)

  return dataframe.reset_index(drop=True)