Skip to content

Module extract_load.tests.test_datalake

View Source
# contents of test_app.py, a simple test for our API retrieval

from datetime import datetime

import pytest

import pandas as pd

from requests import get

from ..brocolib_extract_load import datalake

DEFAULT_BUCKET = "test-bucket"

DEFAULT_FILE = "test_file"

DEFAULT_SUBFOLDERS = "macro"

DEFAULT_TABLE_FOLDER = "table_folder"

DEFAULT_BLOB_NAME = f"{DEFAULT_SUBFOLDERS}/{DEFAULT_TABLE_FOLDER}/{DEFAULT_FILE}"

DEFAULT_DBT_TOPIC = "test_dbt_topic"

DEFAULT_GCP_PROJECT = "default-gcp-project"

DEFAULT_PARTITION_KEYS = {"year":"","month":""}

## -----| Fixtures |-----

# custom class to be the mock pandas.DataFrame.to_*() methods

class MockPandas:

    @staticmethod

    def json():

        return None

# fixture monkeypatch

@pytest.fixture

def mock_pandas(monkeypatch):

    """Requests.get() mocked to return {'mock_key':'mock_response'}."""

    def mock_get(*args, **kwargs):

        return MockPandas()

    monkeypatch.setattr(pd.DataFrame, "to_csv", mock_get)

    monkeypatch.setattr(pd.DataFrame, "to_parquet", mock_get)

    monkeypatch.setattr(pd.DataFrame, "to_json", mock_get)

@pytest.fixture(params=["get_dummy_df", "get_empty_df"])

def matrix_df(request):

    df_fixture = request.getfixturevalue(request.param)

    return df_fixture

@pytest.fixture(params=["csv", "parquet", "json"])

def matrix_format(request):

    return request.param

## -----| Tests |-----

def test_dataframe_to_bucket(mock_pandas, matrix_df, matrix_format):

    result = datalake.dataframe_to_bucket(

        dataframe=matrix_df,

        bucket_name=DEFAULT_BUCKET,

        blob_name=DEFAULT_BLOB_NAME,

        file_type=matrix_format

    )

    assert result == f"gs://{DEFAULT_BUCKET}/{DEFAULT_BLOB_NAME}.{matrix_format}"

def test_external_table(mock_pandas):

    external_table = datalake.ExternalTable(

        bucket_name=DEFAULT_BUCKET,

        partition_keys=DEFAULT_PARTITION_KEYS,

        bucket_file=DEFAULT_FILE,

        bucket_table_directory=DEFAULT_TABLE_FOLDER,

        bucket_directory=DEFAULT_SUBFOLDERS,

        dbt_topic=DEFAULT_DBT_TOPIC,

        gcp_project=DEFAULT_GCP_PROJECT,

    )

    now = datetime.now()

    path_prefix = f'{DEFAULT_SUBFOLDERS}/{DEFAULT_TABLE_FOLDER}'

    path_partitions = f'year={now.year}/month={now.month}'

    assert external_table.add_partition_keys(path_prefix) == f'{path_prefix}/{path_partitions}'

    assert external_table.format_filename() == f'{path_prefix}/{path_partitions}/{DEFAULT_FILE}_{str(now.day)}'

Variables

DEFAULT_BLOB_NAME
DEFAULT_BUCKET
DEFAULT_DBT_TOPIC
DEFAULT_FILE
DEFAULT_GCP_PROJECT
DEFAULT_PARTITION_KEYS
DEFAULT_SUBFOLDERS
DEFAULT_TABLE_FOLDER

Functions

matrix_df

def matrix_df(
    request
)
View Source
@pytest.fixture(params=["get_dummy_df", "get_empty_df"])

def matrix_df(request):

    df_fixture = request.getfixturevalue(request.param)

    return df_fixture

matrix_format

def matrix_format(
    request
)
View Source
@pytest.fixture(params=["csv", "parquet", "json"])

def matrix_format(request):

    return request.param

mock_pandas

def mock_pandas(
    monkeypatch
)

Requests.get() mocked to return {'mock_key':'mock_response'}.

View Source
@pytest.fixture

def mock_pandas(monkeypatch):

    """Requests.get() mocked to return {'mock_key':'mock_response'}."""

    def mock_get(*args, **kwargs):

        return MockPandas()

    monkeypatch.setattr(pd.DataFrame, "to_csv", mock_get)

    monkeypatch.setattr(pd.DataFrame, "to_parquet", mock_get)

    monkeypatch.setattr(pd.DataFrame, "to_json", mock_get)

test_dataframe_to_bucket

def test_dataframe_to_bucket(
    mock_pandas,
    matrix_df,
    matrix_format
)
View Source
def test_dataframe_to_bucket(mock_pandas, matrix_df, matrix_format):

    result = datalake.dataframe_to_bucket(

        dataframe=matrix_df,

        bucket_name=DEFAULT_BUCKET,

        blob_name=DEFAULT_BLOB_NAME,

        file_type=matrix_format

    )

    assert result == f"gs://{DEFAULT_BUCKET}/{DEFAULT_BLOB_NAME}.{matrix_format}"

test_external_table

def test_external_table(
    mock_pandas
)
View Source
def test_external_table(mock_pandas):

    external_table = datalake.ExternalTable(

        bucket_name=DEFAULT_BUCKET,

        partition_keys=DEFAULT_PARTITION_KEYS,

        bucket_file=DEFAULT_FILE,

        bucket_table_directory=DEFAULT_TABLE_FOLDER,

        bucket_directory=DEFAULT_SUBFOLDERS,

        dbt_topic=DEFAULT_DBT_TOPIC,

        gcp_project=DEFAULT_GCP_PROJECT,

    )

    now = datetime.now()

    path_prefix = f'{DEFAULT_SUBFOLDERS}/{DEFAULT_TABLE_FOLDER}'

    path_partitions = f'year={now.year}/month={now.month}'

    assert external_table.add_partition_keys(path_prefix) == f'{path_prefix}/{path_partitions}'

    assert external_table.format_filename() == f'{path_prefix}/{path_partitions}/{DEFAULT_FILE}_{str(now.day)}'

Classes

MockPandas

class MockPandas(
    /,
    *args,
    **kwargs
)
View Source
class MockPandas:

    @staticmethod

    def json():

        return None

Static methods

json

def json(

)
View Source
    @staticmethod

    def json():

        return None