Module extract_load.brocolib_extract_load.ingest
View Source
import pandas as pd
import requests as rq
def extract(url, source_type, params=None, nested_key=None):
    '''
    Function to extract data
        - read json from url
        - convert json to dataframe

    Parameters:
        url (str): url of the data source
        source_type (str): type of the data to fetch (case-insensitive);
            only 'json' is implemented
        params (dict): request parameters passed to the HTTP GET call
            (defaults to no parameters)
        nested_key: key of the decoded JSON payload under which the
            data is stored; when omitted (or falsy) the whole payload
            is used

    Returns:
        (pandas.DataFrame): Dataframe created from source

    Exceptions:
        NotImplementedError: if the source type is not implemented
        requests.HTTPError: if the server answers with a 4xx/5xx status
    '''
    # Guard clause: reject unsupported sources before any network call.
    if source_type.lower() != 'json':
        raise NotImplementedError("sources available: json")

    # `params` defaults to None rather than a shared mutable `{}`.
    response = rq.get(url, params=params)
    # Fail loudly on HTTP errors instead of turning an error payload
    # into a DataFrame (or into an unrelated decode/key error).
    response.raise_for_status()

    data = response.json()
    if nested_key:
        # The data sits one level down in the payload.
        data = data[nested_key]
    return pd.DataFrame(data)
Functions
extract
def extract(
url,
source_type,
params={},
nested_key=None
)
Function to extract data
- read json from url
- convert json to dataframe
Parameters:
Name | Type | Description | Default |
---|---|---|---|
url | str | url of the data source | None |
source_type | str | type of the data to fetch | None |
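params | dict | request parameters | None |
nested_key | str | key of the JSON response under which the data is nested; if omitted, the whole response is used | None |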
Returns:
Type | Description |
---|---|
(pandas.DataFrame) | Dataframe created from source |
Raises:
Type | Description |
---|---|
NotImplementedError | if the source type is not implemented |
View Source
def extract(url, source_type, params=None, nested_key=None):
    '''
    Function to extract data
        - read json from url
        - convert json to dataframe

    Parameters:
        url (str): url of the data source
        source_type (str): type of the data to fetch (case-insensitive);
            only 'json' is implemented
        params (dict): request parameters passed to the HTTP GET call
            (defaults to no parameters)
        nested_key: key of the decoded JSON payload under which the
            data is stored; when omitted (or falsy) the whole payload
            is used

    Returns:
        (pandas.DataFrame): Dataframe created from source

    Exceptions:
        NotImplementedError: if the source type is not implemented
        requests.HTTPError: if the server answers with a 4xx/5xx status
    '''
    # Guard clause: reject unsupported sources before any network call.
    if source_type.lower() != 'json':
        raise NotImplementedError("sources available: json")

    # `params` defaults to None rather than a shared mutable `{}`.
    response = rq.get(url, params=params)
    # Fail loudly on HTTP errors instead of turning an error payload
    # into a DataFrame (or into an unrelated decode/key error).
    response.raise_for_status()

    data = response.json()
    if nested_key:
        # The data sits one level down in the payload.
        data = data[nested_key]
    return pd.DataFrame(data)