PIP library
FeatureMesh Python client library for executing FeatureQL queries using various database backends.
Installation
```bash
pip install featuremesh
```
Features
- Multiple Backend Support: DuckDB, Trino, BigQuery, DataFusion
- Offline & Online Modes: Execute queries on SQL backends or FeatureMesh serving
- Python Integration: Query results returned as DataFrames
- Jupyter Magic Commands: Execute queries directly in notebooks
Quick Start
Offline Client (Local SQL Execution)
Execute FeatureQL queries via local SQL backends like DuckDB:
```python
from featuremesh import OfflineClient, Backend
import duckdb

# Create a SQL executor function for DuckDB
def query_duckdb(sql: str):
    """Execute SQL query and return results as DataFrame."""
    conn = duckdb.connect(":memory:")
    result = conn.sql(sql)
    return result.df()

# Get your access token for your project on https://console.featuremesh.com/login (page settings)
__YOUR_ACCESS_TOKEN__ = "your_access_token"

# Create an offline client
client_offline = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,  # Can be None if no persistence is needed
    backend=Backend.DUCKDB,
    sql_executor=query_duckdb
)

# Execute a FeatureQL query
result = client_offline.query("""
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
""")

# Access results
print(result.dataframe)  # Pandas DataFrame
print(result.sql)  # Translated SQL
print(result.success)  # True if query succeeded
```
Online Client (API Execution)
Execute FeatureQL queries via the FeatureMesh serving API:
```python
from featuremesh import OnlineClient

# Create an online client
client_online = OnlineClient(access_token=__YOUR_ACCESS_TOKEN__)

# Execute a FeatureQL query
result = client_online.query("""
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
""")

# Access results
print(result.dataframe)
```
Translation Only
Translate FeatureQL to SQL without executing:
```python
# Only available with OfflineClient
featureql_query = """
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
"""

translate_result = client_offline.translate(featureql_query)
print(translate_result.sql)  # Generated SQL
print(translate_result.success)  # True if translation succeeded
```
Jupyter Notebook Integration
Load the FeatureMesh magic extension in Jupyter:
```python
%load_ext featuremesh
```
Set a default client:
```python
from featuremesh import set_default, OfflineClient, Backend
import duckdb

# Create SQL executor
def query_duckdb(sql: str):
    return duckdb.sql(sql).df()

# Create and set default client
client = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,
    backend=Backend.DUCKDB,
    sql_executor=query_duckdb
)
set_default("client", client)
```
Execute queries using the %%featureql cell magic:
```python
%%featureql
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
```
| FEATURE1 | FEATURE2 |
|---|---|
| 1 | 2 |
| 2 | 4 |
| 3 | 6 |
Available Magic Options
- `--client CLIENT`: Use a specific client variable from the notebook namespace
- `--debug`: Enable debug mode for detailed query information
- `--show-sql`: Print the translated SQL query
- `--hide-dataframe`: Hide the DataFrame output
- `--show-slt`: Print the SLT (SQL Logic Test) format
- `--hook VARIABLE`: Store complete results as a dictionary in a variable
Example:
```python
%%featureql --client client_duckdb --show-sql --hook results
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
```
Configuration
Configure default settings using set_default():
```python
from featuremesh import set_default

# API endpoints
set_default("registry.host", "https://api.featuremesh.com")
set_default("registry.path", "/v1/featureql")
set_default("registry.timeout", 30)

# Display preferences
set_default("debug_mode", False)
set_default("show_sql", True)

# Get current settings
from featuremesh import get_default, get_all_defaults
debug_mode = get_default("debug_mode")
all_settings = get_all_defaults()
```
Supported Backends
DuckDB
```python
from featuremesh import OfflineClient, Backend
import duckdb

# Option 1: Using a persistent connection
_duckdb_conn = None

def get_duckdb_conn(storage_path: str = ":memory:"):
    """Get or create a DuckDB connection."""
    global _duckdb_conn
    if _duckdb_conn is None:
        _duckdb_conn = duckdb.connect(storage_path)
    return _duckdb_conn

def query_duckdb(sql: str, storage_path: str = ":memory:"):
    """Execute SQL query and return results as DataFrame."""
    conn = get_duckdb_conn(storage_path)
    result = conn.sql(sql)
    return result.df()

client = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,
    backend=Backend.DUCKDB,
    sql_executor=query_duckdb
)

# Option 2: Simple in-memory executor
def simple_duckdb_executor(sql: str):
    return duckdb.sql(sql).df()

client = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,
    backend=Backend.DUCKDB,
    sql_executor=simple_duckdb_executor
)
```
Trino
```python
from featuremesh import OfflineClient, Backend
import pandas as pd
import trino.dbapi

def query_trino(sql: str):
    """Execute SQL query on Trino and return results as DataFrame."""
    # Configure your Trino connection details
    conn = trino.dbapi.connect(
        host="localhost",  # or host.docker.internal for docker
        port=8080,
        user="admin",
        catalog="memory",
        schema="default"
    )
    cur = conn.cursor()
    cur.execute(sql)
    # Fetch results
    cols = cur.description
    rows = cur.fetchall()
    if len(rows) > 0:
        df = pd.DataFrame(rows, columns=[col[0] for col in cols])
        return df
    else:
        return pd.DataFrame()

client = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,
    backend=Backend.TRINO,
    sql_executor=query_trino
)

# For production with OAuth2 authentication:
import trino.auth

def query_trino_oauth(sql: str):
    """Execute SQL query on Trino with OAuth2 authentication."""
    conn = trino.dbapi.connect(
        host="trino.your-domain.com",
        port=443,
        user="your-username",
        catalog="your-catalog",
        schema="default",
        http_scheme="https",
        auth=trino.auth.OAuth2Authentication()
    )
    cur = conn.cursor()
    cur.execute(sql)
    cols = cur.description
    rows = cur.fetchall()
    if len(rows) > 0:
        return pd.DataFrame(rows, columns=[col[0] for col in cols])
    return pd.DataFrame()
```
BigQuery
```python
from featuremesh import OfflineClient, Backend
from google.cloud import bigquery

def query_bigquery(sql: str):
    """Execute SQL query on BigQuery and return results as DataFrame."""
    client = bigquery.Client(project=__YOUR_PROJECT_ID__)
    return client.query(sql).to_dataframe()

client = OfflineClient(
    access_token=__YOUR_ACCESS_TOKEN__,
    backend=Backend.BIGQUERY,
    sql_executor=query_bigquery
)
```
Error Handling
All operations return result objects with error information:
```python
result = client.query("""
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
""")

if result.success:
    print("Query succeeded!")
    print(result.dataframe)
else:
    print("Query failed!")
    for error in result.errors:
        print(f"Error [{error.code}]: {error.message}")
        if error.context:
            print(f"Context: {error.context}")
```
Display utilities are also available:
```python
from featuremesh import display_errors, display_warnings

display_errors(result.errors)
display_warnings(result.warnings)
```
Debug Mode
Enable debug mode to see detailed translation information:
```python
result = client.query("""
WITH
FEATURE1 := INPUT(BIGINT)
SELECT
FEATURE1 := BIND_TABLE(ARRAY[1, 2, 3]),
FEATURE2 := FEATURE1 * 2
""", debug_mode=True)

if result.debug_logs:
    print(result.debug_logs)
```
Result Objects
QueryResult
Returned by client.query():
```python
@dataclass
class QueryResult:
    featureql: str  # Original FeatureQL query
    sql: Optional[str]  # Translated SQL
    dataframe: Optional[pd.DataFrame]  # Query results
    slt: Optional[str]  # SLT format (offline only)
    warnings: list[Warning]  # Non-blocking warnings
    errors: list[Error]  # Errors that occurred
    backend: Optional[str]  # Backend used
    debug_mode: bool  # Debug mode enabled
    debug_logs: Optional[dict]  # Debug information
    client_type: str  # "OfflineClient" or "OnlineClient"
    success: bool  # Property: True if no errors
```
TranslateResult
Returned by client.translate() (OfflineClient only):
```python
@dataclass
class TranslateResult:
    featureql: str  # Original FeatureQL query
    sql: Optional[str]  # Translated SQL
    warnings: list[Warning]  # Non-blocking warnings
    errors: list[Error]  # Errors that occurred
    full_response: Optional[dict]  # Full API response
    backend: Optional[str]  # Backend used
    debug_mode: bool  # Debug mode enabled
    debug_logs: Optional[dict]  # Debug information
    client_type: str  # "OfflineClient"
    success: bool  # Property: True if no errors
```
Version
Current version: See featuremesh.__version__
```python
import featuremesh
print(featuremesh.__version__)
```