Exporting Data
Export your Carbon Arc data to CSV, Excel, JSON, Parquet, and databases.
Setup
import pandas as pd
from carbonarc import CarbonArcClient
import os

# Create an authenticated API client.
# NOTE(review): replace YOUR_API_TOKEN with a real token (e.g. read it from
# an environment variable) before running any of the examples below.
client = CarbonArcClient(
    host="https://api.carbonarc.co",
    token="YOUR_API_TOKEN"
)

# Get some data to export: load the "datasources" list from the datasets
# endpoint into a DataFrame used by every export example in this guide.
datasets = client.data.get_datasets()
df = pd.DataFrame(datasets.get("datasources", []))
Export Formats
CSV
Best for: Excel compatibility, simple data sharing, universal format.
# Basic export
df.to_csv("data_export.csv", index=False)

# With custom options
df.to_csv(
    "data_export_custom.csv",
    index=False,
    encoding="utf-8-sig",    # Excel-friendly UTF-8 (BOM lets Excel detect the encoding)
    date_format="%Y-%m-%d",  # Consistent date format
    float_format="%.2f",     # 2 decimal places
    na_rep="NULL"            # Replace NaN with "NULL"
)

# Compressed (great for large datasets) — pandas also infers gzip from the
# ".gz" extension, but passing it explicitly is clearer.
df.to_csv("data_export.csv.gz", index=False, compression="gzip")
Excel
Best for: Business users, formatted reports, multiple sheets.
pip install openpyxl
# Basic export
df.to_excel("data_export.xlsx", index=False, sheet_name="Data")

# Multiple sheets in one workbook; the writer saves and closes the file
# automatically when the with-block exits.
with pd.ExcelWriter("multi_sheet_export.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="All Data", index=False)
    df.head(5).to_excel(writer, sheet_name="Sample", index=False)
    # Summary sheet: basic metadata about the export itself
    summary = pd.DataFrame({
        "Metric": ["Total Rows", "Total Columns", "Export Date"],
        "Value": [len(df), len(df.columns), pd.Timestamp.now().strftime("%Y-%m-%d")]
    })
    summary.to_excel(writer, sheet_name="Summary", index=False)
JSON
Best for: APIs, web apps, nested data, programmatic access.
import json  # NOTE(review): unused in these examples — pandas' to_json serializes on its own

# Records format (list of dicts) - most common
df.to_json("data_export.json", orient="records", indent=2)

# Other orientations
df.to_json("data_columns.json", orient="columns", indent=2)  # Column-based
df.to_json("data_index.json", orient="index", indent=2)      # Row index as keys

# JSON Lines (one object per line) - great for streaming
df.to_json("data_export.jsonl", orient="records", lines=True)
Parquet
Best for: Large datasets, data pipelines, analytics, cloud storage.
pip install pyarrow
# Basic export (highly compressed, preserves types)
df.to_parquet("data_export.parquet", index=False)

# With compression options
df.to_parquet("data_snappy.parquet", compression="snappy")  # Fast
df.to_parquet("data_gzip.parquet", compression="gzip")      # Smaller

# Partitioned (for very large datasets) — writes a directory tree with one
# folder per distinct value; replace "column_name" with a real column in df.
df.to_parquet("data_partitioned/", partition_cols=["column_name"])
Parquet Benefits
- 10-100x smaller than CSV
- Preserves data types (dates, numbers, etc.)
- Columnar format = fast queries
- Native support in Spark, Athena, BigQuery
Database Exports
SQLite (Local)
Best for: Local analysis, prototyping, SQL queries.
import sqlite3

conn = sqlite3.connect("carbonarc_data.db")
try:
    # Write the DataFrame into the "datasets" table.
    df.to_sql(
        "datasets",          # Table name
        conn,
        if_exists="replace", # Options: "fail", "replace", "append"
        index=False
    )
    # Verify the export by counting rows in the new table.
    result = pd.read_sql("SELECT COUNT(*) as count FROM datasets", conn)
    print(f"Rows: {result['count'].iloc[0]}")
finally:
    # Always close the connection — the original leaked it if the write
    # or the verification query raised.
    conn.close()
PostgreSQL
pip install sqlalchemy psycopg2-binary
from sqlalchemy import create_engine
def export_to_postgres(df, table_name, connection_string):
    """
    Export DataFrame to PostgreSQL.

    connection_string: postgresql://user:password@host:port/database

    Replaces the target table if it already exists.
    """
    engine = create_engine(connection_string)
    try:
        df.to_sql(table_name, engine, if_exists="replace", index=False)
        print(f"Exported {len(df)} rows to {table_name}")
    finally:
        # Release the engine's connection pool even if the write fails —
        # the original left pooled connections open.
        engine.dispose()
# Usage (replace the example credentials with your own; never commit real passwords)
export_to_postgres(
    df,
    "carbonarc_data",
    "postgresql://user:password@localhost:5432/mydb"
)
MySQL
pip install sqlalchemy pymysql
from sqlalchemy import create_engine
def export_to_mysql(df, table_name, connection_string):
    """
    Export DataFrame to MySQL.

    connection_string: mysql+pymysql://user:password@host:port/database

    Replaces the target table if it already exists.
    """
    engine = create_engine(connection_string)
    try:
        df.to_sql(table_name, engine, if_exists="replace", index=False)
        print(f"Exported {len(df)} rows to {table_name}")
    finally:
        # Release the engine's connection pool even if the write fails —
        # the original left pooled connections open.
        engine.dispose()
# Usage (replace the example credentials with your own; never commit real passwords)
export_to_mysql(
    df,
    "carbonarc_data",
    "mysql+pymysql://user:password@localhost:3306/mydb"
)
Cloud Storage
AWS S3
pip install s3fs boto3
def export_to_s3(df, bucket, key, file_format="parquet"):
    """Export DataFrame to AWS S3 (requires s3fs and configured credentials).

    Args:
        df: DataFrame to export.
        bucket: S3 bucket name.
        key: Object key (path) within the bucket.
        file_format: "parquet" or "csv".

    Raises:
        ValueError: if file_format is unsupported. The original printed a
            success message without writing anything in that case.
    """
    s3_path = f"s3://{bucket}/{key}"
    if file_format == "parquet":
        df.to_parquet(s3_path, index=False)
    elif file_format == "csv":
        df.to_csv(s3_path, index=False)
    else:
        # Fail loudly instead of claiming success for an unwritten file.
        raise ValueError(f"Unsupported file_format: {file_format!r}")
    print(f"Exported to {s3_path}")
# Usage (needs s3fs installed and AWS credentials configured)
export_to_s3(df, "my-bucket", "data/carbonarc/export.parquet")
Google Cloud Storage
pip install gcsfs
def export_to_gcs(df, bucket, key, file_format="parquet"):
    """Export DataFrame to Google Cloud Storage (requires gcsfs).

    Args:
        df: DataFrame to export.
        bucket: GCS bucket name.
        key: Object key (path) within the bucket.
        file_format: "parquet" or "csv".

    Raises:
        ValueError: if file_format is unsupported. The original printed a
            success message without writing anything in that case.
    """
    gcs_path = f"gs://{bucket}/{key}"
    if file_format == "parquet":
        df.to_parquet(gcs_path, index=False)
    elif file_format == "csv":
        df.to_csv(gcs_path, index=False)
    else:
        # Fail loudly instead of claiming success for an unwritten file.
        raise ValueError(f"Unsupported file_format: {file_format!r}")
    print(f"Exported to {gcs_path}")
# Usage (needs gcsfs installed and GCP credentials configured)
export_to_gcs(df, "my-bucket", "data/carbonarc/export.parquet")
Azure Blob Storage
pip install adlfs
def export_to_azure(df, container, blob_path, file_format="parquet"):
    """Export DataFrame to Azure Blob Storage (requires adlfs).

    Args:
        df: DataFrame to export.
        container: Blob container name.
        blob_path: Blob path within the container.
        file_format: "parquet" or "csv".

    Raises:
        ValueError: if file_format is unsupported. The original printed a
            success message without writing anything in that case.
    """
    azure_path = f"az://{container}/{blob_path}"
    if file_format == "parquet":
        df.to_parquet(azure_path, index=False)
    elif file_format == "csv":
        df.to_csv(azure_path, index=False)
    else:
        # Fail loudly instead of claiming success for an unwritten file.
        raise ValueError(f"Unsupported file_format: {file_format!r}")
    print(f"Exported to {azure_path}")
# Usage (needs adlfs installed and Azure credentials configured)
export_to_azure(df, "my-container", "data/carbonarc/export.parquet")
Framework Data Export
Export Single Framework
def export_framework_data(client, framework_id, output_path, file_format="csv"):
    """
    Export purchased framework data to file.

    Args:
        client: CarbonArcClient instance
        framework_id: ID of the purchased framework
        output_path: Output file path (without extension)
        file_format: "csv", "excel", "parquet", or "json"

    Returns:
        pandas DataFrame with the exported rows

    Raises:
        ValueError: if file_format is not one of the supported formats.
    """
    # Map each supported format to its file extension.
    extensions = {"csv": ".csv", "excel": ".xlsx", "parquet": ".parquet", "json": ".json"}
    # Validate up front so a typo fails fast, instead of raising KeyError
    # only AFTER every page of data has been downloaded (original behavior).
    if file_format not in extensions:
        raise ValueError(f"Unsupported file_format: {file_format!r}; choose from {sorted(extensions)}")

    # Get framework data (handles pagination)
    all_data = []
    page = 1
    while True:
        response = client.explorer.get_framework_data(
            framework_id=framework_id,
            page=page,
            page_size=10000
        )
        data = response.get("data", [])
        if not data:
            break
        all_data.extend(data)
        total = response.get("total", 0)
        # Stop once we have every reported row; otherwise fetch the next page.
        if len(all_data) >= total:
            break
        page += 1
    df = pd.DataFrame(all_data)

    # Ensure the destination directory exists (e.g. "exports/tesla_revenue"
    # previously failed when "exports/" was missing).
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Export based on format
    path = f"{output_path}{extensions[file_format]}"
    if file_format == "csv":
        df.to_csv(path, index=False)
    elif file_format == "excel":
        df.to_excel(path, index=False)
    elif file_format == "parquet":
        df.to_parquet(path, index=False)
    elif file_format == "json":
        df.to_json(path, orient="records", indent=2)
    print(f"Exported {len(df)} rows to {path}")
    return df
# Usage: fetch framework "fw_abc123" and write exports/tesla_revenue.parquet
df = export_framework_data(
    client,
    framework_id="fw_abc123",
    output_path="exports/tesla_revenue",
    file_format="parquet"
)
Batch Export (Multiple Frameworks)
def batch_export_frameworks(client, framework_ids, output_dir="exports", file_format="parquet"):
    """Export multiple frameworks to separate files.

    Returns a dict mapping each framework id to its exported DataFrame,
    or to None when that framework's export failed.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    results = {}
    for fw_id in framework_ids:
        try:
            # Derive a filesystem-friendly name from the framework metadata,
            # falling back to the id itself.
            metadata = client.explorer.get_framework_metadata(framework_id=fw_id)
            safe_name = metadata.get("name", fw_id).replace(" ", "_").lower()
            destination = os.path.join(output_dir, safe_name)
            results[fw_id] = export_framework_data(client, fw_id, destination, file_format)
        except Exception as e:
            # Best-effort batch: report the failure and keep going.
            print(f"Failed to export {fw_id}: {e}")
            results[fw_id] = None

    succeeded = sum(1 for frame in results.values() if frame is not None)
    print(f"\nBatch export: {succeeded}/{len(framework_ids)} successful")
    return results
# Usage: export three purchased frameworks into exports/batch_2024/
results = batch_export_frameworks(
    client,
    framework_ids=["fw_123", "fw_456", "fw_789"],
    output_dir="exports/batch_2024",
    file_format="parquet"
)
Scheduled Exports
from datetime import datetime
def scheduled_export(client, framework_id, output_dir="exports"):
    """
    Export with timestamp for scheduled runs.
    Creates: exports/framework_2024-01-15_143022.parquet

    Args:
        client: CarbonArcClient instance
        framework_id: ID of the purchased framework
        output_dir: directory for the timestamped output file

    Returns:
        str: path of the parquet file that was written.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    # Get all data with pagination
    all_data = []
    page = 1
    while True:
        response = client.explorer.get_framework_data(
            framework_id=framework_id,
            page=page,
            page_size=10000
        )
        data = response.get("data", [])
        if not data:
            break
        all_data.extend(data)
        # Stop once we have every reported row.
        if len(all_data) >= response.get("total", 0):
            break
        page += 1
    df = pd.DataFrame(all_data)
    # Export with timestamp
    os.makedirs(output_dir, exist_ok=True)
    filename = f"{output_dir}/framework_{timestamp}.parquet"
    df.to_parquet(filename, index=False)
    # Fix: the original printed the literal "(unknown)" here instead of the
    # actual output path.
    print(f"Exported: {filename} ({len(df)} rows)")
    return filename
Cron example:
# Run daily at 6 AM
0 6 * * * python -c "from export_script import scheduled_export; scheduled_export(...)"
Quick Reference
Format Comparison
| Format | Size | Speed | Best For |
|---|---|---|---|
| CSV | Large | Fast | Excel, universal sharing |
| Excel | Large | Medium | Business users, reports |
| JSON | Large | Fast | APIs, web apps |
| Parquet | Small | Fast | Analytics, data lakes |
| SQLite | Medium | Medium | Local SQL queries |
Quick Commands
# CSV
df.to_csv("data.csv", index=False)
# Excel (requires openpyxl)
df.to_excel("data.xlsx", index=False)
# JSON
df.to_json("data.json", orient="records", indent=2)
# Parquet (requires pyarrow)
df.to_parquet("data.parquet", index=False)
# SQLite (conn is an open sqlite3 connection)
df.to_sql("table", conn, if_exists="replace", index=False)
Compression
# CSV with gzip (pandas also infers gzip from the .gz extension)
df.to_csv("data.csv.gz", compression="gzip")
# Parquet with snappy
df.to_parquet("data.parquet", compression="snappy")
Required Packages
# Core
pip install pandas openpyxl pyarrow
# Databases
pip install sqlalchemy psycopg2-binary # PostgreSQL
pip install sqlalchemy pymysql # MySQL
# Cloud Storage
pip install s3fs boto3 # AWS S3
pip install gcsfs # Google Cloud Storage
pip install adlfs # Azure Blob Storage
Next Steps
- Explorer API — Build and retrieve framework data
- Filters & Date Ranges — Customize your data queries
- Error Handling — Handle export failures gracefully