Skip to main content

Exporting Data

Export your Carbon Arc data to CSV, Excel, JSON, Parquet, and databases.

Setup

import pandas as pd
from carbonarc import CarbonArcClient
import os

# Connect to the Carbon Arc API (replace the token with your real API token)
client = CarbonArcClient(
host="https://api.carbonarc.co",
token="YOUR_API_TOKEN"
)

# Get some data to export
datasets = client.data.get_datasets()
# NOTE(review): assumes the response dict carries its rows under "datasources" — verify against the API
df = pd.DataFrame(datasets.get("datasources", []))

Export Formats

CSV

Best for: Excel compatibility, simple data sharing, universal format.

# Plain export; drop the DataFrame index column
df.to_csv("data_export.csv", index=False)

# Export with explicit formatting options
csv_options = {
    "index": False,
    "encoding": "utf-8-sig",    # BOM-prefixed UTF-8 so Excel auto-detects the encoding
    "date_format": "%Y-%m-%d",  # uniform date rendering
    "float_format": "%.2f",     # two decimal places
    "na_rep": "NULL",           # write NaN cells as the literal text NULL
}
df.to_csv("data_export_custom.csv", **csv_options)

# Gzip-compressed output for large extracts
df.to_csv("data_export.csv.gz", index=False, compression="gzip")

Excel

Best for: Business users, formatted reports, multiple sheets.

pip install openpyxl
# Basic export
df.to_excel("data_export.xlsx", index=False, sheet_name="Data")

# Multiple sheets in one workbook
with pd.ExcelWriter("multi_sheet_export.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="All Data", index=False)
    df.head(5).to_excel(writer, sheet_name="Sample", index=False)

    # Summary sheet — must be written while the writer is still open;
    # once the `with` block exits the workbook is saved and closed,
    # and further to_excel(writer, ...) calls fail.
    summary = pd.DataFrame({
        "Metric": ["Total Rows", "Total Columns", "Export Date"],
        "Value": [len(df), len(df.columns), pd.Timestamp.now().strftime("%Y-%m-%d")]
    })
    summary.to_excel(writer, sheet_name="Summary", index=False)

JSON

Best for: APIs, web apps, nested data, programmatic access.

import json  # stdlib json, useful for post-processing the exported files

# "records" orientation: a list of row dicts — the most interchange-friendly layout
df.to_json("data_export.json", orient="records", indent=2)

# Alternative orientations
df.to_json("data_columns.json", orient="columns", indent=2)  # keyed by column name
df.to_json("data_index.json", orient="index", indent=2)      # keyed by row index

# JSON Lines: one record per line, well suited to streaming consumers
df.to_json("data_export.jsonl", orient="records", lines=True)

Parquet

Best for: Large data assets, data pipelines, analytics, cloud storage.

pip install pyarrow
# Default export: compact columnar output that keeps dtypes intact
df.to_parquet("data_export.parquet", index=False)

# Pick a codec to trade speed against file size
df.to_parquet("data_snappy.parquet", compression="snappy")  # faster
df.to_parquet("data_gzip.parquet", compression="gzip")      # smaller

# Split the output into per-value directories for very large datasets
df.to_parquet("data_partitioned/", partition_cols=["column_name"])
Parquet Benefits
  • 10-100x smaller than CSV
  • Preserves data types (dates, numbers, etc.)
  • Columnar format = fast queries
  • Native support in Spark, Athena, BigQuery

Database Exports

SQLite (Local)

Best for: Local analysis, prototyping, SQL queries.

import sqlite3

# Open (or create) the local database file
conn = sqlite3.connect("carbonarc_data.db")

# Load the frame into the "datasets" table, dropping any previous contents
df.to_sql(
    "datasets",
    conn,
    if_exists="replace",  # alternatives: "fail", "append"
    index=False,
)

# Read the row count back to confirm the load succeeded
result = pd.read_sql("SELECT COUNT(*) as count FROM datasets", conn)
print(f"Rows: {result['count'].iloc[0]}")

conn.close()

PostgreSQL

pip install sqlalchemy psycopg2-binary
from sqlalchemy import create_engine

def export_to_postgres(df, table_name, connection_string):
    """Write df to a PostgreSQL table, replacing the table if it exists.

    connection_string: postgresql://user:password@host:port/database
    """
    engine = create_engine(connection_string)
    df.to_sql(table_name, engine, if_exists="replace", index=False)
    print(f"Exported {len(df)} rows to {table_name}")

# Usage
export_to_postgres(
    df,
    "carbonarc_data",
    "postgresql://user:password@localhost:5432/mydb"
)

MySQL

pip install sqlalchemy pymysql
from sqlalchemy import create_engine

def export_to_mysql(df, table_name, connection_string):
    """Write df to a MySQL table, replacing the table if it exists.

    connection_string: mysql+pymysql://user:password@host:port/database
    """
    engine = create_engine(connection_string)
    df.to_sql(table_name, engine, if_exists="replace", index=False)
    print(f"Exported {len(df)} rows to {table_name}")

# Usage
export_to_mysql(
    df,
    "carbonarc_data",
    "mysql+pymysql://user:password@localhost:3306/mydb"
)

Cloud Storage

AWS S3

pip install s3fs boto3
def export_to_s3(df, bucket, key, file_format="parquet"):
    """Export a DataFrame to AWS S3 (requires s3fs).

    Args:
        df: DataFrame to upload.
        bucket: Target S3 bucket name.
        key: Object key (path) within the bucket.
        file_format: "parquet" (default) or "csv".

    Raises:
        ValueError: if file_format is not a supported format.
    """
    # Fail fast on a bad format — previously an unknown format silently
    # skipped the upload yet still printed the success message.
    if file_format not in ("parquet", "csv"):
        raise ValueError(f"Unsupported file_format: {file_format!r}")

    s3_path = f"s3://{bucket}/{key}"

    if file_format == "parquet":
        df.to_parquet(s3_path, index=False)
    else:
        df.to_csv(s3_path, index=False)

    print(f"Exported to {s3_path}")

# Usage: upload df as parquet to s3://my-bucket/data/carbonarc/export.parquet
export_to_s3(df, "my-bucket", "data/carbonarc/export.parquet")

Google Cloud Storage

pip install gcsfs
def export_to_gcs(df, bucket, key, file_format="parquet"):
    """Export a DataFrame to Google Cloud Storage (requires gcsfs).

    Args:
        df: DataFrame to upload.
        bucket: Target GCS bucket name.
        key: Object key (path) within the bucket.
        file_format: "parquet" (default) or "csv".

    Raises:
        ValueError: if file_format is not a supported format.
    """
    # Fail fast on a bad format — previously an unknown format silently
    # skipped the upload yet still printed the success message.
    if file_format not in ("parquet", "csv"):
        raise ValueError(f"Unsupported file_format: {file_format!r}")

    gcs_path = f"gs://{bucket}/{key}"

    if file_format == "parquet":
        df.to_parquet(gcs_path, index=False)
    else:
        df.to_csv(gcs_path, index=False)

    print(f"Exported to {gcs_path}")

# Usage: upload df as parquet to gs://my-bucket/data/carbonarc/export.parquet
export_to_gcs(df, "my-bucket", "data/carbonarc/export.parquet")

Azure Blob Storage

pip install adlfs
def export_to_azure(df, container, blob_path, file_format="parquet"):
    """Export a DataFrame to Azure Blob Storage (requires adlfs).

    Args:
        df: DataFrame to upload.
        container: Target blob container name.
        blob_path: Blob path within the container.
        file_format: "parquet" (default) or "csv".

    Raises:
        ValueError: if file_format is not a supported format.
    """
    # Fail fast on a bad format — previously an unknown format silently
    # skipped the upload yet still printed the success message.
    if file_format not in ("parquet", "csv"):
        raise ValueError(f"Unsupported file_format: {file_format!r}")

    azure_path = f"az://{container}/{blob_path}"

    if file_format == "parquet":
        df.to_parquet(azure_path, index=False)
    else:
        df.to_csv(azure_path, index=False)

    print(f"Exported to {azure_path}")

# Usage: upload df as parquet to az://my-container/data/carbonarc/export.parquet
export_to_azure(df, "my-container", "data/carbonarc/export.parquet")

Framework Data Export

Export Single Framework

def export_framework_data(client, framework_id, output_path, file_format="csv"):
    """
    Export purchased framework data to a file.

    Pages through the framework data API, assembles a single DataFrame,
    and writes it out in the requested format.

    Args:
        client: CarbonArcClient instance
        framework_id: ID of the purchased framework
        output_path: Output file path (without extension)
        file_format: "csv", "excel", "parquet", or "json"

    Returns:
        pandas DataFrame containing all fetched rows

    Raises:
        ValueError: if file_format is not one of the supported formats.
    """
    extensions = {"csv": ".csv", "excel": ".xlsx", "parquet": ".parquet", "json": ".json"}
    # Validate the format BEFORE fetching — previously a typo only surfaced
    # as a KeyError after the (potentially long) paginated download finished.
    if file_format not in extensions:
        raise ValueError(
            f"Unsupported file_format: {file_format!r}; expected one of {sorted(extensions)}"
        )

    # Get framework data (handles pagination)
    all_data = []
    page = 1
    while True:
        response = client.explorer.get_framework_data(
            framework_id=framework_id,
            page=page,
            page_size=10000,
        )
        data = response.get("data", [])
        if not data:
            break
        all_data.extend(data)
        # Stop once the server-reported total has been collected.
        if len(all_data) >= response.get("total", 0):
            break
        page += 1

    df = pd.DataFrame(all_data)

    # Export based on format
    path = f"{output_path}{extensions[file_format]}"
    if file_format == "csv":
        df.to_csv(path, index=False)
    elif file_format == "excel":
        df.to_excel(path, index=False)
    elif file_format == "parquet":
        df.to_parquet(path, index=False)
    else:  # json — the only remaining validated format
        df.to_json(path, orient="records", indent=2)

    print(f"Exported {len(df)} rows to {path}")
    return df

# Usage: writes exports/tesla_revenue.parquet and returns the DataFrame
df = export_framework_data(
client,
framework_id="fw_abc123",
output_path="exports/tesla_revenue",
file_format="parquet"
)

Batch Export (Multiple Frameworks)

def batch_export_frameworks(client, framework_ids, output_dir="exports", file_format="parquet"):
    """Export each framework in framework_ids to its own file under output_dir.

    Returns a dict mapping framework id -> DataFrame (None for failures).
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    results = {}
    for fw_id in framework_ids:
        try:
            # Derive a filesystem-friendly file name from the framework metadata.
            metadata = client.explorer.get_framework_metadata(framework_id=fw_id)
            name = metadata.get("name", fw_id).replace(" ", "_").lower()
            destination = os.path.join(output_dir, name)
            results[fw_id] = export_framework_data(client, fw_id, destination, file_format)
        except Exception as e:
            # Best-effort batch: record the failure and keep going.
            print(f"Failed to export {fw_id}: {e}")
            results[fw_id] = None

    success_count = len([r for r in results.values() if r is not None])
    print(f"\nBatch export: {success_count}/{len(framework_ids)} successful")
    return results

# Usage
results = batch_export_frameworks(
    client,
    framework_ids=["fw_123", "fw_456", "fw_789"],
    output_dir="exports/batch_2024",
    file_format="parquet"
)

Scheduled Exports

from datetime import datetime

def scheduled_export(client, framework_id, output_dir="exports"):
    """
    Export framework data to a timestamped parquet file for scheduled runs.

    Creates e.g.: exports/framework_2024-01-15_143022.parquet

    Args:
        client: CarbonArcClient instance
        framework_id: ID of the framework to export
        output_dir: Directory that receives the timestamped file

    Returns:
        Path of the parquet file that was written.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")

    # Get all data with pagination
    all_data = []
    page = 1
    while True:
        response = client.explorer.get_framework_data(
            framework_id=framework_id,
            page=page,
            page_size=10000
        )
        data = response.get("data", [])
        if not data:
            break
        all_data.extend(data)
        # Stop once the server-reported total has been collected.
        if len(all_data) >= response.get("total", 0):
            break
        page += 1

    df = pd.DataFrame(all_data)

    # Export with timestamp
    os.makedirs(output_dir, exist_ok=True)
    filename = f"{output_dir}/framework_{timestamp}.parquet"
    df.to_parquet(filename, index=False)

    # BUG FIX: the message previously printed the literal placeholder
    # "(unknown)" instead of the output path.
    print(f"Exported: {filename} ({len(df)} rows)")
    return filename

Cron example:

# Run daily at 6 AM
0 6 * * * python -c "from export_script import scheduled_export; scheduled_export(...)"

Quick Reference

Format Comparison

| Format  | Size   | Speed  | Best For                 |
| ------- | ------ | ------ | ------------------------ |
| CSV     | Large  | Fast   | Excel, universal sharing |
| Excel   | Large  | Medium | Business users, reports  |
| JSON    | Large  | Fast   | APIs, web apps           |
| Parquet | Small  | Fast   | Analytics, data lakes    |
| SQLite  | Medium | Medium | Local SQL queries        |

Quick Commands

# CSV (universal, but dtypes are not preserved)
df.to_csv("data.csv", index=False)

# Excel (requires openpyxl)
df.to_excel("data.xlsx", index=False)

# JSON (list-of-records layout)
df.to_json("data.json", orient="records", indent=2)

# Parquet (preserves dtypes; requires pyarrow)
df.to_parquet("data.parquet", index=False)

# SQLite (conn is an open sqlite3 connection)
df.to_sql("table", conn, if_exists="replace", index=False)

Compression

# CSV with gzip compression
df.to_csv("data.csv.gz", compression="gzip")

# Parquet with snappy compression (fast)
df.to_parquet("data.parquet", compression="snappy")

Required Packages

# Core
pip install pandas openpyxl pyarrow

# Databases
pip install sqlalchemy psycopg2-binary # PostgreSQL
pip install sqlalchemy pymysql # MySQL

# Cloud Storage
pip install s3fs boto3 # AWS S3
pip install gcsfs # Google Cloud Storage
pip install adlfs # Azure Blob Storage

Next Steps