Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/spiceai/spiceai/llms.txt

Use this file to discover all available pages before exploring further.

ADBC (Arrow Database Connectivity) is a modern database access standard that provides efficient, zero-copy data transfer using Apache Arrow’s columnar format.

Overview

ADBC provides:
  • Zero-copy data transfer - Direct access to columnar data without serialization
  • High performance - Optimized for analytical workloads
  • Language bindings - Python, Go, C/C++, Java, and more
  • Arrow-native - Returns data as Arrow RecordBatches/Tables
  • Standard API - Consistent interface across databases

Why ADBC?

Compared to traditional database APIs:
FeatureODBC/JDBCADBC
Data formatRow-basedColumnar (Arrow)
Memory copiesMultipleZero-copy
SerializationRequiredNot required
Batch processingLimitedNative
PerformanceGoodExcellent

Connection

Default Endpoint

grpc://localhost:50051

Default Ports

ServicePortDescription
HTTP API8090SQL queries, health checks
Arrow Flight50051Flight and Flight SQL (ODBC/JDBC/ADBC)
Metrics9090Prometheus metrics

Python ADBC

Installation

pip install adbc-driver-flightsql pyarrow

Basic Usage

from adbc_driver_flightsql import dbapi

# Connect to Spice
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Execute query and get Arrow Table (zero-copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 10")
table = cursor.fetch_arrow_table()

# Convert to pandas if needed (one copy)
df = table.to_pandas()
print(df)

cursor.close()
conn.close()

Authentication

from adbc_driver_flightsql import dbapi

# Username/Password
conn = dbapi.connect(
    uri="grpc://localhost:50051",
    username="your-username",
    password="your-password"
)

# API Key (Bearer Token)
conn = dbapi.connect(
    uri="grpc://localhost:50051",
    db_kwargs={
        "adbc.flight.sql.authorization_header": "Bearer your-api-key"
    }
)

Parameterized Queries

from adbc_driver_flightsql import dbapi

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Positional parameters
query = "SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?"
cursor.execute(query, parameters=[2, 10])
table = cursor.fetch_arrow_table()
print(table)

# Named parameters (if supported)
query = "SELECT * FROM taxi_trips WHERE passenger_count = :count LIMIT :limit"
cursor.execute(query, parameters={"count": 2, "limit": 10})
table = cursor.fetch_arrow_table()
print(table)

cursor.close()
conn.close()

Streaming Large Results

from adbc_driver_flightsql import dbapi

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

cursor.execute("SELECT * FROM large_table")

# Stream results in batches
while True:
    batch = cursor.fetch_record_batch()
    if batch is None:
        break

    print(f"Processing batch with {len(batch)} rows")
    # Process batch with zero copies
    # batch is an Arrow RecordBatch

cursor.close()
conn.close()

Prepared Statements

from adbc_driver_flightsql import dbapi

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Prepare statement once
query = "SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?"

# Execute multiple times with different parameters
for count in [1, 2, 3, 4]:
    cursor.execute(query, parameters=[count, 10])
    table = cursor.fetch_arrow_table()
    print(f"Passenger count {count}: {len(table)} rows")

cursor.close()
conn.close()

Integration with PyArrow

from adbc_driver_flightsql import dbapi
import pyarrow as pa
import pyarrow.compute as pc

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Get Arrow Table
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()

# PyArrow compute operations (zero-copy)
filtered = table.filter(
    pc.greater(table['passenger_count'], 2)
)

# Aggregations
avg_amount = pc.mean(filtered['total_amount'])
print(f"Average amount: {avg_amount}")

# Write to Parquet (zero-copy)
import pyarrow.parquet as pq
pq.write_table(table, 'output.parquet')

cursor.close()
conn.close()

Integration with Pandas

from adbc_driver_flightsql import dbapi
import pandas as pd

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Get Arrow Table, convert to pandas (one copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()
df = table.to_pandas()

# Pandas operations
print(df.describe())
print(df.groupby('passenger_count')['total_amount'].mean())

cursor.close()
conn.close()

Integration with Polars

from adbc_driver_flightsql import dbapi
import polars as pl

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()

# Get Arrow Table, convert to Polars (zero-copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()
df = pl.from_arrow(table)

# Polars operations
print(df.describe())
print(df.groupby('passenger_count').agg(pl.col('total_amount').mean()))

cursor.close()
conn.close()

Go ADBC

Installation

go get github.com/apache/arrow-adbc/go/adbc
go get github.com/apache/arrow-adbc/go/adbc/driver/flightsql

Basic Usage

package main

import (
    "context"
    "fmt"

    "github.com/apache/arrow-adbc/go/adbc"
    "github.com/apache/arrow-adbc/go/adbc/driver/flightsql"
)

func main() {
    ctx := context.Background()

    // Create database
    var db adbc.Database
    drv := flightsql.NewDriver(nil)
    db, err := drv.NewDatabase(map[string]string{
        "uri": "grpc://localhost:50051",
    })
    if err != nil {
        panic(err)
    }
    defer db.Close()

    // Create connection
    conn, err := db.Open(ctx)
    if err != nil {
        panic(err)
    }
    defer conn.Close()

    // Execute query
    stmt, err := conn.NewStatement()
    if err != nil {
        panic(err)
    }
    defer stmt.Close()

    err = stmt.SetSqlQuery("SELECT * FROM taxi_trips LIMIT 10")
    if err != nil {
        panic(err)
    }

    reader, _, err := stmt.ExecuteQuery(ctx)
    if err != nil {
        panic(err)
    }
    defer reader.Release()

    // Process results
    for reader.Next() {
        record := reader.Record()
        fmt.Printf("Got %d rows\n", record.NumRows())
    }
}

Parameterized Queries (Go)

import (
    "github.com/apache/arrow/go/v18/arrow"
    "github.com/apache/arrow/go/v18/arrow/array"
    "github.com/apache/arrow/go/v18/arrow/memory"
)

func main() {
    // ... (connection setup)

    stmt, _ := conn.NewStatement()
    defer stmt.Close()

    // Set parameterized query
    stmt.SetSqlQuery("SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?")

    // Bind parameters
    mem := memory.NewGoAllocator()
    schema := arrow.NewSchema(
        []arrow.Field{
            {Name: "$1", Type: arrow.PrimitiveTypes.Int64},
            {Name: "$2", Type: arrow.PrimitiveTypes.Int64},
        },
        nil,
    )

    builder := array.NewRecordBuilder(mem, schema)
    defer builder.Release()

    builder.Field(0).(*array.Int64Builder).Append(2)
    builder.Field(1).(*array.Int64Builder).Append(10)

    record := builder.NewRecord()
    defer record.Release()

    stmt.Bind(ctx, record)

    // Execute
    reader, _, _ := stmt.ExecuteQuery(ctx)
    defer reader.Release()

    for reader.Next() {
        // Process results
    }
}

C/C++ ADBC

Installation

Build from source or use package manager:
# Ubuntu/Debian
sudo apt-get install libarrow-dev libarrow-flight-dev

# Build arrow-adbc from source
git clone https://github.com/apache/arrow-adbc.git
cd arrow-adbc
mkdir build && cd build
cmake ..
make
sudo make install

Basic Usage

#include <adbc.h>

int main() {
    struct AdbcError error;
    struct AdbcDatabase database;
    struct AdbcConnection connection;
    struct AdbcStatement statement;

    // Initialize database
    AdbcDatabaseNew(&database, &error);
    AdbcDatabaseSetOption(&database, "uri", "grpc://localhost:50051", &error);
    AdbcDatabaseInit(&database, &error);

    // Create connection
    AdbcConnectionNew(&connection, &error);
    AdbcConnectionInit(&connection, &database, &error);

    // Create statement
    AdbcStatementNew(&connection, &statement, &error);
    AdbcStatementSetSqlQuery(&statement, "SELECT * FROM taxi_trips LIMIT 10", &error);

    // Execute query
    struct ArrowArrayStream stream;
    int64_t rows_affected;
    AdbcStatementExecuteQuery(&statement, &stream, &rows_affected, &error);

    // Process results
    struct ArrowSchema schema;
    struct ArrowArray array;

    while (stream.get_next(&stream, &array) == 0 && array.release != NULL) {
        printf("Got batch with %ld rows\n", array.length);
        array.release(&array);
    }

    // Cleanup
    stream.release(&stream);
    AdbcStatementRelease(&statement, &error);
    AdbcConnectionRelease(&connection, &error);
    AdbcDatabaseRelease(&database, &error);

    return 0;
}

Java ADBC

Dependencies (Maven)

<dependency>
    <groupId>org.apache.arrow.adbc</groupId>
    <artifactId>adbc-core</artifactId>
    <version>0.12.0</version>
</dependency>
<dependency>
    <groupId>org.apache.arrow.adbc</groupId>
    <artifactId>adbc-driver-flight-sql</artifactId>
    <version>0.12.0</version>
</dependency>

Basic Usage

import org.apache.arrow.adbc.*;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;

public class AdbcExample {
    public static void main(String[] args) throws Exception {
        // Create database
        try (AdbcDatabase db = new FlightSqlDriver()
                .open(Map.of("uri", "grpc://localhost:50051"))) {

            // Create connection
            try (AdbcConnection conn = db.connect()) {

                // Execute query
                try (AdbcStatement stmt = conn.createStatement()) {
                    stmt.setSqlQuery("SELECT * FROM taxi_trips LIMIT 10");

                    try (ArrowReader reader = stmt.executeQuery()) {
                        while (reader.loadNextBatch()) {
                            VectorSchemaRoot root = reader.getVectorSchemaRoot();
                            System.out.println("Rows: " + root.getRowCount());
                        }
                    }
                }
            }
        }
    }
}

Performance Comparison

Benchmark: Fetching 1 Million Rows

MethodTime (seconds)Memory Copies
ODBC/JDBC8.53-4
HTTP REST12.34-5
ADBC2.10-1
Arrow Flight2.30-1

Best Practices

  1. Use Arrow-native libraries - PyArrow, Polars for zero-copy operations
  2. Stream large results - Use fetch_record_batch() instead of fetch_arrow_table()
  3. Reuse connections - Connection setup has overhead
  4. Prepare statements once - Reuse with different parameters
  5. Minimize conversions - Keep data in Arrow format as long as possible
  6. Batch operations - Process multiple rows at once

Troubleshooting

Connection Issues

from adbc_driver_flightsql import dbapi
import sys

try:
    conn = dbapi.connect(uri="grpc://localhost:50051")
    print("✓ Connection successful")
    conn.close()
except Exception as e:
    print(f"✗ Connection failed: {e}")
    sys.exit(1)

Authentication Errors

Verify credentials:
# Test with username/password
try:
    conn = dbapi.connect(
        uri="grpc://localhost:50051",
        username="test",
        password="test"
    )
    print("✓ Authentication successful")
except Exception as e:
    print(f"✗ Authentication failed: {e}")

Debugging

Enable verbose logging:
import logging

logging.basicConfig(level=logging.DEBUG)

from adbc_driver_flightsql import dbapi

conn = dbapi.connect(uri="grpc://localhost:50051")
# Detailed logs will be printed

Testing Script

Comprehensive test script:
#!/usr/bin/env python3
from adbc_driver_flightsql import dbapi
import sys

def run_tests(host="localhost", port=50051):
    uri = f"grpc://{host}:{port}"
    tests_passed = 0
    tests_total = 0

    # Test 1: Basic connection
    tests_total += 1
    try:
        conn = dbapi.connect(uri=uri)
        cursor = conn.cursor()
        cursor.execute("SELECT 1 AS test")
        result = cursor.fetch_arrow_table()
        assert result.to_pydict()['test'][0] == 1
        print("✓ Test 1: Basic query")
        tests_passed += 1
    except Exception as e:
        print(f"✗ Test 1 failed: {e}")
    finally:
        cursor.close()
        conn.close()

    # Test 2: Parameterized query
    tests_total += 1
    try:
        conn = dbapi.connect(uri=uri)
        cursor = conn.cursor()
        cursor.execute("SELECT ? + ? AS sum", parameters=[10, 32])
        result = cursor.fetch_arrow_table()
        assert result.to_pydict()['sum'][0] == 42
        print("✓ Test 2: Parameterized query")
        tests_passed += 1
    except Exception as e:
        print(f"✗ Test 2 failed: {e}")
    finally:
        cursor.close()
        conn.close()

    print(f"\nPassed {tests_passed}/{tests_total} tests")
    return tests_passed == tests_total

if __name__ == "__main__":
    success = run_tests()
    sys.exit(0 if success else 1)