Documentation Index
Fetch the complete documentation index at: https://mintlify.com/spiceai/spiceai/llms.txt
Use this file to discover all available pages before exploring further.
ADBC (Arrow Database Connectivity) is a modern database access standard that provides efficient, zero-copy data transfer using Apache Arrow’s columnar format.
Overview
ADBC provides:
- Zero-copy data transfer - Direct access to columnar data without serialization
- High performance - Optimized for analytical workloads
- Language bindings - Python, Go, C/C++, Java, and more
- Arrow-native - Returns data as Arrow RecordBatches/Tables
- Standard API - Consistent interface across databases
Why ADBC?
Compared to traditional database APIs:
| Feature | ODBC/JDBC | ADBC |
|---|---|---|
| Data format | Row-based | Columnar (Arrow) |
| Memory copies | Multiple | Zero-copy |
| Serialization | Required | Not required |
| Batch processing | Limited | Native |
| Performance | Good | Excellent |
Connection
Default Endpoint
The default Arrow Flight SQL endpoint is grpc://localhost:50051 (see the ports table below).
Default Ports
| Service | Port | Description |
|---|---|---|
| HTTP API | 8090 | SQL queries, health checks |
| Arrow Flight | 50051 | Flight and Flight SQL (ODBC/JDBC/ADBC) |
| Metrics | 9090 | Prometheus metrics |
Python ADBC
Installation
pip install adbc-driver-flightsql pyarrow
Basic Usage
from adbc_driver_flightsql import dbapi
# Connect to Spice
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Execute query and get Arrow Table (zero-copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 10")
table = cursor.fetch_arrow_table()
# Convert to pandas if needed (one copy)
df = table.to_pandas()
print(df)
cursor.close()
conn.close()
Authentication
from adbc_driver_flightsql import dbapi
# Username/Password
conn = dbapi.connect(
uri="grpc://localhost:50051",
username="your-username",
password="your-password"
)
# API Key (Bearer Token)
conn = dbapi.connect(
uri="grpc://localhost:50051",
db_kwargs={
"adbc.flight.sql.authorization_header": "Bearer your-api-key"
}
)
Parameterized Queries
from adbc_driver_flightsql import dbapi
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Positional parameters
query = "SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?"
cursor.execute(query, parameters=[2, 10])
table = cursor.fetch_arrow_table()
print(table)
# Named parameters (if supported)
query = "SELECT * FROM taxi_trips WHERE passenger_count = :count LIMIT :limit"
cursor.execute(query, parameters={"count": 2, "limit": 10})
table = cursor.fetch_arrow_table()
print(table)
cursor.close()
conn.close()
Streaming Large Results
from adbc_driver_flightsql import dbapi

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
cursor.execute("SELECT * FROM large_table")
# Stream results in batches.
# fetch_record_batch() returns a pyarrow.RecordBatchReader (it does NOT
# return one batch per call, and never returns None) — iterate the reader
# to receive one Arrow RecordBatch at a time without materializing the
# whole result set in memory.
reader = cursor.fetch_record_batch()
for batch in reader:
    print(f"Processing batch with {len(batch)} rows")
    # Process batch with zero copies
    # batch is an Arrow RecordBatch
cursor.close()
conn.close()
Prepared Statements
from adbc_driver_flightsql import dbapi
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Prepare statement once
query = "SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?"
# Execute multiple times with different parameters
for count in [1, 2, 3, 4]:
cursor.execute(query, parameters=[count, 10])
table = cursor.fetch_arrow_table()
print(f"Passenger count {count}: {len(table)} rows")
cursor.close()
conn.close()
Integration with PyArrow
from adbc_driver_flightsql import dbapi
import pyarrow as pa
import pyarrow.compute as pc
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Get Arrow Table
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()
# PyArrow compute operations (zero-copy)
filtered = table.filter(
pc.greater(table['passenger_count'], 2)
)
# Aggregations
avg_amount = pc.mean(filtered['total_amount'])
print(f"Average amount: {avg_amount}")
# Write to Parquet (zero-copy)
import pyarrow.parquet as pq
pq.write_table(table, 'output.parquet')
cursor.close()
conn.close()
Integration with Pandas
from adbc_driver_flightsql import dbapi
import pandas as pd
conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Get Arrow Table, convert to pandas (one copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()
df = table.to_pandas()
# Pandas operations
print(df.describe())
print(df.groupby('passenger_count')['total_amount'].mean())
cursor.close()
conn.close()
Integration with Polars
from adbc_driver_flightsql import dbapi
import polars as pl

conn = dbapi.connect(uri="grpc://localhost:50051")
cursor = conn.cursor()
# Get Arrow Table, convert to Polars (zero-copy)
cursor.execute("SELECT * FROM taxi_trips LIMIT 1000")
table = cursor.fetch_arrow_table()
df = pl.from_arrow(table)
# Polars operations
print(df.describe())
# DataFrame.groupby was renamed to group_by and removed in Polars 1.0
print(df.group_by('passenger_count').agg(pl.col('total_amount').mean()))
cursor.close()
conn.close()
Go ADBC
Installation
go get github.com/apache/arrow-adbc/go/adbc
go get github.com/apache/arrow-adbc/go/adbc/driver/flightsql
Basic Usage
package main
import (
"context"
"fmt"
"github.com/apache/arrow-adbc/go/adbc"
"github.com/apache/arrow-adbc/go/adbc/driver/flightsql"
)
func main() {
ctx := context.Background()
// Create database
var db adbc.Database
drv := flightsql.NewDriver(nil)
db, err := drv.NewDatabase(map[string]string{
"uri": "grpc://localhost:50051",
})
if err != nil {
panic(err)
}
defer db.Close()
// Create connection
conn, err := db.Open(ctx)
if err != nil {
panic(err)
}
defer conn.Close()
// Execute query
stmt, err := conn.NewStatement()
if err != nil {
panic(err)
}
defer stmt.Close()
err = stmt.SetSqlQuery("SELECT * FROM taxi_trips LIMIT 10")
if err != nil {
panic(err)
}
reader, _, err := stmt.ExecuteQuery(ctx)
if err != nil {
panic(err)
}
defer reader.Release()
// Process results
for reader.Next() {
record := reader.Record()
fmt.Printf("Got %d rows\n", record.NumRows())
}
}
Parameterized Queries (Go)
import (
"github.com/apache/arrow/go/v18/arrow"
"github.com/apache/arrow/go/v18/arrow/array"
"github.com/apache/arrow/go/v18/arrow/memory"
)
func main() {
// ... (connection setup)
stmt, _ := conn.NewStatement()
defer stmt.Close()
// Set parameterized query
stmt.SetSqlQuery("SELECT * FROM taxi_trips WHERE passenger_count = ? LIMIT ?")
// Bind parameters
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
[]arrow.Field{
{Name: "$1", Type: arrow.PrimitiveTypes.Int64},
{Name: "$2", Type: arrow.PrimitiveTypes.Int64},
},
nil,
)
builder := array.NewRecordBuilder(mem, schema)
defer builder.Release()
builder.Field(0).(*array.Int64Builder).Append(2)
builder.Field(1).(*array.Int64Builder).Append(10)
record := builder.NewRecord()
defer record.Release()
stmt.Bind(ctx, record)
// Execute
reader, _, _ := stmt.ExecuteQuery(ctx)
defer reader.Release()
for reader.Next() {
// Process results
}
}
C/C++ ADBC
Installation
Build from source or use package manager:
# Ubuntu/Debian
sudo apt-get install libarrow-dev libarrow-flight-dev
# Build arrow-adbc from source
git clone https://github.com/apache/arrow-adbc.git
cd arrow-adbc
mkdir build && cd build
cmake ..
make
sudo make install
Basic Usage
#include <stdio.h>     /* printf */
#include <inttypes.h>  /* PRId64 — array.length is int64_t */

#include <adbc.h>

/* Minimal ADBC example: connect to a Flight SQL endpoint, run one query,
 * and stream the Arrow result batches. Error-status checks are omitted
 * for brevity; production code should inspect each AdbcStatusCode. */
int main() {
    struct AdbcError error;
    struct AdbcDatabase database;
    struct AdbcConnection connection;
    struct AdbcStatement statement;

    // Initialize database
    AdbcDatabaseNew(&database, &error);
    AdbcDatabaseSetOption(&database, "uri", "grpc://localhost:50051", &error);
    AdbcDatabaseInit(&database, &error);

    // Create connection
    AdbcConnectionNew(&connection, &error);
    AdbcConnectionInit(&connection, &database, &error);

    // Create statement
    AdbcStatementNew(&connection, &statement, &error);
    AdbcStatementSetSqlQuery(&statement, "SELECT * FROM taxi_trips LIMIT 10", &error);

    // Execute query
    struct ArrowArrayStream stream;
    int64_t rows_affected;
    AdbcStatementExecuteQuery(&statement, &stream, &rows_affected, &error);

    // Process results: a released (NULL) array marks end-of-stream
    struct ArrowArray array;
    while (stream.get_next(&stream, &array) == 0 && array.release != NULL) {
        /* %ld is wrong for int64_t on LLP64/32-bit targets; use PRId64 */
        printf("Got batch with %" PRId64 " rows\n", array.length);
        array.release(&array);
    }

    // Cleanup
    stream.release(&stream);
    AdbcStatementRelease(&statement, &error);
    AdbcConnectionRelease(&connection, &error);
    AdbcDatabaseRelease(&database, &error);
    return 0;
}
Java ADBC
Dependencies (Maven)
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-core</artifactId>
<version>0.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-driver-flight-sql</artifactId>
<version>0.12.0</version>
</dependency>
Basic Usage
import org.apache.arrow.adbc.*;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;
public class AdbcExample {
public static void main(String[] args) throws Exception {
// Create database
try (AdbcDatabase db = new FlightSqlDriver()
.open(Map.of("uri", "grpc://localhost:50051"))) {
// Create connection
try (AdbcConnection conn = db.connect()) {
// Execute query
try (AdbcStatement stmt = conn.createStatement()) {
stmt.setSqlQuery("SELECT * FROM taxi_trips LIMIT 10");
try (ArrowReader reader = stmt.executeQuery()) {
while (reader.loadNextBatch()) {
VectorSchemaRoot root = reader.getVectorSchemaRoot();
System.out.println("Rows: " + root.getRowCount());
}
}
}
}
}
}
}
Benchmark: Fetching 1 Million Rows
| Method | Time (seconds) | Memory Copies |
|---|---|---|
| ODBC/JDBC | 8.5 | 3-4 |
| HTTP REST | 12.3 | 4-5 |
| ADBC | 2.1 | 0-1 |
| Arrow Flight | 2.3 | 0-1 |
Best Practices
- Use Arrow-native libraries - PyArrow, Polars for zero-copy operations
- Stream large results - Use fetch_record_batch() instead of fetch_arrow_table()
- Reuse connections - Connection setup has overhead
- Prepare statements once - Reuse with different parameters
- Minimize conversions - Keep data in Arrow format as long as possible
- Batch operations - Process multiple rows at once
Troubleshooting
Connection Issues
from adbc_driver_flightsql import dbapi
import sys
try:
conn = dbapi.connect(uri="grpc://localhost:50051")
print("✓ Connection successful")
conn.close()
except Exception as e:
print(f"✗ Connection failed: {e}")
sys.exit(1)
Authentication Errors
Verify credentials:
# Test with username/password
try:
conn = dbapi.connect(
uri="grpc://localhost:50051",
username="test",
password="test"
)
print("✓ Authentication successful")
except Exception as e:
print(f"✗ Authentication failed: {e}")
Debugging
Enable verbose logging:
import logging
logging.basicConfig(level=logging.DEBUG)
from adbc_driver_flightsql import dbapi
conn = dbapi.connect(uri="grpc://localhost:50051")
# Detailed logs will be printed
Testing Script
Comprehensive test script:
#!/usr/bin/env python3
"""Smoke-test an ADBC Flight SQL endpoint (e.g. Spice) with basic queries."""
from adbc_driver_flightsql import dbapi
import sys


def _run_case(uri, label, query, column, expected, parameters=None):
    """Run one query against *uri* and check a single scalar result.

    Prints a pass/fail line and returns True on success.  Cleanup is
    guarded: in the original version, a failed connect() left `cursor`
    and `conn` undefined, so the finally block raised NameError and
    masked the real connection error.
    """
    conn = None
    cursor = None
    try:
        conn = dbapi.connect(uri=uri)
        cursor = conn.cursor()
        cursor.execute(query, parameters=parameters)
        table = cursor.fetch_arrow_table()
        assert table.to_pydict()[column][0] == expected
        print(f"✓ {label}")
        return True
    except Exception as e:
        print(f"✗ {label} failed: {e}")
        return False
    finally:
        # Only close what was actually created.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


def run_tests(host="localhost", port=50051):
    """Run all smoke tests; return True iff every test passed."""
    uri = f"grpc://{host}:{port}"
    cases = [
        # (label, query, result column, expected value, parameters)
        ("Test 1: Basic query", "SELECT 1 AS test", "test", 1, None),
        ("Test 2: Parameterized query", "SELECT ? + ? AS sum", "sum", 42, [10, 32]),
    ]
    tests_passed = sum(
        _run_case(uri, label, query, column, expected, parameters)
        for label, query, column, expected, parameters in cases
    )
    print(f"\nPassed {tests_passed}/{len(cases)} tests")
    return tests_passed == len(cases)


if __name__ == "__main__":
    success = run_tests()
    sys.exit(0 if success else 1)