No edit summary |
|||
| (One intermediate revision by the same user not shown) | |||
| Line 67: | Line 67: | ||
<syntaxhighlight lang="python"> | <syntaxhighlight lang="python"> | ||
# | #!/usr/bin/env python3 | ||
""" | |||
Kraken Historical Data Collection Script | |||
Copyright (c) 2025 Volatility.RED and FXGears.com | |||
All rights reserved. | |||
This software is provided for NON-COMMERCIAL USE ONLY. | |||
Permissions: | |||
- Personal use and modification for individual trading and research | |||
- Educational use in academic or learning environments | |||
- Internal use within organizations for research purposes | |||
Restrictions: | |||
- NO REDISTRIBUTION: This code may not be redistributed, shared, or published | |||
in any form, including but not limited to: websites, forums, repositories, | |||
social media, or any other public or private distribution channels | |||
- NO REPUBLISHING: This code may not be republished or included in other | |||
software packages, tutorials, or educational materials | |||
- NO COMMERCIAL USE: This code may not be used for commercial purposes, | |||
including but not limited to: selling, licensing, or incorporating into | |||
commercial products or services | |||
THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. | |||
THE AUTHORS SHALL NOT BE LIABLE FOR ANY DAMAGES ARISING FROM ITS USE. | |||
""" | |||
import urllib.request | |||
import urllib.parse | |||
import urllib.error | |||
import json | |||
import csv | |||
import hmac | |||
import hashlib | |||
import base64 | |||
import time | |||
import os | |||
from datetime import datetime, timedelta | |||
# USER CONFIGURATION - Modify these variables as needed | |||
SYMBOL = "XBTUSD" # Trading pair (e.g., XBTUSD, ETHUSD, ADAUSD) | |||
DATA_TYPE = "trades" # Options: "trades", "ohlc", "spread" | |||
#Not used for "trades" type data | |||
INTERVAL = 1 # For OHLC data: 1, 5, 15, 30, 60, 240, 1440, 10080, 21600 (minutes) | |||
DAYS = 10 # Pull data from X days back until now. | |||
SINCE = int((datetime.now() - timedelta(days=DAYS)).timestamp()) # Unix timestamp or None for all available data | |||
SLEEP_DELAY = 0.01 # Delay between API requests in seconds | |||
RATE_LIMIT_WAIT = 5 # Base wait time (in seconds) for rate limit errors (exponential backoff multiplier) | |||
BATCH_SIZE = 5000 # Number of records to request per API call | |||
OUTPUT_FILE = "kraken_{}_{}_data_{}_to_{}.csv" # Output filename | |||
API_KEY = "" # Your Kraken API key | |||
API_SECRET = "" # Your Kraken API secret | |||
# Kraken API endpoints | |||
BASE_URL = "https://api.kraken.com/0/public/" | |||
PRIVATE_URL = "https://api.kraken.com/0/private/" | |||
def get_auth_headers(uri_path, query_params): | |||
"""Generate authentication headers for Kraken API""" | |||
if not API_KEY or not API_SECRET: | |||
return {}, "" | |||
try: | |||
nonce = str(int(time.time() * 1000)) # Use milliseconds for nonce | |||
# Separate query string (API params) from body (nonce only) | |||
query_str = urllib.parse.urlencode(query_params) if query_params else "" | |||
body = {"nonce": nonce} | |||
body_str = json.dumps(body) | |||
# Kraken's signature method: path + SHA256(nonce + query_str + body_str) | |||
combined_data = nonce + query_str + body_str | |||
sha256_hash = hashlib.sha256(combined_data.encode()).digest() | |||
message = uri_path.encode() + sha256_hash | |||
signature = hmac.new(base64.b64decode(API_SECRET), message, hashlib.sha512) | |||
headers = { | |||
'API-Key': API_KEY, | |||
'API-Sign': base64.b64encode(signature.digest()).decode(), | |||
'Content-Type': 'application/json' | |||
} | |||
return headers, body_str | |||
except Exception as e: | |||
print(f"Auth headers: Error generating signature - {e}") | |||
import traceback | |||
traceback.print_exc() | |||
return {}, "" | |||
def make_request(url, params, use_auth=False, max_retries=3): | |||
"""Make API request with optional authentication and retry logic""" | |||
for attempt in range(max_retries): | |||
try: | |||
# Build URL with query parameters | |||
query_str = urllib.parse.urlencode(params) if params else "" | |||
full_url = f"{url}?{query_str}" if query_str else url | |||
headers = {} | |||
body_data = None | |||
if use_auth and API_KEY and API_SECRET: | |||
uri_path = url.replace('https://api.kraken.com', '') | |||
headers, body_str = get_auth_headers(uri_path, params) | |||
body_data = body_str.encode() if body_str else None | |||
if headers and attempt == 0: | |||
print(f"AUTH: Using authenticated GET request to {uri_path}") | |||
else: | |||
if attempt == 0: | |||
print("AUTH: Using public GET request (no authentication)") | |||
req = urllib.request.Request(full_url, data=body_data, headers=headers) | |||
response = urllib.request.urlopen(req) | |||
response_text = response.read().decode() | |||
data = json.loads(response_text) | |||
except urllib.error.HTTPError as e: | |||
print(f"HTTP Error {e.code}: {e.read().decode()}") | |||
return {'error': [f'HTTP {e.code} error']} | |||
except Exception as e: | |||
print(f"DEBUG: Failed to parse JSON or make request: {e}") | |||
return {'error': ['Failed to parse response']} | |||
# Debug: Check response for auth issues | |||
if 'error' in data and data['error']: | |||
if any('invalid' in str(err).lower() or 'auth' in str(err).lower() for err in data['error']): | |||
print(f"AUTH ERROR: {data['error']} - Check API credentials") | |||
# Check for rate limit error | |||
if 'error' in data and any('too many requests' in str(err).lower() for err in data['error']): | |||
wait_time = (2 ** attempt) * RATE_LIMIT_WAIT # Exponential backoff | |||
print(f"RATE LIMIT: Hit limit (attempt {attempt + 1}), waiting {wait_time} seconds...") | |||
time.sleep(wait_time) | |||
continue | |||
return data | |||
return data # Return last response if all retries failed | |||
def get_historical_trades(pair, filename, since=None): | |||
"""Get all historical trade data with pagination and write directly to CSV""" | |||
url = f"{BASE_URL}Trades" | |||
current_since = since | |||
total_records = 0 | |||
first_batch = True | |||
with open(filename, 'w', newline='') as file: | |||
writer = csv.writer(file) | |||
writer.writerow(['timestamp', 'price', 'volume', 'buy_sell', 'market_limit', 'misc']) | |||
while True: | |||
params = {"pair": pair, "count": BATCH_SIZE} | |||
if current_since: | |||
params["since"] = current_since | |||
data = make_request(url, params, use_auth=True) | |||
if 'error' in data and data['error']: | |||
return {'error': data['error']} | |||
batch_count = 0 | |||
for pair_key in data['result']: | |||
if pair_key != 'last': | |||
trades = data['result'][pair_key] | |||
batch_count = len(trades) | |||
for trade in trades: | |||
writer.writerow([ | |||
datetime.fromtimestamp(float(trade[2])), | |||
trade[0], trade[1], trade[3], trade[4], trade[5] | |||
]) | |||
total_records += batch_count | |||
if 'last' in data['result'] and batch_count == BATCH_SIZE: | |||
print(f"Fetched {total_records} records...") | |||
current_since = data['result']['last'] | |||
time.sleep(SLEEP_DELAY) | |||
else: | |||
break | |||
return {'total_records': total_records} | |||
def get_historical_ohlc(pair, filename, interval=1, since=None): | |||
"""Get all historical OHLC data with pagination and write directly to CSV""" | |||
url = f"{BASE_URL}OHLC" | |||
current_since = since | |||
total_records = 0 | |||
with open(filename, 'w', newline='') as file: | |||
writer = csv.writer(file) | |||
writer.writerow(['timestamp', 'open', 'high', 'low', 'close', 'vwap', 'volume', 'count']) | |||
while True: | |||
params = {"pair": pair, "interval": interval} | |||
if current_since: | |||
params["since"] = current_since | |||
data = make_request(url, params, use_auth=True) | |||
if 'error' in data and data['error']: | |||
return {'error': data['error']} | |||
batch_count = 0 | |||
for pair_key in data['result']: | |||
if pair_key != 'last': | |||
ohlc_data = data['result'][pair_key] | |||
batch_count = len(ohlc_data) | |||
for candle in ohlc_data: | |||
writer.writerow([ | |||
datetime.fromtimestamp(float(candle[0])), | |||
candle[1], candle[2], candle[3], candle[4], candle[5], candle[6], candle[7] | |||
]) | |||
total_records += batch_count | |||
if 'last' in data['result'] and batch_count == 720: | |||
print(f"Fetched {total_records} records...") | |||
current_since = data['result']['last'] | |||
time.sleep(SLEEP_DELAY) | |||
else: | |||
break | |||
return {'total_records': total_records} | |||
# Dispatch table for data type handlers | |||
DATA_HANDLERS = { | |||
"trades": lambda symbol, filename, since: get_historical_trades(symbol, filename, since), | |||
"ohlc": lambda symbol, filename, since: get_historical_ohlc(symbol, filename, INTERVAL, since) | |||
} | |||
def test_authentication(): | |||
"""Test if authentication is working by calling a private endpoint""" | |||
url = f"{PRIVATE_URL}OpenOrders" | |||
params = {} | |||
print(f"DEBUG: Testing auth with URL: {url}") | |||
data = make_request(url, params, use_auth=True) | |||
print(f"DEBUG: Full response: {data}") | |||
if 'error' in data and data['error']: | |||
# Check if it's a permission error vs auth error | |||
if any('permission' in str(err).lower() for err in data['error']): | |||
print("AUTH TEST PASSED: Authentication working but insufficient permissions") | |||
return True | |||
else: | |||
print(f"AUTH TEST FAILED: {data['error']}") | |||
return False | |||
else: | |||
print("AUTH TEST PASSED: Authentication is working") | |||
return True | |||
def main(): | |||
# Check for required authentication | |||
if not API_KEY or not API_SECRET: | |||
print("ERROR: API_KEY and API_SECRET are required for data collection.") | |||
print("Please set your Kraken API credentials in the script configuration.") | |||
print("Register a free account at https://geni.us/GoKraken to get API access.") | |||
exit(1) | |||
print(f"Downloading {DATA_TYPE} data for {SYMBOL}...") | |||
print(f"Fetching data from {datetime.fromtimestamp(SINCE)} to now...") | |||
print("Using authenticated requests for optimal performance") | |||
try: | |||
start_date = datetime.fromtimestamp(SINCE).strftime('%Y%m%d') | |||
end_date = datetime.now().strftime('%Y%m%d') | |||
filename = OUTPUT_FILE.format(SYMBOL, DATA_TYPE, start_date, end_date) | |||
if os.path.exists(filename): | |||
os.remove(filename) | |||
print(f"Removed existing file: {filename}") | |||
handler = DATA_HANDLERS.get(DATA_TYPE) | |||
if not handler: | |||
print(f"Invalid DATA_TYPE '{DATA_TYPE}'. Use: {', '.join(DATA_HANDLERS.keys())}") | |||
exit(1) | |||
result = handler(SYMBOL, filename, SINCE) | |||
if 'error' in result: | |||
print(f"API Error: {result['error']}") | |||
exit(1) | |||
else: | |||
print(f"Data saved to {filename} ({result['total_records']} records)") | |||
except Exception as e: | |||
print(f"Error: {e}") | |||
exit(1) | |||
if __name__ == "__main__": | |||
main() | |||
</syntaxhighlight> | </syntaxhighlight> | ||
| Line 275: | Line 570: | ||
</pre> | </pre> | ||
* '''Solution''': Use [https://geni.us/GoKraken Kraken's] official pair names (XBTUSD, not BTCUSD) | * '''Solution''': Use [https://geni.us/GoKraken Kraken's] official pair names (XBTUSD, not BTCUSD) | ||
* '''Reference''': Check | * '''Reference''': Check [https://geni.us/GoKraken Kraken Asset Pairs] | ||
==== Optimization Tips ==== | ==== Optimization Tips ==== | ||
Latest revision as of 11:29, 29 August 2025
Crypto Historical Data Collection
This guide covers collecting both tick-level trade data and OHLC (candlestick) data from Kraken using a single Python script. The script can collect either individual trades (tick data) or aggregated OHLC bars at various timeframes, providing comprehensive historical market data for backtesting and algorithmic trading research.
Data Types Available
Tick (Trades) Data captures every individual trade executed on the exchange:
- Price - The exact execution price of the trade
- Volume - The quantity of the asset traded
- Timestamp - Precise time when the trade occurred (microsecond precision)
- Side - Whether the trade was a buy or sell (market taker direction)
- Type - Market order vs limit order classification
OHLC (Candlestick) Data provides aggregated price action over specific time intervals:
- Open - First trade price in the interval
- High - Highest trade price in the interval
- Low - Lowest trade price in the interval
- Close - Last trade price in the interval
- Volume - Total volume traded in the interval
- VWAP - Volume-weighted average price
- Count - Number of trades in the interval
Prerequisites
Before collecting historical data from Kraken, ensure you have:
- Python 3.7+ installed on your system
- Kraken account registration - Required for API access
- Kraken API credentials (API Key and Secret)
- Basic understanding of cryptocurrency trading concepts
- Sufficient storage space - Data files can be several GB for active pairs
- Stable internet connection - Data collection may run for hours
Account Registration Required: Practical data collection from Kraken requires API authentication to access reasonable batch sizes and request limits. Without authentication, data collection becomes prohibitively slow for any meaningful historical analysis.
Registering a Kraken account is free and requires no trading activity or deposits. Kraken is a fully regulated exchange, licensed in multiple jurisdictions and trusted by millions of users worldwide. Account registration takes only a few minutes and provides the API access essential for efficient data collection.
Kraken API Setup
Account Registration
If you don't have a Kraken account:
- Visit Kraken.com and click Create Account
- Complete email verification and basic information
- No identity verification required for API-only usage
- No deposits or trading required - API access is immediate
Creating API Credentials
- Log into your Kraken account
- Navigate to Settings → API
- Click Generate New Key
- Set permissions to Query Funds and Query Open Orders (minimum required)
- Important: Store your API Secret securely - it's only shown once
Understanding Rate Limits
Kraken enforces rate limiting on their API:
- Authenticated requests: 5000 records per batch, 60 requests per minute
- Rate limit errors: Exponential backoff required
The script handles rate limiting automatically with configurable delays and retry logic.
Code Implementation
Create a new Python file and configure the following parameters at the top:
#!/usr/bin/env python3
"""
Kraken Historical Data Collection Script
Copyright (c) 2025 Volatility.RED and FXGears.com
All rights reserved.
This software is provided for NON-COMMERCIAL USE ONLY.
Permissions:
- Personal use and modification for individual trading and research
- Educational use in academic or learning environments
- Internal use within organizations for research purposes
Restrictions:
- NO REDISTRIBUTION: This code may not be redistributed, shared, or published
in any form, including but not limited to: websites, forums, repositories,
social media, or any other public or private distribution channels
- NO REPUBLISHING: This code may not be republished or included in other
software packages, tutorials, or educational materials
- NO COMMERCIAL USE: This code may not be used for commercial purposes,
including but not limited to: selling, licensing, or incorporating into
commercial products or services
THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND.
THE AUTHORS SHALL NOT BE LIABLE FOR ANY DAMAGES ARISING FROM ITS USE.
"""
import urllib.request
import urllib.parse
import urllib.error
import json
import csv
import hmac
import hashlib
import base64
import time
import os
from datetime import datetime, timedelta
# USER CONFIGURATION - Modify these variables as needed.
# These module-level settings are read by the request helpers and by main().
SYMBOL = "XBTUSD"  # Trading pair (e.g., XBTUSD, ETHUSD, ADAUSD)
DATA_TYPE = "trades"  # Options: "trades", "ohlc" (the keys of DATA_HANDLERS)
# INTERVAL applies to "ohlc" only; it is not used for "trades" type data.
INTERVAL = 1  # For OHLC data: 1, 5, 15, 30, 60, 240, 1440, 10080, 21600 (minutes)
DAYS = 10  # Pull data from X days back until now.
# Unix timestamp (seconds) at which collection starts. Evaluated once, when the
# module is loaded, as "current local time minus DAYS days".
SINCE = int((datetime.now() - timedelta(days=DAYS)).timestamp())
SLEEP_DELAY = 0.01  # Delay between successive paginated API requests, in seconds
# Base wait in seconds after a rate-limit error; retry attempt k (0-based)
# waits RATE_LIMIT_WAIT * 2**k seconds.
RATE_LIMIT_WAIT = 5
BATCH_SIZE = 5000  # Sent as `count` on each Trades request (OHLC requests do not use it)
# Output filename template; main() fills the four fields, in order, with
# SYMBOL, DATA_TYPE, start date and end date (both formatted as YYYYMMDD).
OUTPUT_FILE = "kraken_{}_{}_data_{}_to_{}.csv"
API_KEY = ""  # Your Kraken API key
API_SECRET = ""  # Your Kraken API secret (base64 text; it is base64-decoded before signing)
# Kraken API endpoints (each ends with "/" so an endpoint name can be appended directly)
BASE_URL = "https://api.kraken.com/0/public/"
PRIVATE_URL = "https://api.kraken.com/0/private/"
def get_auth_headers(uri_path, query_params):
    """Build the signed request headers and JSON body for one API call.

    Args:
        uri_path: Path portion of the endpoint URL (e.g. "/0/public/Trades");
            it is prefixed to the digest before signing.
        query_params: Mapping of query parameters, or a falsy value for none.
            Its URL-encoded form takes part in the signature.

    Returns:
        A ``(headers, body)`` tuple. ``headers`` holds ``API-Key``,
        ``API-Sign`` and ``Content-Type``; ``body`` is the JSON text
        ``{"nonce": ...}`` that must be sent with the request. When the
        credentials are not configured, or signing fails for any reason,
        ``({}, "")`` is returned instead (failures are printed, not raised).
    """
    if not (API_KEY and API_SECRET):
        return {}, ""

    try:
        # Millisecond clock reading, so each call gets a fresh, larger nonce.
        nonce = str(int(time.time() * 1000))

        encoded_query = ""
        if query_params:
            encoded_query = urllib.parse.urlencode(query_params)

        # Only the nonce travels in the body; API parameters stay in the URL.
        payload = json.dumps({"nonce": nonce})

        # Signed message: path + SHA256(nonce + query string + body)
        digest = hashlib.sha256(
            (nonce + encoded_query + payload).encode()
        ).digest()
        secret = base64.b64decode(API_SECRET)
        mac = hmac.new(secret, uri_path.encode() + digest, hashlib.sha512)

        signed_headers = {
            'API-Key': API_KEY,
            'API-Sign': base64.b64encode(mac.digest()).decode(),
            'Content-Type': 'application/json'
        }
        return signed_headers, payload
    except Exception as e:
        # Best effort: report the problem and fall back to "no auth".
        print(f"Auth headers: Error generating signature - {e}")
        import traceback
        traceback.print_exc()
        return {}, ""
def make_request(url, params, use_auth=False, max_retries=3, timeout=30):
    """Call an API endpoint and return the decoded JSON response.

    The request is attempted up to ``max_retries`` times. Rate-limit replies
    ("too many requests" in the response's ``error`` list) and transient
    network failures (``OSError`` family: unreachable host, timeout, reset
    connection) are retried with exponential backoff based on
    ``RATE_LIMIT_WAIT``. HTTP error statuses and malformed responses are not
    retried.

    Args:
        url: Endpoint URL without a query string.
        params: Mapping of query parameters, or a falsy value for none.
        use_auth: When true and both credentials are configured, signed
            headers and a nonce body from ``get_auth_headers`` are attached.
        max_retries: Maximum number of attempts.
        timeout: Per-attempt socket timeout in seconds, so a stalled
            connection cannot block the download forever.

    Returns:
        The decoded JSON payload on success. On failure, a dict of the form
        ``{'error': [...]}``; after exhausted retries this is the last
        response (or error dict) seen. This function does not raise for
        request or parsing problems.
    """
    # Seeded so that max_retries <= 0 yields an error dict instead of an
    # UnboundLocalError at the final return.
    data = {'error': ['No request attempted']}

    for attempt in range(max_retries):
        is_last_attempt = attempt + 1 >= max_retries
        try:
            # Build URL with query parameters
            query_str = urllib.parse.urlencode(params) if params else ""
            full_url = f"{url}?{query_str}" if query_str else url

            headers = {}
            body_data = None
            if use_auth and API_KEY and API_SECRET:
                uri_path = url.replace('https://api.kraken.com', '')
                # Re-signed on every attempt so each retry carries a new nonce.
                headers, body_str = get_auth_headers(uri_path, params)
                # NOTE: a non-empty body makes urllib send the request as POST.
                body_data = body_str.encode() if body_str else None
                if headers and attempt == 0:
                    print(f"AUTH: Using authenticated GET request to {uri_path}")
            elif attempt == 0:
                print("AUTH: Using public GET request (no authentication)")

            req = urllib.request.Request(full_url, data=body_data, headers=headers)
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(req, timeout=timeout) as response:
                response_text = response.read().decode()
            data = json.loads(response_text)
        except urllib.error.HTTPError as e:
            # Must precede OSError: HTTPError is a subclass of it.
            print(f"HTTP Error {e.code}: {e.read().decode()}")
            return {'error': [f'HTTP {e.code} error']}
        except OSError as e:
            # URLError, socket timeouts and connection resets are transient.
            print(f"DEBUG: Failed to parse JSON or make request: {e}")
            data = {'error': ['Failed to parse response']}
            if not is_last_attempt:
                wait_time = (2 ** attempt) * RATE_LIMIT_WAIT  # Exponential backoff
                print(f"NETWORK: Request failed (attempt {attempt + 1}), retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            continue
        except Exception as e:
            # Malformed URL, undecodable bytes, invalid JSON: retrying cannot help.
            print(f"DEBUG: Failed to parse JSON or make request: {e}")
            return {'error': ['Failed to parse response']}

        # Only a JSON object can carry an 'error' list; anything else is
        # handed back to the caller untouched.
        errors = (data.get('error') or []) if isinstance(data, dict) else []

        # Debug: Check response for auth issues
        if any('invalid' in str(err).lower() or 'auth' in str(err).lower() for err in errors):
            print(f"AUTH ERROR: {data['error']} - Check API credentials")

        # Check for rate limit error
        if any('too many requests' in str(err).lower() for err in errors):
            if is_last_attempt:
                # No further attempt follows, so sleeping would only waste time.
                print(f"RATE LIMIT: Hit limit (attempt {attempt + 1}), giving up")
            else:
                wait_time = (2 ** attempt) * RATE_LIMIT_WAIT  # Exponential backoff
                print(f"RATE LIMIT: Hit limit (attempt {attempt + 1}), waiting {wait_time} seconds...")
                time.sleep(wait_time)
            continue

        return data

    return data  # Last response / error dict if all retries failed
def get_historical_trades(pair, filename, since=None):
    """Download trade history page by page and stream it into a CSV file.

    Each page is requested from the ``Trades`` endpoint with ``count`` set to
    ``BATCH_SIZE`` and, after the first page, ``since`` set to the ``last``
    cursor of the previous response. Rows are written as they arrive, so
    memory use does not grow with the amount of history.

    Pagination ends when any of the following holds:
      * the page contains no trades,
      * the response has no ``last`` cursor, or the cursor did not advance,
      * the page contains a trade stamped at or after the moment this
        function was called (the download has caught up with "now").

    The end of data is deliberately NOT inferred from a page being shorter
    than ``BATCH_SIZE``: if the server returns fewer rows per page than were
    requested, that rule would stop after the first page and silently
    truncate the download.

    Args:
        pair: Trading pair name sent as the ``pair`` parameter.
        filename: Path of the CSV file to create (overwritten if present).
        since: Starting cursor/timestamp for the first request; a falsy
            value omits the parameter.

    Returns:
        ``{'total_records': n}`` on success, or ``{'error': [...]}`` if a
        request failed or a response carried no ``result``. On error the CSV
        keeps whatever rows were written before the failure.

    Note:
        The ``timestamp`` column is ``datetime.fromtimestamp`` of the trade
        time, i.e. a naive datetime in the machine's local time zone.
    """
    url = f"{BASE_URL}Trades"
    current_since = since
    total_records = 0
    # Trades at or after this instant did not exist yet when the download began.
    started_at = time.time()

    with open(filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['timestamp', 'price', 'volume', 'buy_sell', 'market_limit', 'misc'])

        while True:
            params = {"pair": pair, "count": BATCH_SIZE}
            if current_since:
                params["since"] = current_since

            data = make_request(url, params, use_auth=True)

            if 'error' in data and data['error']:
                return {'error': data['error']}

            result = data.get('result') if isinstance(data, dict) else None
            if result is None:
                # Report it rather than raising KeyError or leaving an
                # apparently valid but empty file.
                return {'error': ['Response contained no result']}

            batch_count = 0
            newest_ts = None
            for pair_key, trades in result.items():
                if pair_key == 'last':
                    continue  # pagination cursor, not trade data
                batch_count += len(trades)
                for trade in trades:
                    # Row layout used below: [price, volume, time, side, order type, misc]
                    trade_ts = float(trade[2])
                    writer.writerow([
                        datetime.fromtimestamp(trade_ts),
                        trade[0], trade[1], trade[3], trade[4], trade[5]
                    ])
                    if newest_ts is None or trade_ts > newest_ts:
                        newest_ts = trade_ts

            total_records += batch_count
            next_since = result.get('last')

            if batch_count == 0 or next_since is None:
                break
            # Compared as text: the initial cursor may be an int while the
            # server's cursor may arrive as a string.
            if str(next_since) == str(current_since):
                break
            if newest_ts is not None and newest_ts >= started_at:
                break

            print(f"Fetched {total_records} records...")
            current_since = next_since
            time.sleep(SLEEP_DELAY)

    return {'total_records': total_records}
def get_historical_ohlc(pair, filename, interval=1, since=None):
    """Download OHLC candles page by page and stream them into a CSV file.

    Args:
        pair: Trading pair name sent as the ``pair`` parameter.
        filename: Path of the CSV file to create (overwritten if present).
        interval: Candle width sent as the ``interval`` parameter.
        since: Starting cursor/timestamp for the first request; a falsy
            value omits the parameter.

    Returns:
        ``{'total_records': n}`` on success, or ``{'error': [...]}`` as soon
        as a request reports an error.

    Another page is requested only while the response carries a ``last``
    cursor and the page holds exactly 720 candles, which this code treats as
    a full page.
    """
    endpoint = f"{BASE_URL}OHLC"
    cursor = since
    written = 0

    with open(filename, 'w', newline='') as out:
        rows = csv.writer(out)
        rows.writerow(['timestamp', 'open', 'high', 'low', 'close', 'vwap', 'volume', 'count'])

        while True:
            query = {"pair": pair, "interval": interval}
            if cursor:
                query["since"] = cursor

            reply = make_request(endpoint, query, use_auth=True)
            if 'error' in reply and reply['error']:
                return {'error': reply['error']}

            payload = reply['result']
            page_len = 0
            for key in payload:
                if key == 'last':
                    continue  # pagination cursor, not candle data
                candles = payload[key]
                page_len = len(candles)
                # Candle layout used here: [time, open, high, low, close, vwap, volume, count]
                rows.writerows(
                    [
                        datetime.fromtimestamp(float(c[0])),
                        c[1], c[2], c[3], c[4], c[5], c[6], c[7]
                    ]
                    for c in candles
                )
                written += page_len

            has_more = 'last' in payload and page_len == 720
            if not has_more:
                break

            print(f"Fetched {written} records...")
            cursor = payload['last']
            time.sleep(SLEEP_DELAY)

    return {'total_records': written}
# Dispatch table for data type handlers.
# Every handler takes (symbol, filename, since) and returns the result dict of
# the underlying download function.
def _collect_trades(symbol, filename, since):
    """Adapter: fetch tick data through get_historical_trades."""
    return get_historical_trades(symbol, filename, since)


def _collect_ohlc(symbol, filename, since):
    """Adapter: fetch candles through get_historical_ohlc at the configured INTERVAL."""
    # INTERVAL is looked up at call time, so later changes to it are honoured.
    return get_historical_ohlc(symbol, filename, INTERVAL, since)


DATA_HANDLERS = {
    "trades": _collect_trades,
    "ohlc": _collect_ohlc,
}
def test_authentication():
    """Probe the private ``OpenOrders`` endpoint to check the credentials.

    Returns:
        True when the call succeeds, or when it fails only with an error
        mentioning "permission" (the request was accepted but the key lacks
        rights). False for any other reported error.
    """
    endpoint = f"{PRIVATE_URL}OpenOrders"
    print(f"DEBUG: Testing auth with URL: {endpoint}")

    reply = make_request(endpoint, {}, use_auth=True)
    print(f"DEBUG: Full response: {reply}")

    # Guard clause: no reported error means the credentials were accepted.
    if not ('error' in reply and reply['error']):
        print("AUTH TEST PASSED: Authentication is working")
        return True

    # Tell a permission problem apart from a genuine authentication failure.
    permission_only = any('permission' in str(err).lower() for err in reply['error'])
    if permission_only:
        print("AUTH TEST PASSED: Authentication working but insufficient permissions")
        return True

    print(f"AUTH TEST FAILED: {reply['error']}")
    return False
def main():
    """Run one download as described by the module-level configuration.

    Steps: check that credentials are set, resolve the handler for
    ``DATA_TYPE``, build the output filename from ``OUTPUT_FILE``, remove any
    previous file of that name, run the handler, and report the outcome.

    ``SINCE`` may be a Unix timestamp or ``None``; with ``None`` no start time
    is passed to the handler and the filename uses ``all`` in place of the
    start date.

    Exits the process with status 1 on missing credentials, an unknown
    ``DATA_TYPE``, an API error, or any unexpected exception.
    """
    # Local import: sys.exit is always available, whereas the bare exit()
    # helper is injected by the `site` module and intended for interactive use.
    import sys

    # Check for required authentication
    if not API_KEY or not API_SECRET:
        print("ERROR: API_KEY and API_SECRET are required for data collection.")
        print("Please set your Kraken API credentials in the script configuration.")
        print("Register a free account at https://geni.us/GoKraken to get API access.")
        sys.exit(1)

    # Validate the configuration before any existing output file is deleted.
    handler = DATA_HANDLERS.get(DATA_TYPE)
    if not handler:
        print(f"Invalid DATA_TYPE '{DATA_TYPE}'. Use: {', '.join(DATA_HANDLERS.keys())}")
        sys.exit(1)

    print(f"Downloading {DATA_TYPE} data for {SYMBOL}...")
    if SINCE is None:
        # datetime.fromtimestamp(None) would raise TypeError.
        print("Fetching all available data up to now...")
        start_date = "all"
    else:
        print(f"Fetching data from {datetime.fromtimestamp(SINCE)} to now...")
        start_date = datetime.fromtimestamp(SINCE).strftime('%Y%m%d')
    print("Using authenticated requests for optimal performance")

    try:
        end_date = datetime.now().strftime('%Y%m%d')
        filename = OUTPUT_FILE.format(SYMBOL, DATA_TYPE, start_date, end_date)

        if os.path.exists(filename):
            os.remove(filename)
            print(f"Removed existing file: {filename}")

        result = handler(SYMBOL, filename, SINCE)
    except Exception as e:
        # Top-level boundary: report anything unexpected and fail the run.
        print(f"Error: {e}")
        sys.exit(1)

    if 'error' in result:
        print(f"API Error: {result['error']}")
        sys.exit(1)

    print(f"Data saved to {filename} ({result['total_records']} records)")


if __name__ == "__main__":
    main()
Configuration Parameters
The script uses several key configuration variables:
| Parameter | Description | Example Values |
|---|---|---|
| SYMBOL | Trading pair to collect | "XBTUSD", "ETHUSD", "ADAUSD" |
| DATA_TYPE | Type of data to collect | "trades", "ohlc" |
| INTERVAL | OHLC timeframe (minutes) | 1, 5, 15, 30, 60, 240, 1440 |
| DAYS | Historical lookback period | 7, 30, 90 |
| BATCH_SIZE | Records per API request | 5000 (recommended) |
| SLEEP_DELAY | Delay between requests | 0.01 to 1.0 seconds |
| API_KEY | Your Kraken API key | "your_api_key_here" |
| API_SECRET | Your Kraken API secret | "your_api_secret_here" |
Tick (Trades) Data Collection
Configuration for Tick Data
To collect individual trade data, set these parameters:
SYMBOL = "XBTUSD" # Trading pair
DATA_TYPE = "trades" # Collect tick data
DAYS = 7 # Last 7 days of data
Running for Tick Data
python kraken_historical_data.py
The script will:
- Authenticate with Kraken API
- Begin fetching trade data in batches of 5000
- Stream data directly to CSV file
- Display the running record count after each batch
- Handle rate limits automatically
Tick Data Format
Tick data CSV files contain the following columns:
| Column | Description | Example Value |
|---|---|---|
| timestamp | Trade execution time | 2024-01-15 14:23:17.123456 |
| price | Execution price | 42350.50 |
| volume | Trade volume | 0.15234 |
| buy_sell | Trade direction | "b" (buy) or "s" (sell) |
| market_limit | Order type | "m" (market) or "l" (limit) |
| misc | Additional flags | "" (usually empty) |
Sample tick data output:
timestamp,price,volume,buy_sell,market_limit,misc
2024-01-15 14:23:17.123456,42350.50,0.15234,b,m,
2024-01-15 14:23:17.234567,42351.00,0.05000,b,l,
2024-01-15 14:23:18.345678,42349.75,0.25000,s,m,
OHLC (Minute Bar) Data Collection
Configuration for OHLC Data
To collect OHLC candlestick data, set these parameters:
SYMBOL = "XBTUSD" # Trading pair
DATA_TYPE = "ohlc" # Collect OHLC data
INTERVAL = 1 # 1-minute bars
DAYS = 30 # Last 30 days of data
Available Timeframes
Kraken supports the following OHLC intervals (in minutes):
- 1 - 1 minute bars
- 5 - 5 minute bars
- 15 - 15 minute bars
- 30 - 30 minute bars
- 60 - 1 hour bars
- 240 - 4 hour bars
- 1440 - Daily bars
- 10080 - Weekly bars
- 21600 - Monthly bars
OHLC Data Format
OHLC data CSV files contain the following columns:
| Column | Description | Example Value |
|---|---|---|
| timestamp | Candle open time | 2024-01-15 14:23:00 |
| open | Opening price | 42350.50 |
| high | Highest price | 42375.25 |
| low | Lowest price | 42340.00 |
| close | Closing price | 42360.75 |
| vwap | Volume weighted average price | 42355.12 |
| volume | Total volume | 15.234567 |
| count | Number of trades | 127 |
Sample OHLC data output:
timestamp,open,high,low,close,vwap,volume,count
2024-01-15 14:23:00,42350.50,42375.25,42340.00,42360.75,42355.12,15.234567,127
2024-01-15 14:24:00,42360.75,42380.00,42355.50,42370.25,42368.45,12.567890,98
2024-01-15 14:25:00,42370.25,42385.75,42365.00,42375.50,42374.22,18.901234,156
File Naming and Output
File Naming Convention
Output files follow these patterns:
- Tick data: kraken_SYMBOL_trades_data_YYYYMMDD_to_YYYYMMDD.csv
- OHLC data: kraken_SYMBOL_ohlc_data_YYYYMMDD_to_YYYYMMDD.csv
Examples:
- kraken_XBTUSD_trades_data_20240108_to_20240115.csv
- kraken_XBTUSD_ohlc_data_20240108_to_20240115.csv
Performance Considerations
Memory Usage
The script streams data directly to CSV files rather than storing in memory, allowing collection of unlimited historical data without memory constraints.
File Sizes
Typical file sizes for major pairs:
Tick Data:
- Bitcoin (XBTUSD): ~50-100MB per day
- Ethereum (ETHUSD): ~30-60MB per day
- Altcoins: ~5-20MB per day
OHLC Data (1-minute bars):
- Bitcoin (XBTUSD): ~2-5MB per day
- Ethereum (ETHUSD): ~1-3MB per day
- Altcoins: ~0.5-1MB per day
Collection Speed
With optimal settings:
- Tick data: ~300,000 trades per hour
- OHLC data: ~500,000 bars per hour
- Full week of BTC tick data: ~2-4 hours
- Full year of BTC 1-minute bars: ~30 minutes
Troubleshooting
Common Issues
Rate Limit Errors
RATE LIMIT: Hit limit (attempt 1), waiting 5 seconds...
- Solution: Script handles automatically with exponential backoff
- Prevention: Increase SLEEP_DELAY to 0.1 or higher
Authentication Errors
AUTH ERROR: ['EAPI:Invalid key'] - Check API credentials
- Solution: Verify API key and secret are correct
- Check: Ensure API permissions include required access
Network Timeouts
DEBUG: Failed to parse JSON or make request: timeout
- Solution: Script retries automatically (3 attempts)
- Prevention: Ensure stable internet connection
Invalid Trading Pair
{'error': ['EQuery:Unknown asset pair']}
- Solution: Use Kraken's official pair names (XBTUSD, not BTCUSD)
- Reference: Check Kraken Asset Pairs
Optimization Tips
- Set BATCH_SIZE to 5000 for maximum efficiency
- Use minimal SLEEP_DELAY (0.01s) for maximum speed
- Monitor rate limits - script will auto-adjust if needed
- Run during low-activity periods for faster collection
Advanced Usage
Collecting Multiple Pairs and Data Types
Modify the script to loop through multiple configurations:
# Collect both tick and OHLC data for multiple pairs.
# Add this to the script in place of the single-run call to main(); it relies
# on the script's get_historical_trades, get_historical_ohlc, INTERVAL and SINCE.
configs = [
    {"symbol": "XBTUSD", "data_type": "trades"},
    {"symbol": "XBTUSD", "data_type": "ohlc", "interval": 1},
    {"symbol": "ETHUSD", "data_type": "trades"},
    {"symbol": "ETHUSD", "data_type": "ohlc", "interval": 5},
]
for config in configs:
    # Run collection for each configuration
    symbol = config["symbol"]
    data_type = config["data_type"]
    if data_type == "ohlc":
        # Fall back to the script-wide INTERVAL when a config omits its own.
        interval = config.get("interval", INTERVAL)
        # The interval is part of the name so that several timeframes of the
        # same pair do not overwrite each other.
        filename = f"kraken_{symbol}_ohlc_{interval}m.csv"
        result = get_historical_ohlc(symbol, filename, interval, SINCE)
    else:
        filename = f"kraken_{symbol}_trades.csv"
        result = get_historical_trades(symbol, filename, SINCE)
    print(f"{symbol} {data_type}: {result}")
Custom Date Ranges
Set specific start/end dates instead of days lookback:
from datetime import datetime
SINCE = int(datetime(2024, 1, 1).timestamp()) # Start from Jan 1, 2024
Integration with Analysis Tools
The CSV output integrates seamlessly with:
- Pandas for data analysis
- NumPy for numerical computation
- Matplotlib/Plotly for visualization
- Zipline/Backtrader for backtesting
- TimescaleDB for time-series storage
Next Steps
Once you have historical data collected:
With Tick Data:
- Analyze market microstructure patterns
- Build order flow indicators
- Develop high-frequency strategies
- Create custom aggregations (volume bars, tick bars)
- Backtest algorithms with realistic execution modeling
With OHLC Data:
- Build technical indicators (RSI, MACD, Bollinger Bands)
- Develop swing trading strategies
- Create multi-timeframe analysis
- Backtest position-based algorithms
- Perform statistical analysis of price movements