Workflow fields
We recommend creating this workflow in the anyformat platform, where you can test with sample statements and iterate on field descriptions. Then copy the workflow ID to use with the API.
| Field | Type | Description |
|---|---|---|
| `account_holder` | string | Name on the account |
| `account_number` | string | Account number (masked or full) |
| `statement_period_start` | date | Start of statement period |
| `statement_period_end` | date | End of statement period |
| `opening_balance` | float | Balance at start of period |
| `closing_balance` | float | Balance at end of period |
| `total_deposits` | float | Sum of all deposits |
| `total_withdrawals` | float | Sum of all withdrawals |
| `transaction_count` | integer | Number of transactions |
| `transactions` | object | Individual transactions (date / description / amount / type) |
End-to-end
- curl
- TypeScript
- Python
# 1. Create the workflow
# Sends a two-node graph (parse_1 -> extract_1) to the workflows endpoint; the
# extract node carries the field schema. ANYFORMAT_API_KEY must be set in the
# shell environment because it is expanded into the Authorization header.
curl -X POST 'https://api.anyformat.ai/v2/workflows/' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $ANYFORMAT_API_KEY" \
-d '{
"name": "Bank Statement Processor",
"description": "Extract statement summary and transactions",
"nodes": [
{"id": "parse_1", "type": "parse"},
{
"id": "extract_1",
"type": "extract",
"extraction_schema": {
"fields": [
{"name": "account_holder", "description": "Name of the account holder as shown on the statement", "data_type": "string"},
{"name": "account_number", "description": "Bank account number, may be partially masked", "data_type": "string"},
{"name": "statement_period_start", "description": "First day of the statement period", "data_type": "date"},
{"name": "statement_period_end", "description": "Last day of the statement period", "data_type": "date"},
{"name": "opening_balance", "description": "Account balance at the start of the period", "data_type": "float"},
{"name": "closing_balance", "description": "Account balance at the end of the period", "data_type": "float"},
{"name": "total_deposits", "description": "Total amount deposited during the statement period", "data_type": "float"},
{"name": "total_withdrawals", "description": "Total amount withdrawn during the statement period", "data_type": "float"},
{"name": "transaction_count", "description": "Total number of transactions in the statement period", "data_type": "integer"},
{
"name": "transactions",
"description": "Individual transactions listed on the statement",
"data_type": "object",
"nested_fields": [
{"name": "date", "description": "Date of the transaction", "data_type": "date"},
{"name": "description", "description": "Transaction description or memo", "data_type": "string"},
{"name": "amount", "description": "Transaction amount (positive for deposits, negative for withdrawals)", "data_type": "float"},
{
"name": "type",
"description": "Type of transaction",
"data_type": "enum",
"enum_options": [
{"name": "deposit", "description": "Incoming deposit"},
{"name": "withdrawal", "description": "Outgoing withdrawal"},
{"name": "fee", "description": "Bank fee or charge"},
{"name": "transfer", "description": "Transfer between accounts"},
{"name": "interest", "description": "Interest earned or charged"}
]
}
]
}
]
}
}
],
"edges": [{"source": "parse_1", "target": "extract_1"}]
}'
# 2. Run each statement (stay under the 60 req/min file-submission limit)
# WORKFLOW_ID is a placeholder to substitute before running; presumably it is
# the id returned by step 1 (confirm against the API reference).
# Each file is uploaded as the multipart form field "file", and the 1-second
# sleep spaces the submissions out.
for f in statement-jan.pdf statement-feb.pdf statement-mar.xlsx; do
curl -X POST 'https://api.anyformat.ai/v2/workflows/WORKFLOW_ID/run/' \
-H "Authorization: Bearer $ANYFORMAT_API_KEY" \
-F "file=@$f"
sleep 1
done
# 3. Poll each collection_id for results
# WORKFLOW_ID and COLLECTION_ID are placeholders to substitute before running.
# NOTE(review): presumably COLLECTION_ID comes from the step 2 run response; confirm.
curl -H "Authorization: Bearer $ANYFORMAT_API_KEY" \
'https://api.anyformat.ai/v2/workflows/WORKFLOW_ID/files/COLLECTION_ID/results/'
import { Anyformat, Schema } from "@anyformat/sdk";
// Build the SDK client from the ANYFORMAT_API_KEY environment variable.
// The trailing `!` only silences the type checker; nothing here verifies at
// runtime that the variable is actually set.
const af = new Anyformat({ apiKey: process.env.ANYFORMAT_API_KEY! });
// Create the workflow once, then reuse the handle for every statement.
// The builder chain calls parse(), then extract() with the field schema below,
// then create(); the awaited result is stored in `workflow`.
const workflow = await af
.workflow("Bank Statement Processor", "Extract statement summary and transactions")
.parse()
.extract([
// Statement-level summary fields.
Schema.string("account_holder", "Name of the account holder as shown on the statement"),
Schema.string("account_number", "Bank account number, may be partially masked"),
Schema.date("statement_period_start", "First day of the statement period"),
Schema.date("statement_period_end", "Last day of the statement period"),
Schema.float("opening_balance", "Account balance at the start of the period"),
Schema.float("closing_balance", "Account balance at the end of the period"),
Schema.float("total_deposits", "Total amount deposited during the statement period"),
Schema.float("total_withdrawals", "Total amount withdrawn during the statement period"),
Schema.integer("transaction_count", "Total number of transactions in the statement period"),
// Nested object describing the individual transactions.
Schema.object("transactions", "Individual transactions listed on the statement", [
Schema.date("date", "Date of the transaction"),
Schema.string("description", "Transaction description or memo"),
Schema.float("amount", "Transaction amount (positive for deposits, negative for withdrawals)"),
// Transaction category with five named options.
Schema.enum("type", "Type of transaction", [
Schema.option("deposit", "Incoming deposit"),
Schema.option("withdrawal", "Outgoing withdrawal"),
Schema.option("fee", "Bank fee or charge"),
Schema.option("transfer", "Transfer between accounts"),
Schema.option("interest", "Interest earned or charged"),
]),
]),
])
.create();
// Submit every statement against the existing workflow id.
// `files` is intentionally left as a placeholder: assign the File objects to
// process before running this snippet (as written, the line has no initializer
// expression and will not compile).
const files: File[] = /* an array of File objects with .name set */;
// Statements are handled one at a time: each run and its wait() are awaited
// before the next file is submitted.
for (const file of files) {
const run = await workflow.run(file);
const result = await run.wait();
// Optional chaining yields undefined when field() returns null or undefined.
console.log(result.field("account_holder")?.value, result.field("closing_balance")?.value);
}
import os
from anyformat.sdk import Client
from anyformat.workflow import Schema
# Build the SDK client with the API key read from the environment;
# os.environ[...] raises KeyError if ANYFORMAT_API_KEY is not set.
client = Client(api_key=os.environ["ANYFORMAT_API_KEY"])
# Define the workflow through the builder chain: parse(), then extract() with
# the field schema below, then create(). The result is kept in `workflow`.
workflow = (
client.workflow("Bank Statement Processor")
.parse()
.extract([
# Statement-level summary fields.
Schema.string("account_holder", "Name of the account holder as shown on the statement"),
Schema.string("account_number", "Bank account number, may be partially masked"),
Schema.date("statement_period_start", "First day of the statement period"),
Schema.date("statement_period_end", "Last day of the statement period"),
Schema.float("opening_balance", "Account balance at the start of the period"),
Schema.float("closing_balance", "Account balance at the end of the period"),
Schema.float("total_deposits", "Total amount deposited during the statement period"),
Schema.float("total_withdrawals", "Total amount withdrawn during the statement period"),
Schema.integer("transaction_count", "Total number of transactions in the statement period"),
# Nested object describing the individual transactions.
Schema.object("transactions", "Individual transactions listed on the statement", fields=[
Schema.date("date", "Date of the transaction"),
Schema.string("description", "Transaction description or memo"),
Schema.float("amount", "Transaction amount (positive for deposits, negative for withdrawals)"),
# Transaction category with five named options.
Schema.enum("type", "Type of transaction", options=[
Schema.option("deposit", "Incoming deposit"),
Schema.option("withdrawal", "Outgoing withdrawal"),
Schema.option("fee", "Bank fee or charge"),
Schema.option("transfer", "Transfer between accounts"),
Schema.option("interest", "Interest earned or charged"),
]),
]),
])
.create()
)
# Process the statements one at a time: each path is passed to run(), and the
# object returned by wait() is used as that statement's result.
for path in ["statement-jan.pdf", "statement-feb.pdf", "statement-mar.xlsx"]:
    result = workflow.run(path).wait()
    print(result.fields["account_holder"].value, result.fields["closing_balance"].value)
    # Iterate transactions from the raw extractions array
    # (kept inside the loop so every statement's fees are reported, not just
    # those of the last file).
    rows = result.raw["extractions"][0]["fields"]["transactions"]
    for txn in rows:
        # Report only bank fees; abs() prints the amount without its sign.
        if txn["type"]["value"] == "fee":
            print(f"Fee: {txn['description']['value']} — ${abs(txn['amount']['value'])}")
Tips
XLSX statements yield better results than scanned PDFs — the data is structured in cells rather than requiring OCR.
- Describe `amount` as “positive for deposits, negative for withdrawals” to get a consistent sign convention.
- Submit serially with a small delay (or run multiple workflows in parallel) to stay under the 60 req/min file-submission limit.
- The integer `transaction_count` is a quick sanity check against the length of the extracted rows.
Next steps
Response formats
The unified JSON response shape
List files
Enumerate all processed files in a workflow
