The Business Analyst Agent frames questions, aligns metric definitions, discovers data, produces validated queries & summaries, and turns findings into actionable tasks and artifacts.

Purpose

Translate business questions into precise metrics and reproducible analyses.

Scope

Requirements & metric definitions, data discovery, queries/validation, visuals & summaries, backlog of questions.

Design Goals

Grounded SQL/tables/docs, traceable assumptions, read-first safe defaults on production data.

SQL Prototypes

PMPM (approximate denominator = distinct patients with paid claims per month)
with claims as (
  select
    strftime('%Y-%m-01', CLAIM_DATE) as month_start,
    CLAIM_TYPE,
    PATIENT_ID,
    coalesce(INSURANCE_PAID, 0.0) + coalesce(PATIENT_RESPONSIBILITY, 0.0) as allowed_amt
  from CLAIMS
  where CLAIM_DATE >= date('now','-6 months')
    and CLAIM_STATUS = 'PAID'
),
members as (
  select month_start, CLAIM_TYPE, count(distinct PATIENT_ID) as members
  from claims
  group by month_start, CLAIM_TYPE
),
agg as (
  select month_start, CLAIM_TYPE, sum(allowed_amt) as total_allowed
  from claims
  group by month_start, CLAIM_TYPE
)
select
  a.month_start,
  a.CLAIM_TYPE,
  a.total_allowed,
  m.members,
  case when m.members > 0 then a.total_allowed * 1.0 / m.members end as pmpm_allowed
from agg a
join members m using(month_start, CLAIM_TYPE)
order by 1,2
limit 1000;
30-day readmission rate (inpatient claims; next admission within 30 days of discharge)
with ip as (
  select
    c.PATIENT_ID,
    c.ADMISSION_DATE as admit_dt,
    c.DISCHARGE_DATE as discharge_dt,
    (select d.DIAGNOSIS_CODE
     from CLAIM_DETAILS d
     where d.CLAIM_ID = c.CLAIM_ID
     order by d.SERVICE_DATE limit 1) as diagnosis_code
  from CLAIMS c
  where c.CLAIM_TYPE = 'INPATIENT'
    and c.DISCHARGE_DATE is not null
    and c.DISCHARGE_DATE >= date('now','-6 months')
),
chain as (
  select
    PATIENT_ID,
    admit_dt,
    discharge_dt,
    diagnosis_code,
    lead(admit_dt) over (partition by PATIENT_ID order by admit_dt) as next_admit_dt
  from ip
),
flagged as (
  select *,
         case
           when next_admit_dt is not null
            and (julianday(next_admit_dt) - julianday(discharge_dt)) between 0 and 30
           then 1 else 0
         end as readmit_30d
  from chain
)
select
  strftime('%Y-%m-01', discharge_dt) as discharge_month,
  coalesce(diagnosis_code, 'UNKNOWN') as diagnosis_code,
  count(*) as discharges,
  sum(readmit_30d) as readmits_30d,
  sum(readmit_30d) * 1.0 / nullif(count(*), 0) as readmit_rate_30d
from flagged
group by 1,2
order by 1,2
limit 1000;

Data Flow Overview