graphile / crystal

🔮 Graphile's Crystal Monorepo; home to Grafast, PostGraphile, pg-introspection, pg-sql2 and much more!
https://graphile.org/
Other
12.62k stars 571 forks source link

Non-deterministic sorting when querying a `setof` function and relational data #2210

Open Tam opened 1 month ago

Tam commented 1 month ago

Summary

Original discussion on Discord https://discord.com/channels/489127045289476126/1293563114125787197

When querying a function that returns a pre-sorted setof, the values returned are non-deterministic if relational data is also queried.

Steps to reproduce

A "minimal" database structure and sample data:

create role support_test;
create role super_test;

-- Asset
create table public.test_asset (
  id uuid primary key
, url varchar
, created_at timestamptz default now()
);

grant select on public.test_asset to public;

create function public.test_asset_src (asset public.test_asset) returns varchar as $$
begin
  return asset.url;
end;
$$ language plpgsql stable;

grant execute on function public.test_asset_src(public.test_asset) to public;

-- Account

create table public.test_account (
  id uuid primary key
);

alter table public.test_account enable row level security;
grant select on public.test_account to public;

create policy can_select_support_test on public.test_account for select to support_test using (false);
create policy can_select_super_test on public.test_account for select to super_test using (true);

-- User
create table public.test_user (
  test_account_id uuid primary key references public.test_account(id)
, test_asset_id uuid references public.test_asset (id)
, name varchar
, created_at timestamptz default now()
);

alter table public.test_user enable row level security;
grant select on public.test_user to public;

create policy can_select_support_test on public.test_user for select to support_test using (exists(select 1 from public.test_account where id = test_account_id));
create policy can_select_super_test on public.test_user for select to super_test using (exists(select 1 from public.test_account where id = test_account_id));

create index test_user_test_account_id_idx on public.test_user(test_account_id);
create index test_user_test_asset_id_idx on public.test_user(test_asset_id);

comment on column public.test_user.test_account_id is E'@name id';

-- Chat

create table public.test_chat (
  id uuid primary key
);

grant select on public.test_chat to public;

-- Message

create table public.test_message (
  id uuid primary key
, test_chat_id uuid references public.test_chat (id)
, test_user_id uuid references public.test_user (test_account_id)
, message text
, created_at timestamptz
);

grant select on public.test_message to public;

create index test_message_test_chat_id_idx on public.test_message(test_chat_id);
create index test_message_test_user_id_idx on public.test_message(test_user_id);

comment on table public.test_message is E'@behaviour -connection';

-- Messages

create function public.test_messages (test_chat_id uuid) returns setof public.test_message as $$
declare
  _aThing int;
begin
  select count(*) into _aThing
    from public.test_message m
   where m.test_chat_id = $1;

  if _aThing > 100 then
    raise exception 'Please don''t inline me';
  else
    return query select m.* from public.test_message m where m.test_chat_id = $1 order by created_at desc;
  end if;
end
$$ language plpgsql stable;

grant execute on function public.test_messages(uuid) to public;
comment on function public.test_messages(uuid) is E'@behaviour +connection';

-- Sample data

insert into public.test_asset (id, url)
values ('f9c8397a-97bb-4ffe-be3f-86c1eb71c03d', 'https://picsum.photos/200');

insert into public.test_account (id)
values ('a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc')
     , ('935945c1-d824-4a98-93e5-c22215c58982');

insert into public.test_user (test_account_id, name, test_asset_id)
values ('a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc', 'Bob', 'f9c8397a-97bb-4ffe-be3f-86c1eb71c03d')
     , ('935945c1-d824-4a98-93e5-c22215c58982', 'John', null);

insert into public.test_chat (id)
values ('0d126c0c-9710-478c-9aee-0be34b250573')
     , ('c46b4b59-0a29-4211-8e0f-659cb3e01c2f');

insert into public.test_message (id, test_chat_id, message, test_user_id, created_at)
values ('e0849772-7070-4fdf-8438-1ef846fc0daf', '0d126c0c-9710-478c-9aee-0be34b250573', 'Bob says 3', 'a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc', now() - '1 minute'::interval)
     , ('c8a660af-7021-4360-b019-ee404014b3cb', '0d126c0c-9710-478c-9aee-0be34b250573', 'Bob says 2', 'a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc', now() - '3 minutes'::interval)
     , ('6e2db5cb-8757-4b8a-9d19-a6a676a214d2', '0d126c0c-9710-478c-9aee-0be34b250573', 'John says 3', '935945c1-d824-4a98-93e5-c22215c58982', now() - '2 minutes'::interval)
     , ('7dbc5c82-3c1f-463e-a97a-aaff09dc8a28', '0d126c0c-9710-478c-9aee-0be34b250573', 'Bob says 1', 'a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc', now() - '5 minutes'::interval)
     , ('5751f977-209d-45ab-8620-b647ff67ded6', 'c46b4b59-0a29-4211-8e0f-659cb3e01c2f', 'A different chat', 'a13b8bac-f2c7-4444-bac6-4ae7c9c28bbc', now() - '3 minutes 30 seconds'::interval)
     , ('8b9e89dc-2e1b-461a-94d5-3afafa4f87ad', '0d126c0c-9710-478c-9aee-0be34b250573', 'John says 2', '935945c1-d824-4a98-93e5-c22215c58982', now() - '4 minutes'::interval)
     , ('cc20ffeb-0701-4619-acc3-4a9b67671272', '0d126c0c-9710-478c-9aee-0be34b250573', 'John says 1', '935945c1-d824-4a98-93e5-c22215c58982', now() - '6 minutes'::interval)

And this GraphQL query (assuming you are connected to the database as a super_test role):

query Test ($after: Cursor) {
  testMessages (
    testChatId: "0d126c0c-9710-478c-9aee-0be34b250573"
    first: 50
    after: $after
  ) {
    nodes {
      id
      message
      createdAt
      testUser {
        id
        name
      }
    }
    pageInfo {
      hasNextPage
      endCursor
    }
  }
}

Expected results

The messages should be sorted by createdAt, as specified in the public.test_messages function. (The correct order in the example above is messages counting down from 3 to 1, alternating between Bob and John each message).

Actual results

The messages are sorted somewhat randomly (in my case it was sorting by User.id).

Possible Solution

Quoting Benji on Discord:

My guess is that we're treating the function as a table, and then doing a left join to users at which point the order of values returned from the function is no longer guaranteed (Postgres can choose any order it wishes if it's not explicit; it just happens that it sticks to function order when you only query the function). In that case, what we should do is select ... from my_func() with ordinality left join users ... order by ordinality or select ... from (select row_number() over (partition by 1), v from my_func() v) t left join users ... order by t.row_number or something like that.