I'm working on an app that displays content shared by users, and I'm using PostgreSQL arrays to handle the security model.
We support public and private content, and I have two queries that I need to optimize. According to the PostgreSQL documentation, GIN indexes are the right choice for indexing array columns, but I can't get PostgreSQL to pick them.
Here are my table and index definitions:
-- public.usershares: one row per piece of shared content, together with the
-- three access lists (roles, claims, users) that the queries in this file
-- filter on, and a flag separating private from public content.
CREATE TABLE public.usershares (
    id            integer NOT NULL,
    title         text,
    sharedcontent text,
    shared_on     timestamp without time zone,
    roles         text[],     -- matched with && (overlap) by the queries
    claims        text[],     -- '{}' on public rows, non-empty on private rows
    users         integer[],  -- matched with && (overlap) by the queries
    private       boolean,    -- nullable: NULL rows fall outside every partial index
    CONSTRAINT pk_id PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

ALTER TABLE public.usershares OWNER TO postgres;
-- idx_arrays_private: partial multicolumn GIN index over the three access
-- lists, limited to private rows whose claims array is not empty.
-- roles and claims carry no COLLATE clause in the table definition, so the
-- index inherits the same default collation the original spelled out.
CREATE INDEX idx_arrays_private
    ON public.usershares USING gin (roles, claims, users)
    WHERE private = true
      AND claims <> '{}'::text[];
-- idx_arrays_public: partial multicolumn GIN index over roles and users,
-- limited to public rows whose claims array is empty. claims is left out of
-- the key because the predicate already pins it to '{}'.
-- roles carries no COLLATE clause in the table definition, so the index
-- inherits the same default collation the original spelled out.
CREATE INDEX idx_arrays_public
    ON public.usershares USING gin (roles, users)
    WHERE private = false
      AND claims = '{}'::text[];
-- idx_sharedon: B-tree over every row, newest shared_on first.
CREATE INDEX idx_sharedon
    ON public.usershares USING btree (shared_on DESC);
-- idx_sharedon_notprivate: B-tree on shared_on (newest first) covering only
-- the public rows.
CREATE INDEX idx_sharedon_notprivate
    ON public.usershares USING btree (shared_on DESC)
    WHERE private = false;
-- idx_sharedon_private: B-tree on shared_on (newest first) covering only
-- the private rows.
CREATE INDEX idx_sharedon_private
    ON public.usershares USING btree (shared_on DESC)
    WHERE private = true;
QUERY #1: Although I have a partial GIN index for this query ('idx_arrays_private'), PostgreSQL uses 'idx_sharedon_private', which is also a partial index but does not include the array columns (roles, claims and users).
-- Private content, newest first, rows 101-200.
-- The first two predicates repeat the WHERE clause of the partial indexes
-- idx_arrays_private and idx_sharedon_private.
-- NOTE(review): shared_on is not unique, so which rows land on this page is
-- not guaranteed to be stable between runs; consider a tie-breaker such as id.
SELECT *
FROM usershares AS us
WHERE us.private = true
  AND us.claims <> '{}'
  AND (
        (
            -- the row's roles share at least one element with the given roles
            us.roles && ARRAY['adminX']
            -- and the row's claims contain every given claim.
            -- NOTE(review): the original comment read "have all the required
            -- claims"; as written this checks given-claims within row-claims,
            -- not row-claims within given-claims. Confirm the intended direction.
            AND us.claims @> ARRAY['managerX']
        )
        -- or the row's authorized users overlap the given ids; an empty array
        -- overlaps nothing, so this branch matches no rows as written
        OR us.users && ARRAY[]::integer[]
      )
ORDER BY us.shared_on DESC
LIMIT 100
OFFSET 100;
QUERY #2: Although I also have a partial GIN index for this query ('idx_arrays_public'), PostgreSQL uses 'idx_sharedon_notprivate', which is also a partial index but does not include the array columns (roles, claims and users).
-- Public content, newest first, rows 101-200.
-- The first two predicates repeat the WHERE clause of the partial index
-- idx_arrays_public.
-- NOTE(review): shared_on is not unique, so which rows land on this page is
-- not guaranteed to be stable between runs; consider a tie-breaker such as id.
SELECT *
FROM usershares AS us
WHERE us.private = false
  AND us.claims = '{}'
  AND (
        -- the row's roles share at least one element with the given roles
        us.roles && ARRAY['admin']
        -- or the row's authorized users include at least one of these ids
        OR us.users && ARRAY[1, 2, 3, 4, 5]
      )
ORDER BY us.shared_on DESC
LIMIT 100
OFFSET 100;
Script to generate test data:
-- Start from an empty table, then load ids 1..500000 as public rows:
-- roles {admin,registered}, no claims, no explicitly authorized users.
-- now() is the transaction start time, so every row written by this
-- statement gets the same shared_on value.
TRUNCATE TABLE usershares;

INSERT INTO usershares
    (id, title, sharedcontent, shared_on, roles, claims, users, private)
SELECT
    g.n,
    'title #' || g.n,
    'content #' || g.n,
    now(),
    ARRAY['admin', 'registered'],
    '{}'::text[],
    '{}'::integer[],
    false
FROM generate_series(1, 500000) AS g(n);
-- Load ids 500001..750000 as private rows: roles {admin,registered},
-- claims {manager,director}, and users 1..10 explicitly authorized.
-- now() is the transaction start time, so every row written by this
-- statement gets the same shared_on value.
INSERT INTO usershares
    (id, title, sharedcontent, shared_on, roles, claims, users, private)
SELECT
    g.n,
    'title #' || g.n,
    'content #' || g.n,
    now(),
    ARRAY['admin', 'registered'],
    ARRAY['manager', 'director'],
    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    true
FROM generate_series(500001, 750000) AS g(n);
-- Load ids 750001..1000000 as private rows: roles {adminX,registeredX},
-- claims {managerX,directorX}, and no explicitly authorized users.
-- now() is the transaction start time, so every row written by this
-- statement gets the same shared_on value.
INSERT INTO usershares
    (id, title, sharedcontent, shared_on, roles, claims, users, private)
SELECT
    g.n,
    'title #' || g.n,
    'content #' || g.n,
    now(),
    ARRAY['adminX', 'registeredX'],
    ARRAY['managerX', 'directorX'],
    '{}'::integer[],
    true
FROM generate_series(750001, 1000000) AS g(n);

-- Refresh planner statistics after the bulk load. Without this, plans
-- examined right after seeding are costed from missing or stale statistics
-- (autovacuum only analyzes the table at some later point), which makes any
-- conclusion about index selection unreliable.
ANALYZE usershares;