perseas / Pyrseas

Provides utilities for Postgres database schema versioning.
https://perseas.github.io/
BSD 3-Clause "New" or "Revised" License
395 stars 67 forks source link

Add support Full Text Search (ALTER MAPPING FOR) #135

Open pyoner opened 8 years ago

pyoner commented 8 years ago

I have a sql file:

CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french );
ALTER TEXT SEARCH CONFIGURATION fr ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, french_stem;

CREATE TEXT SEARCH CONFIGURATION en ( COPY = english );
ALTER TEXT SEARCH CONFIGURATION en ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, english_stem;

CREATE TEXT SEARCH CONFIGURATION de ( COPY = german );
ALTER TEXT SEARCH CONFIGURATION de ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, german_stem;

CREATE TEXT SEARCH CONFIGURATION nl ( COPY = dutch );
ALTER TEXT SEARCH CONFIGURATION nl ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, dutch_stem;

CREATE TEXT SEARCH CONFIGURATION da ( COPY = danish );
ALTER TEXT SEARCH CONFIGURATION da ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, danish_stem;

CREATE TEXT SEARCH CONFIGURATION fi ( COPY = finnish );
ALTER TEXT SEARCH CONFIGURATION fi ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, finnish_stem;

CREATE TEXT SEARCH CONFIGURATION hu ( COPY = hungarian );
ALTER TEXT SEARCH CONFIGURATION hu ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, hungarian_stem;

CREATE TEXT SEARCH CONFIGURATION it ( COPY = italian );
ALTER TEXT SEARCH CONFIGURATION it ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, italian_stem;

CREATE TEXT SEARCH CONFIGURATION no ( COPY = norwegian );
ALTER TEXT SEARCH CONFIGURATION no ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, norwegian_stem;

CREATE TEXT SEARCH CONFIGURATION pt ( COPY = portuguese );
ALTER TEXT SEARCH CONFIGURATION pt ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, portuguese_stem;

CREATE TEXT SEARCH CONFIGURATION ro ( COPY = romanian );
ALTER TEXT SEARCH CONFIGURATION ro ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, romanian_stem;

CREATE TEXT SEARCH CONFIGURATION ru ( COPY = russian );
ALTER TEXT SEARCH CONFIGURATION ru ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, russian_stem;

CREATE TEXT SEARCH CONFIGURATION es ( COPY = spanish );
ALTER TEXT SEARCH CONFIGURATION es ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, spanish_stem;

CREATE TEXT SEARCH CONFIGURATION sv ( COPY = swedish );
ALTER TEXT SEARCH CONFIGURATION sv ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, swedish_stem;

CREATE TEXT SEARCH CONFIGURATION tr ( COPY = turkish );
ALTER TEXT SEARCH CONFIGURATION tr ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, turkish_stem;

CREATE TEXT SEARCH CONFIGURATION usimple ( COPY = simple );
ALTER TEXT SEARCH CONFIGURATION usimple ALTER MAPPING
FOR hword, hword_part, word WITH unaccent, simple;

and dbtoyaml output:

schema public:
  description: standard public schema
  owner: postgres
  privileges:
  - postgres:
    - all
  - PUBLIC:
    - all
  text search configuration da:
    owner: postgres
    parser: pg_catalog.default
  text search configuration de:
    owner: postgres
    parser: pg_catalog.default
  text search configuration en:
    owner: postgres
    parser: pg_catalog.default
  text search configuration es:
    owner: postgres
    parser: pg_catalog.default
  text search configuration fi:
    owner: postgres
    parser: pg_catalog.default
  text search configuration fr:
    owner: postgres
    parser: pg_catalog.default
  text search configuration hu:
    owner: postgres
    parser: pg_catalog.default
  text search configuration it:
    owner: postgres
    parser: pg_catalog.default
  text search configuration nl:
    owner: postgres
    parser: pg_catalog.default
  text search configuration no:
    owner: postgres
    parser: pg_catalog.default
  text search configuration pt:
    owner: postgres
    parser: pg_catalog.default
  text search configuration ro:
    owner: postgres
    parser: pg_catalog.default
  text search configuration ru:
    owner: postgres
    parser: pg_catalog.default
  text search configuration sv:
    owner: postgres
    parser: pg_catalog.default
  text search configuration tr:
    owner: postgres
    parser: pg_catalog.default
  text search configuration usimple:
    owner: postgres
    parser: pg_catalog.default
  text search dictionary unaccent:
    options: rules = 'unaccent'
    owner: postgres
    template: unaccent
  text search template unaccent:
    init: unaccent_init
    lexize: unaccent_lexize

and yamltodb output:

BEGIN;
CREATE EXTENSION unaccent
    VERSION '1.0';

COMMENT ON EXTENSION unaccent IS 'text search dictionary that removes accents';

CREATE TEXT SEARCH DICTIONARY unaccent (
    TEMPLATE = unaccent,
    rules = 'unaccent');

ALTER TEXT SEARCH DICTIONARY unaccent OWNER TO postgres;

CREATE TEXT SEARCH TEMPLATE unaccent (
    INIT = unaccent_init,
    LEXIZE = unaccent_lexize);

CREATE TEXT SEARCH CONFIGURATION pt (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION pt OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION no (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION no OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION hu (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION hu OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION es (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION es OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION de (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION de OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION ru (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION ru OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION tr (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION tr OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION fr (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION fr OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION sv (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION sv OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION en (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION en OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION fi (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION fi OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION nl (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION nl OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION ro (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION ro OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION da (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION da OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION usimple (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION usimple OWNER TO postgres;

CREATE TEXT SEARCH CONFIGURATION it (
    PARSER = pg_catalog.default);

ALTER TEXT SEARCH CONFIGURATION it OWNER TO postgres;

COMMIT;

but yamltodb not equal to my sql file.

Can we fix it?

jmafc commented 8 years ago

Yes, we ought to fix that, but it may take a while. It looks like an oversight on our part, probably because a MAPPING apparently can only be added with ALTER (and we tended to focus on the CREATE statements, and the catalogs, but we obviously missed pg_ts_config_map).

jmafc commented 8 years ago

I started to work on this, but it's unclear what would be the minimal best way to handle these mappings.

To start off, I issued the following statements against my test database:

CREATE TEXT SEARCH CONFIGURATION tsc1 (COPY = simple);
ALTER TEXT SEARCH CONFIGURATION tsc1
    ALTER MAPPING FOR hword, word WITH english_stem, simple;

The only apparent way to display the mappings in psql (other than querying the pg_ts_config_map catalog which only shows OIDs/integers) is to use \dF+ tsc1, which shows:

Text search configuration "public.tsc1"
Parser: "pg_catalog.default"
      Token      |    Dictionaries     
-----------------+---------------------
 asciihword      | simple
 asciiword       | simple
 email           | simple
 file            | simple
 float           | simple
 host            | simple
 hword           | english_stem,simple
 hword_asciipart | simple
 hword_numpart   | simple
 hword_part      | simple
 int             | simple
 numhword        | simple
 numword         | simple
 sfloat          | simple
 uint            | simple
 url             | simple
 url_path        | simple
 version         | simple
 word            | english_stem,simple

dbtoyaml will have to query pg_config_ts_map and probably output something similar, i.e., a list of tokens and associated dictionaries for each text search configuration. And of course, yamltodb will have to be changed to read that in.

The pg_dump -s output is as follows (relevant section only):

CREATE TEXT SEARCH CONFIGURATION tsc1 (
    PARSER = pg_catalog."default" );

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR asciiword WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR word WITH english_stem, simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR numword WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR email WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR url WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR host WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR sfloat WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR version WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR hword_numpart WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR hword_part WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR hword_asciipart WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR numhword WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR asciihword WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR hword WITH english_stem, simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR url_path WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR file WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR "float" WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR "int" WITH simple;

ALTER TEXT SEARCH CONFIGURATION tsc1
    ADD MAPPING FOR uint WITH simple;

The bottom line is that it is apparently too complicated to reverse engineer what were the original CREATE and ALTER statements, since obviously the "COPY" adds a bunch of rows to pg_ts_config_map and the ALTER MAPPING changes some of those. yamltodb will probably have to emulate this pg_dump behavior.

jmafc commented 5 years ago

@pyoner Please read https://pyrseas.wordpress.com/2018/09/12/the-future-of-pyrseas-revisited/ . As mentioned previously, although I started to work on this, I was stuck when I tried to come up with a suitable solution.