dimitri / pgloader

Migrate to PostgreSQL in a single command!
http://pgloader.io
Other
5.26k stars 536 forks source link

csv load error in postgres v12 using pgload (v 3.6.1) on Ubuntu 20.04 on Windows 10 WSL2 #1188

Open ajit555 opened 4 years ago

ajit555 commented 4 years ago
    # Below docker command installs Postgres version 12 container.
    docker run \
    --name pgloader \
    -d -p 5432:5432  \
    -e POSTGRES_PASSWORD=pg1234  \
    --mount type=bind,source="$(pwd)",target=/src  \
    postgres

   docker exec -it pgloader bash
   createdb -U postgres --encoding=utf-8 pgloader
   psql -U postgres
   \l
   \c pgloader
LOAD CSV
     FROM INLINE with encoding 'ascii'
     INTO postgresql:///pgloader
     TARGET TABLE jordane

     WITH truncate,
          fields terminated by '|',
          fields not enclosed,
          fields escaped by backslash-quote

      SET work_mem to '128MB',
          standard_conforming_strings to 'on'

   BEFORE LOAD DO
    $$ drop table if exists jordane; $$,
    $$ CREATE TABLE jordane
       (
         "NOM" character(20),
         "PRENOM" character(20)
       )
    $$;

BORDET|Jordane
BORDET|Audrey
LASTNAME|"opening quote
BONNIER|testprenombe~aucouptroplong
JOURDAIN|héhé¶
00:38 $ pgloader csv-test.load
2020-07-12T19:11:42.006000Z LOG pgloader version "3.6.1"
2020-07-12T19:11:42.008000Z LOG Parsing commands from file #P"/mnt/d/ajit-code/gh/mindmap/postgres/pgloader/pgloader-csv/csv-test.load"
2020-07-12T19:11:42.114000Z ERROR Database error 42703: column "adsrc" does not exist
QUERY: -- params: table-type-name
--         including
--         filter-list-to-where-clause for including
--         excluding
--         filter-list-to-where-clause for excluding
with seqattr as
 (
   select adrelid, 
          adnum,
          adsrc,
          case when adsrc ~ 'nextval'
               then substring(pg_get_expr(d.adbin, d.adrelid)
                              from '''([^'']+)'''
                    )
               else null
           end as seqname
     from pg_attrdef d
 )
    select nspname, relname, c.oid, attname,
           t.oid::regtype as type,
           case when atttypmod > 0
                then substring(format_type(t.oid, atttypmod) from '\d+(?:,\d+)?')
                else null
            end as typmod,
           attnotnull,
           case when atthasdef then def.adsrc end as default,
           case when s.seqname is not null then 'auto_increment' end as extra
      from pg_class c
           join pg_namespace n on n.oid = c.relnamespace
           left join pg_attribute a on c.oid = a.attrelid
           join pg_type t on t.oid = a.atttypid and attnum > 0
           left join pg_attrdef def on a.attrelid = def.adrelid
                                   and a.attnum = def.adnum
                                   and a.atthasdef
           left join seqattr s on def.adrelid = s.adrelid
                              and def.adnum = s.adnum

     where nspname !~ '^pg_' and n.nspname <> 'information_schema'
           and relkind in ('r', 'f', 'p')
           and ((n.nspname = 'public' and c.relname = 'jordane'))

  order by nspname, relname, attnum;
2020-07-12T19:11:42.114000Z FATAL Failed to prepare target PostgreSQL table.
2020-07-12T19:11:42.114000Z FATAL Database error 42703: column "adsrc" does not exist
QUERY: -- params: table-type-name
--         including
--         filter-list-to-where-clause for including
--         excluding
--         filter-list-to-where-clause for excluding
with seqattr as
 (
   select adrelid, 
          adnum,
          adsrc,
          case when adsrc ~ 'nextval'
               then substring(pg_get_expr(d.adbin, d.adrelid)
                              from '''([^'']+)'''
                    )
               else null
           end as seqname
     from pg_attrdef d
 )
    select nspname, relname, c.oid, attname,
           t.oid::regtype as type,
           case when atttypmod > 0
                then substring(format_type(t.oid, atttypmod) from '\d+(?:,\d+)?')
                else null
            end as typmod,
           attnotnull,
           case when atthasdef then def.adsrc end as default,
           case when s.seqname is not null then 'auto_increment' end as extra
      from pg_class c
           join pg_namespace n on n.oid = c.relnamespace
           left join pg_attribute a on c.oid = a.attrelid
           join pg_type t on t.oid = a.atttypid and attnum > 0
           left join pg_attrdef def on a.attrelid = def.adrelid
                                   and a.attnum = def.adnum
                                   and a.atthasdef
           left join seqattr s on def.adrelid = s.adrelid
                              and def.adnum = s.adnum

     where nspname !~ '^pg_' and n.nspname <> 'information_schema'
           and relkind in ('r', 'f', 'p')
           and ((n.nspname = 'public' and c.relname = 'jordane'))

  order by nspname, relname, attnum;
2020-07-12T19:11:42.114000Z LOG report summary reset
       table name     errors       rows      bytes      total time
-----------------  ---------  ---------  ---------  --------------
            fetch          0          0                     0.004s
      before load          0          2                     0.021s
-----------------  ---------  ---------  ---------  --------------
-----------------  ---------  ---------  ---------  --------------
SELECT * FROM public.jordane
No records
ajit555 commented 4 years ago

Compiled the pgloader from source code.

$ pgloader --version
pgloader version "3.6.2"
compiled with SBCL 2.0.1.debian

After load, the data is loaded as below:

pgloader=# select * from jordane;
         NOM          |        PRENOM        
----------------------+----------------------
 BORDET               | Jordane             
 BORDET               | Audrey              
 LASTNAME             | "opening quote      
(3 rows)
shengyao commented 2 years ago

This issue should already be fixed by this commit: https://github.com/dimitri/pgloader/commit/8a13c02561fe030301045f9f50d94f523dd61b2c