heavyai / heavydb

HeavyDB (formerly OmniSciDB)
https://heavy.ai
Apache License 2.0
2.94k stars 445 forks source link

Infinite loop (query never finish) #577

Open alexey-milovidov opened 4 years ago

alexey-milovidov commented 4 years ago

I execute the following query on CPU version of OmniSci:

SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;

in multiple runs. The first run completed successfully (in 30 seconds). But the second run cannot finish in more than a hour.

Table definition:

CREATE TABLE hits
(
    "WatchID" BIGINT, 
    "JavaEnable" TINYINT, 
    "Title" TEXT ENCODING DICT, 
    "GoodEvent" SMALLINT, 
    "EventTime" TIMESTAMP ENCODING FIXED(32), 
    "EventDate" ENCODING DAYS(16) Date, 
    "CounterID" INTEGER, 
    "ClientIP" INTEGER, 
    "RegionID" INTEGER, 
    "UserID" BIGINT, 
    "CounterClass" TINYINT, 
    "OS" TINYINT, 
    "UserAgent" TINYINT, 
    "URL" TEXT ENCODING DICT, 
    "Referer" TEXT ENCODING DICT, 
    "Refresh" TINYINT, 
    "RefererCategoryID" SMALLINT, 
    "RefererRegionID" INTEGER, 
    "URLCategoryID" SMALLINT, 
    "URLRegionID" INTEGER, 
    "ResolutionWidth" SMALLINT, 
    "ResolutionHeight" SMALLINT, 
    "ResolutionDepth" TINYINT, 
    "FlashMajor" TINYINT, 
    "FlashMinor" TINYINT, 
    "FlashMinor2" TEXT ENCODING DICT, 
    "NetMajor" TINYINT, 
    "NetMinor" TINYINT, 
    "UserAgentMajor" SMALLINT, 
    "UserAgentMinor" TEXT ENCODING DICT(16), 
    "CookieEnable" TINYINT, 
    "JavascriptEnable" TINYINT, 
    "IsMobile" TINYINT, 
    "MobilePhone" TINYINT, 
    "MobilePhoneModel" TEXT ENCODING DICT, 
    "Params" TEXT ENCODING DICT, 
    "IPNetworkID" INTEGER, 
    "TraficSourceID" TINYINT, 
    "SearchEngineID" SMALLINT, 
    "SearchPhrase" TEXT ENCODING DICT, 
    "AdvEngineID" TINYINT, 
    "IsArtifical" TINYINT, 
    "WindowClientWidth" SMALLINT, 
    "WindowClientHeight" SMALLINT, 
    "ClientTimeZone" SMALLINT, 
    "ClientEventTime" TIMESTAMP ENCODING FIXED(32), 
    "SilverlightVersion1" TINYINT, 
    "SilverlightVersion2" TINYINT, 
    "SilverlightVersion3" INTEGER, 
    "SilverlightVersion4" SMALLINT, 
    "PageCharset" TEXT ENCODING DICT, 
    "CodeVersion" INTEGER, 
    "IsLink" TINYINT, 
    "IsDownload" TINYINT, 
    "IsNotBounce" TINYINT, 
    "FUniqID" BIGINT, 
    "OriginalURL" TEXT ENCODING DICT, 
    "HID" INTEGER, 
    "IsOldCounter" TINYINT, 
    "IsEvent" TINYINT, 
    "IsParameter" TINYINT, 
    "DontCountHits" TINYINT, 
    "WithHash" TINYINT, 
    "HitColor" TEXT ENCODING DICT(8), 
    "LocalEventTime" TIMESTAMP ENCODING FIXED(32), 
    "Age" TINYINT, 
    "Sex" TINYINT, 
    "Income" TINYINT, 
    "Interests" SMALLINT, 
    "Robotness" TINYINT, 
    "RemoteIP" INTEGER, 
    "WindowName" INTEGER, 
    "OpenerName" INTEGER, 
    "HistoryLength" SMALLINT, 
    "BrowserLanguage" TEXT ENCODING DICT(16), 
    "BrowserCountry" TEXT ENCODING DICT(16), 
    "SocialNetwork" TEXT ENCODING DICT, 
    "SocialAction" TEXT ENCODING DICT, 
    "HTTPError" SMALLINT, 
    "SendTiming" INTEGER, 
    "DNSTiming" INTEGER, 
    "ConnectTiming" INTEGER, 
    "ResponseStartTiming" INTEGER, 
    "ResponseEndTiming" INTEGER, 
    "FetchTiming" INTEGER, 
    "SocialSourceNetworkID" TINYINT, 
    "SocialSourcePage" TEXT ENCODING DICT, 
    "ParamPrice" BIGINT, 
    "ParamOrderID" TEXT ENCODING DICT, 
    "ParamCurrency" TEXT ENCODING DICT, 
    "ParamCurrencyID" SMALLINT, 
    "OpenstatServiceName" TEXT ENCODING DICT, 
    "OpenstatCampaignID" TEXT ENCODING DICT, 
    "OpenstatAdID" TEXT ENCODING DICT, 
    "OpenstatSourceID" TEXT ENCODING DICT, 
    "UTMSource" TEXT ENCODING DICT, 
    "UTMMedium" TEXT ENCODING DICT, 
    "UTMCampaign" TEXT ENCODING DICT, 
    "UTMContent" TEXT ENCODING DICT, 
    "UTMTerm" TEXT ENCODING DICT, 
    "FromTag" TEXT ENCODING DICT, 
    "HasGCLID" TINYINT, 
    "RefererHash" BIGINT, 
    "URLHash" BIGINT, 
    "CLID" INTEGER
);

How to fill the table:

  1. Download the "extended version of the hits table containing 100 million rows" from here: https://clickhouse.tech/docs/en/getting-started/example-datasets/metrica/

  2. Insert into ClickHouse.

  3. Transform to CSV with the following query:

    SELECT 
    toInt64(WatchID), 
    toInt8(JavaEnable), 
    toValidUTF8(toString(Title)), 
    toInt16(GoodEvent), 
    EventTime, 
    EventDate, 
    toInt32(CounterID), 
    toInt32(ClientIP), 
    toInt32(RegionID), 
    toInt64(UserID), 
    toInt8(CounterClass), 
    toInt8(OS), 
    toInt8(UserAgent), 
    toValidUTF8(toString(URL)), 
    toValidUTF8(toString(Referer)), 
    toInt8(Refresh), 
    toInt16(RefererCategoryID), 
    toInt32(RefererRegionID), 
    toInt16(URLCategoryID), 
    toInt32(URLRegionID), 
    toInt16(ResolutionWidth), 
    toInt16(ResolutionHeight), 
    toInt8(ResolutionDepth), 
    toInt8(FlashMajor), 
    toInt8(FlashMinor), 
    toValidUTF8(toString(FlashMinor2)), 
    toInt8(NetMajor), 
    toInt8(NetMinor), 
    toInt16(UserAgentMajor), 
    toValidUTF8(toString(UserAgentMinor)), 
    toInt8(CookieEnable), 
    toInt8(JavascriptEnable), 
    toInt8(IsMobile), 
    toInt8(MobilePhone), 
    toValidUTF8(toString(MobilePhoneModel)), 
    toValidUTF8(toString(Params)), 
    toInt32(IPNetworkID), 
    toInt8(TraficSourceID), 
    toInt16(SearchEngineID), 
    toValidUTF8(toString(SearchPhrase)), 
    toInt8(AdvEngineID), 
    toInt8(IsArtifical), 
    toInt16(WindowClientWidth), 
    toInt16(WindowClientHeight), 
    toInt16(ClientTimeZone), 
    ClientEventTime, 
    toInt8(SilverlightVersion1), 
    toInt8(SilverlightVersion2), 
    toInt32(SilverlightVersion3), 
    toInt16(SilverlightVersion4), 
    toValidUTF8(toString(PageCharset)), 
    toInt32(CodeVersion), 
    toInt8(IsLink), 
    toInt8(IsDownload), 
    toInt8(IsNotBounce), 
    toInt64(FUniqID), 
    toValidUTF8(toString(OriginalURL)), 
    toInt32(HID), 
    toInt8(IsOldCounter), 
    toInt8(IsEvent), 
    toInt8(IsParameter), 
    toInt8(DontCountHits), 
    toInt8(WithHash), 
    toValidUTF8(toString(HitColor)), 
    LocalEventTime, 
    toInt8(Age), 
    toInt8(Sex), 
    toInt8(Income), 
    toInt16(Interests), 
    toInt8(Robotness), 
    toInt32(RemoteIP), 
    toInt32(WindowName), 
    toInt32(OpenerName), 
    toInt16(HistoryLength), 
    toValidUTF8(toString(BrowserLanguage)), 
    toValidUTF8(toString(BrowserCountry)), 
    toValidUTF8(toString(SocialNetwork)), 
    toValidUTF8(toString(SocialAction)), 
    toInt16(HTTPError), 
    toInt32(SendTiming), 
    toInt32(DNSTiming), 
    toInt32(ConnectTiming), 
    toInt32(ResponseStartTiming), 
    toInt32(ResponseEndTiming), 
    toInt32(FetchTiming), 
    toInt8(SocialSourceNetworkID), 
    toValidUTF8(toString(SocialSourcePage)), 
    toInt64(ParamPrice), 
    toValidUTF8(toString(ParamOrderID)), 
    toValidUTF8(toString(ParamCurrency)), 
    toInt16(ParamCurrencyID), 
    toValidUTF8(toString(OpenstatServiceName)), 
    toValidUTF8(toString(OpenstatCampaignID)), 
    toValidUTF8(toString(OpenstatAdID)), 
    toValidUTF8(toString(OpenstatSourceID)), 
    toValidUTF8(toString(UTMSource)), 
    toValidUTF8(toString(UTMMedium)), 
    toValidUTF8(toString(UTMCampaign)), 
    toValidUTF8(toString(UTMContent)), 
    toValidUTF8(toString(UTMTerm)), 
    toValidUTF8(toString(FromTag)), 
    toInt8(HasGCLID), 
    toInt64(RefererHash), 
    toInt64(URLHash), 
    toInt32(CLID)
    FROM hits_100m_obfuscated
    INTO OUTFILE 'hits_100m_obfuscated.csv'
    FORMAT CSV;
  4. Insert into OmniSci with the following query:

    COPY hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated.csv' WITH (HEADER = 'false');
alexey-milovidov commented 4 years ago

The omnisci_server process is using 100% of single CPU core, according to perf top, it is spending the time in LLVM generated code.

alexey-milovidov commented 4 years ago

Hardware: Xeon E5-2650v2 (32 logical cores), 128 GiB RAM, 40 TB HDD in mdRAID-5.

cdessanti commented 4 years ago

Hi @alexey-milovidov,

I can't get the reason I would load a tsv file into clickhouse and export into a csv file, when I can load directly into omnisci database, just changing the separator from the default to the tab.

This is a link to the docs for the copy command

https://docs.omnisci.com/loading-and-exporting-data/command-line/export-data

Beside if that I will check what's wrong with this query ASAP; it's nota query where our database shine, but it should take forever

alexey-milovidov commented 4 years ago

just changing the separator from the default to the tab

It would not work, we need to convert unsigned numbers to signed (OmniSci does not support BIGINT UNSIGNED) and turn string fields to valid UTF-8 (OmniSci does not support BLOB).

cdessanti commented 4 years ago

Hi @alexey-milovidov ,

just loaded the table with some fields using a different datatype (basically all id fields have been changed from bigint/integer to text encoded)

populated the table with the original TSV downloaded from Clickhouse's tutorial and loaded multiple times to have 106M records into the table with this copy command

copy hits_v1 from '/opt/root_ubuntu18/opt/opendata/hits_visit/hits_v1.tsv.xz' with (header='false', delimiter='\t', array_marker='[]', quoted='false');

the query wall time is with 2 GPUs is floating between 390 and 410 ms with an AMD Threadripper, 1920X CPU 12c/24t is floating between 2910 and 2960 ms (3 cores used) with an AMD Threadripper, 1920X CPU 12c/24t and a data balanced table (sharded) between 1790 and 1850 ms (7 cores used)

Probably the changes I did to the DDL improved the performance; You can check by yourself if the problems you are facing are gone trying this DDL to better evaluate omnisci database

CREATE TABLE hits_v1 (
  WatchID BIGINT,
  JavaEnable TINYINT,
  Title TEXT ENCODING DICT(32),
  GoodEvent SMALLINT,
  EventTime TIMESTAMP(0) ENCODING FIXED(32),
  EventDate DATE ENCODING DAYS(16),
  CounterID INTEGER,
  ClientIP TEXT ENCODING DICT(32),
  ClientIP6 TEXT ENCODING DICT(32),
  RegionID INTEGER,
  UserID TEXT ENCODING DICT(32),
  CounterClass TINYINT,
  OS TINYINT,
  UserAgent TINYINT,
  URL TEXT ENCODING DICT(32),
  Referer TEXT ENCODING DICT(32),
  URLDomain TEXT ENCODING DICT(32),
  RefererDomain TEXT ENCODING DICT(32),
  Refresh TINYINT,
  IsRobot TINYINT,
  RefererCategories SMALLINT[],
  URLCategories SMALLINT[],
  URLRegions INTEGER[],
  RefererRegions INTEGER[],
  ResolutionWidth SMALLINT,
  ResolutionHeight SMALLINT,
  ResolutionDepth TINYINT,
  FlashMajor TINYINT,
  FlashMinor TINYINT,
  FlashMinor2 TEXT ENCODING DICT(32),
  NetMajor TINYINT,
  NetMinor TINYINT,
  UserAgentMajor SMALLINT,
  UserAgentMinor TEXT ENCODING DICT(32),
  CookieEnable TINYINT,
  JavascriptEnable TINYINT,
  IsMobile TINYINT,
  MobilePhone TINYINT,
  MobilePhoneModel TEXT ENCODING DICT(32),
  Params TEXT ENCODING DICT(32),
  IPNetworkID INTEGER,
  TraficSourceID TINYINT,
  SearchEngineID SMALLINT,
  SearchPhrase TEXT ENCODING DICT(32),
  AdvEngineID TINYINT,
  IsArtifical TINYINT,
  WindowClientWidth SMALLINT,
  WindowClientHeight SMALLINT,
  ClientTimeZone SMALLINT,
  ClientEventTime TIMESTAMP(0) ENCODING FIXED(32),
  SilverlightVersion1 TINYINT,
  SilverlightVersion2 TINYINT,
  SilverlightVersion3 INTEGER,
  SilverlightVersion4 SMALLINT,
  PageCharset TEXT ENCODING DICT(32),
  CodeVersion INTEGER,
  IsLink TINYINT,
  IsDownload TINYINT,
  IsNotBounce TINYINT,
  FUniqID TEXT ENCODING DICT(32),
  HID TEXT ENCODING DICT(32),
  IsOldCounter TINYINT,
  IsEvent TINYINT,
  IsParameter TINYINT,
  DontCountHits TINYINT,
  WithHash TINYINT,
  HitColor TEXT ENCODING DICT(32),
  UTCEventTime TIMESTAMP(0) ENCODING FIXED(32),
  Age TINYINT,
  Sex TINYINT,
  Income TINYINT,
  Interests SMALLINT,
  Robotness TINYINT,
  GeneralInterests SMALLINT[],
  RemoteIP TEXT ENCODING DICT(32),
  RemoteIP6 TEXT ENCODING DICT(32),
  WindowName INTEGER,
  OpenerName INTEGER,
  HistoryLength SMALLINT,
  BrowserLanguage TEXT ENCODING DICT(32),
  BrowserCountry TEXT ENCODING DICT(32),
  SocialNetwork TEXT ENCODING DICT(32),
  SocialAction TEXT ENCODING DICT(32),
  HTTPError SMALLINT,
  SendTiming INTEGER,
  DNSTiming INTEGER,
  ConnectTiming INTEGER,
  ResponseStartTiming INTEGER,
  ResponseEndTiming INTEGER,
  FetchTiming INTEGER,
  RedirectTiming INTEGER,
  DOMInteractiveTiming INTEGER,
  DOMContentLoadedTiming INTEGER,
  DOMCompleteTiming INTEGER,
  LoadEventStartTiming INTEGER,
  LoadEventEndTiming INTEGER,
  NSToDOMContentLoadedTiming INTEGER,
  FirstPaintTiming INTEGER,
  RedirectCount TINYINT,
  SocialSourceNetworkID TINYINT,
  SocialSourcePage TEXT ENCODING DICT(32),
  ParamPrice BIGINT,
  ParamOrderID TEXT ENCODING DICT(32),
  ParamCurrency TEXT ENCODING DICT(32),
  ParamCurrencyID SMALLINT,
  GoalsReached INTEGER[],
  OpenstatServiceName TEXT ENCODING DICT(32),
  OpenstatCampaignID TEXT ENCODING DICT(32),
  OpenstatAdID TEXT ENCODING DICT(32),
  OpenstatSourceID TEXT ENCODING DICT(32),
  UTMSource TEXT ENCODING DICT(32),
  UTMMedium TEXT ENCODING DICT(32),
  UTMCampaign TEXT ENCODING DICT(32),
  UTMContent TEXT ENCODING DICT(32),
  UTMTerm TEXT ENCODING DICT(32),
  FromTag TEXT ENCODING DICT(32),
  HasGCLID TINYINT,
  RefererHash TEXT ENCODING DICT(32),
  URLHash TEXT ENCODING DICT(32),
  CLID INTEGER,
  YCLID BIGINT,
  ShareService TEXT ENCODING DICT(32),
  ShareURL TEXT ENCODING DICT(32),
  ShareTitle TEXT ENCODING DICT(32),
  Key1 TEXT[],
  Key2 TEXT[],
  Key3 TEXT[],
  Key4 TEXT[],
  Key5 TEXT[],
  ValueDouble DOUBLE[],
  IslandID TEXT ENCODING DICT(32),
  RequestNum INTEGER,
  RequestTry TINYINT);