Closed jhzhou002 closed 1 year ago
Good question. Actually we use clickhouse only for sample-data before. As for csv-format data, I'm not sure. @bifenglin can you help answer this question?
-- auto-generated definition
create table gh_events
(
id UInt64,
type Enum8('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, 'PushEvent' = 12, 'ReleaseEvent' = 13, 'WatchEvent' = 14, 'PullRequestReviewEvent' = 15),
action Enum8('added' = 1, 'closed' = 2, 'created' = 3, 'labeled' = 4, 'opened' = 5, 'published' = 6, 'reopened' = 7, 'started' = 8),
actor_id UInt64,
actor_login LowCardinality(String),
repo_id UInt64,
repo_name LowCardinality(String),
org_id UInt64,
org_login LowCardinality(String),
created_at DateTime,
issue_id UInt64,
issue_number UInt32,
issue_title String,
body String,
`issue_labels.name` Array(String),
`issue_labels.color` Array(String),
`issue_labels.default` Array(UInt8),
`issue_labels.description` Array(String),
issue_author_id UInt64,
issue_author_login LowCardinality(String),
issue_author_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
issue_author_association Enum8('COLLABORATOR' = 1, 'CONTRIBUTOR' = 2, 'MEMBER' = 3, 'NONE' = 4, 'OWNER' = 5, 'MANNEQUIN' = 6),
issue_assignee_id UInt64,
issue_assignee_login LowCardinality(String),
`issue_assignees.login` Array(LowCardinality(String)),
`issue_assignees.id` Array(UInt64),
issue_created_at Nullable(DateTime),
issue_updated_at Nullable(DateTime),
issue_comments UInt16,
issue_closed_at Nullable(DateTime),
issue_comment_id UInt64,
issue_comment_created_at Nullable(DateTime),
issue_comment_updated_at Nullable(DateTime),
issue_comment_author_association Enum8('COLLABORATOR' = 1, 'CONTRIBUTOR' = 2, 'MEMBER' = 3, 'NONE' = 4, 'OWNER' = 5, 'MANNEQUIN' = 6),
issue_comment_author_id UInt64,
issue_comment_author_login LowCardinality(String),
issue_comment_author_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
pull_commits UInt16,
pull_additions UInt16,
pull_deletions UInt16,
pull_changed_files UInt32,
pull_merged UInt8,
pull_merge_commit_sha String,
pull_merged_at Nullable(DateTime),
pull_merged_by_id UInt64,
pull_merged_by_login LowCardinality(String),
pull_merged_by_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
pull_requested_reviewer_id UInt64,
pull_requested_reviewer_login LowCardinality(String),
pull_requested_reviewer_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
pull_review_comments UInt16,
repo_description String,
repo_size UInt32,
repo_stargazers_count UInt32,
repo_forks_count UInt32,
repo_language String,
repo_has_issues UInt8,
repo_has_projects UInt8,
repo_has_downloads UInt8,
repo_has_wiki UInt8,
repo_has_pages UInt8,
repo_license String,
repo_default_branch String,
repo_created_at Nullable(DateTime),
repo_updated_at Nullable(DateTime),
repo_pushed_at Nullable(DateTime),
pull_review_id UInt64,
pull_review_comment_id UInt64,
pull_review_comment_path String,
pull_review_comment_position String,
pull_review_comment_author_id UInt64,
pull_review_comment_author_login LowCardinality(String),
pull_review_comment_author_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
pull_review_comment_author_association Enum8('COLLABORATOR' = 1, 'CONTRIBUTOR' = 2, 'MEMBER' = 3, 'NONE' = 4, 'OWNER' = 5, 'MANNEQUIN' = 6),
pull_review_comment_created_at Nullable(DateTime),
pull_review_comment_updated_at Nullable(DateTime),
push_id UInt64,
push_size UInt32,
push_distinct_size UInt32,
push_ref String,
push_head String,
`push_commits.name` Array(LowCardinality(String)),
`push_commits.email` Array(String),
`push_commits.message` Array(String),
fork_forkee_id UInt64,
fork_forkee_full_name LowCardinality(String),
fork_forkee_owner_id UInt64,
fork_forkee_owner_login LowCardinality(String),
fork_forkee_owner_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
delete_ref String,
delete_ref_type String,
delete_pusher_type Enum8('deploy_key' = 1, 'user' = 2),
create_ref String,
create_ref_type Enum8('branch' = 1, 'tag' = 2),
create_master_branch String,
create_description String,
create_pusher_type Enum8('deploy_key' = 1, 'user' = 2),
`gollum_pages.page_name` Array(String),
`gollum_pages.title` Array(String),
`gollum_pages.action` Array(String),
member_id UInt64,
member_login LowCardinality(String),
member_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
release_id UInt64,
release_tag_name String,
release_target_commitish String,
release_name String,
release_draft UInt8,
release_author_id UInt64,
release_author_login LowCardinality(String),
release_author_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
release_prerelease UInt8,
release_created_at Nullable(DateTime),
release_published_at Nullable(DateTime),
release_body String,
`release_assets.name` Array(String),
`release_assets.uploader_login` Array(LowCardinality(String)),
`release_assets.uploader_id` Array(UInt64),
`release_assets.content_type` Array(LowCardinality(String)),
`release_assets.state` Array(String),
`release_assets.size` Array(UInt64),
`release_assets.download_count` Array(UInt16),
commit_comment_id UInt64,
commit_comment_author_id UInt64,
commit_comment_author_login LowCardinality(String),
commit_comment_author_type Enum8('Bot' = 1, 'Mannequin' = 2, 'Organization' = 3, 'User' = 4),
commit_comment_author_association Enum8('COLLABORATOR' = 1, 'CONTRIBUTOR' = 2, 'MEMBER' = 3, 'NONE' = 4, 'OWNER' = 5, 'MANNEQUIN' = 6),
commit_comment_path String,
commit_comment_position String,
commit_comment_line String,
commit_comment_created_at Nullable(DateTime),
commit_comment_updated_at Nullable(DateTime)
)
engine = ReplacingMergeTree(id)
PARTITION BY toYYYYMM(created_at)
ORDER BY (type, repo_name, created_at)
SETTINGS index_granularity = 8192;
@318352733 hi there are DDL of opensource table. You can try create table first and import it .
And this JSON data. @xgdyp
hi, JSON data can not import to clickhouse now.
Usage
Here there is no table structure after the JSON data is decompressed, and you want to import it to ClickHouse, how to import it.
Extract SQL
https://xlab2017.yuque.com/staff-kbz9wp/olpzth/tq36xvyzg9b880hy?singleDoc#UKotT Here the data import in the URL click house is a direct import of the data table or how.
Does this dataset need to be updated regularly?
No response