larkery / zsh-histdb

A slightly better history for zsh
MIT License
1.29k stars 75 forks source link

Pure zsh history import function #137

Open baodrate opened 1 year ago

baodrate commented 1 year ago

In case this helps anyone else, here's a function to parse zsh's histfile and add it to the database. Leverages zsh's history file parsing so it handles newlines and arbitrary characters very well. The other tools suggested in the README require other languages and don't handle multiline commands soundly.

Inserts are batched and it's pretty fast. A bit of time is wasted calling fc -l to read the timestamps, but it takes ~1s to parse ~20k lines for me so it should be fast enough for most use-cases.

_histdb_import() {
    emulate -L zsh
    # more accurate parsing of history file
    setopt HIST_LEX_WORDS

    # set local so later fc -p/-P commands don't modify caller's history settings
    local HISTFILE
    local HISTSIZE
    local SAVEHIST

    local _HISTFILE=${1:-$HISTFILE}
    # push current history list onto a stack and initialize a new one
    fc -p -a
    # read all the lines from the given histfile
    HISTFILE=$_HISTFILE
    HISTSIZE=999999999
    SAVEHIST=0
    fc -R ${1:-$HISTFILE}
    print "loaded ${#history} lines from history file: $HISTFILE" >&2
    # unset HISTFILE after reading so we don't (somehow) accidentally write to it
    HISTFILE=

    local -a histories
    local -i i=0
    # history is read into `history` associative array, but we don't have
    # direct access to timestamps so parse them from `fc -l`
    fc -l -t %s -d -D 0 | while { read -r histcmd timestamp duration _cmd } {
        # the command output by `fc -l` is escaped so use $history[$histcmd]
        #   instead to get the raw, unescaped characters
        # omit empty commands (not sure how this happens. `print -S` maybe?)
        [[ -n "${history[$histcmd]}" ]] || continue
        ((++i))
        # duration is formatted as m:ss so parse it back into integer seconds
        # escape the history command
        histories+=("(${timestamp}, $((${duration%:*}*60 + ${duration#*:})), '${${history[$histcmd]//\'/''}//$'\x00'}')")
    }

    _histdb_init
    local result=
    result=$(_histdb_query <<EOF
begin transaction;
insert into places   (host, dir) values (${HISTDB_HOST}, '');
insert into commands (argv) values ${(@pj:,\n:)${(@)${(@)${(@uv)history//'/''}//$'\x00'}/#/('}/%/')};
with histories (timestamp, duration, cmd) as (values ${(pj:,\n:)histories})
insert into history (session, command_id, place_id, start_time, duration)
select
    ${SESSION:-0},
    c.id,
    (select id from places where host = ${HISTDB_HOST} and dir = ''),
    h.timestamp,
    h.duration
from histories h
    join commands c on c.argv = h.cmd
on conflict (session, command_id, place_id, start_time)
    do update set duration = excluded.duration
    where history.duration != excluded.duration
returning id
;
commit;
EOF
) || return
    print "Imported ${#${(@f)result}} new history entries"
}

Note that:

Hello71 commented 1 year ago

my solution uses sed and .import:

#!/bin/sh
LC_CTYPE=C sed -e ':a;/\\$/{N;b a};s/"/""/g;t b;:b;s/^: \([0-9]*\):\([0-9]*\);\(.*\)$/"\3",\1,\2/;t;s/.*/"&","",""/' ~/.zsh_history | sqlite3 3<&0 << EOF
CREATE TABLE zsh_history(argv text, started int, duration int);
.import --csv /dev/fd/3 zsh_history
ATTACH DATABASE '$HOME/.histdb/zsh-history.db' AS histdb;
BEGIN;
INSERT INTO histdb.commands (argv) SELECT argv FROM zsh_history ORDER BY rowid;
INSERT INTO histdb.history (command_id, start_time, duration) SELECT rowid, started, duration FROM zsh_history ORDER BY rowid;
COMMIT;
EOF

it takes a couple seconds to import 150k lines. I don't think this handles duration etc correctly though, and I don't think histdb is right for me so I probably won't improve this.