dylanaraps / pure-sh-bible

📖 A collection of pure POSIX sh alternatives to external processes.
MIT License
6.45k stars 281 forks source link

string truncate #18

Open spiralofhope opened 4 years ago

spiralofhope commented 4 years ago

This leverages string iteration in #17 to truncate a given string to a specified length.

# Truncate a string to at most a given number of characters, using only
# POSIX sh built-ins (no external processes).
#
# Arguments: $1   - maximum length, an integer
#            $2.. - the string; several arguments are joined as "$*"
# Outputs:   the (possibly truncated) string on stdout, newline-terminated;
#            an empty line when the maximum is zero or negative
# Globals:   writes string_length_maximum, string, string_length, remaining,
#            rest, first_character and result (POSIX sh has no 'local')
string_truncate() {
  string_length_maximum="$1"
  shift
  string="$*"
  string_length=${#string}
  # Always start from an empty accumulator: a value left behind by an earlier
  # call (or by the caller) would otherwise be prepended to the output.
  result=''
  #
  if [ "$string_length" -le "$string_length_maximum" ]; then
    # _debug is an optional helper that is not defined here; call it only
    # when the surrounding script provides it.
    if command -v _debug >/dev/null 2>&1; then
      _debug  "  (not truncating)"
    fi
    # printf instead of echo: echo mangles strings such as "-n" or backslashes.
    \printf '%s\n' "$string"
  elif [ "$string_length_maximum" -le 0 ]; then
    # Nothing may be kept.
    \printf '\n'
  else
    remaining="$string"
    #
    # Move one character at a time from the front of $remaining to $result.
    while [ "${#result}" -lt "$string_length_maximum" ] && [ -n "$remaining" ]; do
      rest="${remaining#?}"
      # Quoting $rest makes it match literally, so glob characters inside the
      # string (e.g. '*') are not treated as patterns.
      first_character="${remaining%"$rest"}"
      result="$result$first_character"
      remaining="$rest"
    done
    \printf '%s\n' "$result"
  fi
}

string_truncate  3  'string'
# =>
# str
cmplstofB commented 3 years ago

Would you let me know your opinion about this method?

# Truncate a string to at most a given number of characters with expr(1).
#
# Arguments: $1 - maximum length (non-negative integer)
#            $2 - the string
# Outputs:   the first $1 characters of $2 (all of $2 if it is shorter)
# Returns:   0 on success; expr's status (2 or higher) on a real expr error
string_truncate() {
    # - The leading X keeps expr from parsing operands such as '(' or
    #   'length' as operators.
    # - \{0,N\} (rather than \{N\}) lets strings shorter than N match in full
    #   instead of producing an empty result.
    # - expr exits 1 whenever its result is empty or "0", which is a valid
    #   outcome here, so only statuses above 1 are reported as failures.
    expr "X$2" : 'X\(.\{0,'"$1"'\}\)' || [ "$?" -eq 1 ]
}

string_truncate  3  'string'
# =>
# str
spiralofhope commented 3 years ago

I personally wouldn't do things that way, because it's using expr.

cmplstofB commented 3 years ago

I'm sorry for asking so many times. Are you reluctant to use expr because it's an external command?

posguy99 commented 3 years ago

"The goal of this book is to document commonly-known and lesser-known methods of doing various tasks using only built-in POSIX sh features."

notevenaperson commented 2 years ago

I came up with an alternative solution: simply stack the match-one-character wildcard ? as many times as the number of characters we want to delete. It's dumb, but it works.

   s='string'
# Each '?' matches exactly one character, so '%???' removes the last three.
# The expansion is quoted so the result is not word-split or glob-expanded,
# and printf is used because echo mangles values such as "-n".
printf '%s\n' "${s%???}"
# =>
# str
a='abcdefg'
# '#?????' removes the first five characters.
printf '%s\n' "${a#?????}"
# => fg
# Truncate a string by stacking '?' wildcards and trimming in one expansion.
#
# Arguments: $1   - desired length; a positive N keeps the first N characters,
#                   a negative N drops the last |N| characters
#            $2.. - the string; several arguments are joined as "$*"
# Outputs:   the truncated string on stdout, newline-terminated
# Returns:   0 when the string was truncated; 1 when there was nothing to do
#            (length is 0, or its magnitude is >= the string length), in which
#            case the string is printed unchanged
# Note:      'local' is not POSIX, but most sh implementations accept it.
trunc() {
    # shellcheck disable=SC3043
    local desired_len="$1"
    shift
    local string="$*"
    local len="${#string}"
    local left_len right_len shorter rest truncated
    local ques=''

    # Nothing to do if the requested length covers the whole string.
    if [ "$desired_len" -ge "$len" ] \
        || [ "$((0 - desired_len))" -ge "$len" ] \
        || [ "$desired_len" -eq 0 ]; then
        printf '%s\n' "$string"
        return 1
    fi

    if [ "$desired_len" -gt 0 ]; then
        left_len="$desired_len"
        right_len="$((len - left_len))"
    else # negative: the length counts characters to drop from the end
        right_len="$((0 - desired_len))"
        left_len="$((len - right_len))"
    fi

    # Build the pattern only as long as the shorter side of the cut.
    if [ "$right_len" -le "$left_len" ]; then
        shorter="$right_len"
    else
        shorter="$left_len"
    fi
    while [ "${#ques}" -lt "$shorter" ]; do
        ques="$ques"'?'
    done

    # $ques is deliberately unquoted inside the expansions so that it acts as
    # a glob pattern (zsh does not do this by default).
    # shellcheck disable=SC2295
    if [ "$right_len" -le "$left_len" ]; then
        # Drop the tail directly.
        truncated="${string%$ques}"
    else
        # Find what follows the head, then strip exactly that. $rest is quoted
        # so glob characters inside the string are matched literally.
        rest="${string#$ques}"
        truncated="${string%"$rest"}"
    fi
    printf '%s\n' "$truncated"
}

This solution keeps appending ? to a glob pattern until the pattern is as long as the shorter side of the cut, then trims the string in a single expansion. It may be faster than @spiralofhope's solution, which rescans $string on every iteration, biting off one character at a time just to be able to delete it.

Demo

trunc 19 'The brown fox jumps over the lazy dog'
# => The brown fox jumps
trunc -1 'The brown fox jumps over the lazy dog' # also added support for truncating counting from the end
# => The brown fox jumps over the lazy do
trunc -17 'The brown fox jumps over the lazy dog'
# => The brown fox jumps 
notevenaperson commented 2 years ago

I made a faster and more powerful version a week later, but forgot to post it. Down to the syntax, it emulates Bash's substring expansion.

#Usage: substr offset:length "string"

# Repeat a string N times.
#
# Usage:     rpt 50 "foobar"
# Arguments: $1   - repeat count (integer; zero or negative yields "")
#            $2.. - the string to repeat; several arguments are joined as "$*"
# Globals:   writes the result to $repeated (nothing is printed)
#
# Algorithm from https://stackoverflow.com/questions/202605/repeat-string-javascript/4152613#4152613
# See also:  https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication
# The unit is doubled at the same rate the count is halved, so the loop runs
# about log2(N) times instead of N times.
rpt() {
    # Prefixed scratch names: POSIX sh has no 'local', and plain names such as
    # 'count' or 'result' would clobber the caller's variables.
    _rpt_count="$1"
    shift
    _rpt_unit="$*"
    repeated=''

    if [ "$_rpt_count" -gt 0 ]; then
        while [ "$_rpt_count" -gt 1 ]; do
            # An odd count leaves one copy over after halving; bank it now.
            if [ "$((_rpt_count & 1))" -eq 1 ]; then
                repeated="$repeated$_rpt_unit"
            fi
            _rpt_count=$((_rpt_count >> 1))
            _rpt_unit="$_rpt_unit$_rpt_unit"
        done
        # The count is now exactly 1: add the final (doubled) unit.
        repeated="$repeated$_rpt_unit"
    fi

    unset _rpt_count _rpt_unit
}

# Extract a substring using the notation of Bash's ${str:offset:length}.
#
# Usage:     substr offset[:length] string...
# Arguments: $1   - "offset" or "offset:length" (integers)
#                   offset < 0 counts from the end of the string;
#                   an empty offset (":3") means 0;
#                   length > 0 keeps that many characters after the offset;
#                   length < 0 drops that many characters from the end
#            $2.. - the string; several arguments are joined as "$*"
# Outputs:   the substring on stdout, newline-terminated
# Returns:   0 on success;
#            1 with no output if the offset's magnitude is >= the string length;
#            1 after printing the whole remainder if the length is 0 or its
#              magnitude is >= the length of what remains after the offset
# Requires:  rpt COUNT STRING, which stores COUNT copies of STRING in $repeated
# Globals:   writes offset, desired_len, str, len, left_len, right_len, rest
#            and truncated (POSIX sh has no 'local')
substr() {
    # Parse. desired_len is cleared first so that a length given to an earlier
    # call cannot leak into this one.
    desired_len=''
    case $1 in
        *:*)
            offset=${1%%:*}
            desired_len=${1##*:}
            ;;
        *)
            offset=$1
            ;;
    esac
    offset=${offset:-0}
    shift
    str="$*"

    len=${#str}

    if [ "$offset" -ge "$len" ] || [ "$((0 - offset))" -ge "$len" ]; then
        return 1 # bad input
    fi

    if [ "$offset" -ne 0 ]; then
        # "If offset evaluates to a number less than zero, the value is used
        # as an offset in characters from the end of the value"
        if [ "$offset" -lt 0 ]; then
            offset=$((len + offset))
        fi
        rpt "$offset" '?'
        # $repeated is deliberately unquoted so the '?'s act as wildcards
        # (this does not work in zsh).
        # shellcheck disable=SC2295
        str=${str#$repeated}
        # All length arithmetic below refers to what is left after the offset.
        len=${#str}
    fi

    # Below only gets executed if the user specified a length.
    if [ -z "$desired_len" ]; then
        printf '%s\n' "$str"
        return 0
    fi

    # Nothing to trim if the length is 0 or covers everything that remains.
    if [ "$desired_len" -ge "$len" ] \
        || [ "$((0 - desired_len))" -ge "$len" ] \
        || [ "$desired_len" -eq 0 ]; then
        printf '%s\n' "$str"
        return 1
    fi

    if [ "$desired_len" -gt 0 ]; then
        left_len=$desired_len
        right_len=$((len - left_len))
    else # negative: the length counts characters to drop from the end
        right_len=$((0 - desired_len))
        left_len=$((len - right_len))
    fi

    # Build the wildcard pattern for whichever side of the cut is shorter.
    # Both sides are at least 1 here, so rpt is never asked for 0 copies.
    # shellcheck disable=SC2295
    if [ "$left_len" -lt "$right_len" ]; then
        rpt "$left_len" '?'
        # Find what follows the kept head, then strip exactly that. $rest is
        # quoted so glob characters inside the string are matched literally.
        rest=${str#$repeated}
        truncated=${str%"$rest"}
    else
        rpt "$right_len" '?'
        truncated=${str%$repeated}
    fi

    printf '%s\n' "$truncated"
}