Closed HenrikBengtsson closed 10 months ago
I took a stab at cleaning these notes up a bit. Here's a step-by-step re-write of the current implementation. Note that if we define rotate()
such that it can rotate in both, we can get rid of n
in rotate(x, n - s)
, where n = len(seq)
, by using rotate(x, -s)
.
Define rotate()
as:
def rotate(seq: str, amount: int = 0) -> str:
"""Rotates a circular, genomic sequence a certain amount.
Rotates sequence 'seq', 'amount' number of symbols to the right.
A rotation 'amount' is the same as a rotation 'amount + n * len(seq)'
for any integer 'n'.
Returns the rotated sequence as a string of length 'len(seq)'.
Examples
--------
>>> seq = "ABCDEFGH"
>>> rotate(seq, 0)
'ABCDEFGH'
>>> rotate(seq, +1)
'BCDEFGHA'
>>> rotate(seq, +7)
'HABCDEFG'
>>> rotate(seq, -1)
'HABCDEFG'
>>> rotate(seq, +8)
'ABCDEFGH'
"""
assert isinstance(seq, str), "Argument 'seq' must be an string"
assert isinstance(amount, int), "Argument 'amount' must be an integer"
## Nothing to rotate?
if len(seq) == 0:
return seq
amount = amount % len(seq)
## Rotate?
if amount > 0:
seq = seq[amount:] + seq[:amount]
return seq
Then, we can rewrite the current:
def dcseguid(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
y = min_rotation(crick)
w, c = min(
(watson[x:] + watson[:x], crick[n - x :] + crick[: n - x]),
(crick[y:] + crick[:y], watson[n - y :] + watson[: n - y]),
)
return dlseguid(w, c, offset = 0)
in several steps:
def dcseguid1(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
y = min_rotation(crick)
w, c = min(
(rotate(watson, x), rotate(crick, n - x)),
(rotate(crick, y), rotate(watson, n - y))
)
return dlseguid(w, c, offset = 0)
def dcseguid2(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
watson1 = rotate(watson, x)
crick1 = rotate(crick, n - x)
y = min_rotation(crick)
crick2 = rotate(crick, y)
watson2 = rotate(watson, n - y)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid3(watson: str, crick: str) -> str:
n = len(watson)
amount = min_rotation(watson)
watson1 = rotate(watson, amount)
crick1 = rotate(crick, n - amount)
amount = min_rotation(crick)
crick2 = rotate(crick, amount)
watson2 = rotate(watson, n - amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid4(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson1 = rotate(watson, +amount)
crick1 = rotate(crick, -amount) ## Because rotate() takes both + and -
amount = min_rotation(crick)
crick2 = rotate(crick, +amount)
watson2 = rotate(watson, -amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid5(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson1 = rotate(watson, +amount)
crick1 = rotate(crick, -amount)
amount = min_rotation(crick)
crick2 = rotate(crick1, +amount) ## Using crick1 or crick doesn't matter
watson2 = rotate(watson1, -amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid6(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rotate(crick, -amount)
amount = min_rotation(crick)
crick2 = rotate(crick, +amount)
watson2 = rotate(watson, -amount)
w, c = min(
(watson, crick ),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid7(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rotate(crick, -amount)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rotate(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid8(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rotate(crick, -amount)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rotate(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid9(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson) ## because the crick strand is by definition
## rev-complementarty to the watson strinng
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rc(crick)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
We can continue as
def dcseguid10(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rc(crick)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid11(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, rc(crick))
)
return dlseguid(w, c, offset = 0)
def dcseguid12(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(rc(watson), +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, rc(crick))
)
return dlseguid(w, c, offset = 0)
Next, I'm pretty use the following holds:
rotate(rc(seq), +amount) == rc(rotate(seq, -amount))
IMPORTANT: This claim needs to be double-checked!
def dcseguid13(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rc(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, rc(crick))
)
return dlseguid(w, c, offset = 0)
def dcseguid14(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
crick_alt = rc(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, rc(crick))
)
return dlseguid(w, c, offset = 0)
Substituting crick
with rc(watson)
, gives:
def dcseguid15(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
crick_alt = rc(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson)),
(crick_alt, rc(rc(watson)))
)
return dlseguid(w, c, offset = 0)
and from rc(rc(s)) == s
, we get:
def dcseguid16(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
crick_alt = rc(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson)),
(crick_alt, watson)
)
return dlseguid(w, c, offset = 0)
There were some typos, leading to mistakes above. Here's a revised version:
def dcseguid(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
y = min_rotation(crick)
w, c = min(
(watson[x:] + watson[:x], crick[n - x :] + crick[: n - x]),
(crick[y:] + crick[:y], watson[n - y :] + watson[: n - y]),
)
return dlseguid(w, c, offset = 0)
def dcseguid1(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
y = min_rotation(crick)
w, c = min(
(rotate(watson, x), rotate(crick, n - x)),
(rotate(crick, y), rotate(watson, n - y))
)
return dlseguid(w, c, offset = 0)
def dcseguid2(watson: str, crick: str) -> str:
n = len(watson)
x = min_rotation(watson)
watson1 = rotate(watson, x)
crick1 = rotate(crick, n - x)
y = min_rotation(crick)
crick2 = rotate(crick, y)
watson2 = rotate(watson, n - y)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid3(watson: str, crick: str) -> str:
n = len(watson)
amount = min_rotation(watson)
watson1 = rotate(watson, amount)
crick1 = rotate(crick, n - amount)
amount = min_rotation(crick)
crick2 = rotate(crick, amount)
watson2 = rotate(watson, n - amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid4(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson1 = rotate(watson, +amount)
crick1 = rotate(crick, -amount) ## Because rotate() takes both + and -
amount = min_rotation(crick)
crick2 = rotate(crick, +amount)
watson2 = rotate(watson, -amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid5(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson1 = rotate(watson, +amount)
crick1 = rotate(crick, -amount)
amount = min_rotation(crick)
crick2 = rotate(crick1, +amount) ## Using crick1 or crick doesn't matter
watson2 = rotate(watson1, -amount)
w, c = min(
(watson1, crick1),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid5(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rotate(crick, -amount)
amount = min_rotation(crick)
crick2 = rotate(crick, +amount)
watson2 = rotate(watson, -amount)
w, c = min(
(watson, crick ),
(crick2, watson2)
)
return dlseguid(w, c, offset = 0)
def dcseguid6(watson: str, crick: str) -> str:
amount = min_rotation(watson)
watson = rotate(watson, +amount)
crick = rotate(crick, -amount)
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rotate(watson, -amount)
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid8(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rotate(crick, -amount)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rotate(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid9(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
watson_alt = rc(crick_alt)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid10(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , crick ),
(crick_alt, rc(crick_alt))
)
return dlseguid(w, c, offset = 0)
def dcseguid11(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
crick = rc(watson)
## Minimal rotation per 'crick'
amount = min_rotation(crick)
crick_alt = rotate(crick, +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson) ),
(crick_alt, rc(crick_alt))
)
return dlseguid(w, c, offset = 0)
def dcseguid12(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount) ## rotate "in-place"
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
crick_alt = rotate(rc(watson), +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson) ),
(crick_alt, rc(crick_alt))
)
return dlseguid(w, c, offset = 0)
def dcseguid13(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount)
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
crick_alt = rc(rotate(watson, -amount))
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson) ),
(crick_alt, rc(crick_alt))
)
return dlseguid(w, c, offset = 0)
def dcseguid14(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount)
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
watson_alt = rotate(watson, -amount)
crick_alt = rc(watson_alt)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson) ),
(crick_alt, rc(crick_alt))
)
return dlseguid(w, c, offset = 0)
def dcseguid14(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount)
## Minimal rotation per 'crick'
amount = min_rotation(rc(watson))
watson_alt = rotate(watson, -amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson)),
(rc(watson_alt), watson_alt)
)
return dlseguid(w, c, offset = 0)
## Guess that max_rotation(x) == min_rotation(rc(x))
def dcseguid15(watson: str, crick: str) -> str:
## Minimal rotation per 'watson'
amount = min_rotation(watson)
watson = rotate(watson, +amount)
## Minimal rotation per 'crick'
amount = max_rotation(watson)
watson_alt = rotate(watson, +amount)
## Keep the "minimum" of the two variants
w, c = min(
(watson , rc(watson)),
(rc(watson_alt), watson_alt)
)
return dlseguid(w, c, offset = 0)
def dcseguid16(watson: str, crick: str) -> str:
## Minimal and maximum rotation per 'watson'
amount_min = min_rotation(watson)
amount_max = max_rotation(watson)
watson_min = rotate(watson, amount_min)
watson_max = rotate(watson, amount_max)
## Keep the "minimum" of the two variants
w, c = min(
( watson_min , rc(watson_min)),
(rc(watson_max), watson_max )
)
return dlseguid(w, c, offset = 0)
# Imagine we have rotate_to_min() and rotate_to_max()
def dcseguid17(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
## Keep the "minimum" of the two variants
w, c = min(
( watson_min , rc(watson_min)),
(rc(watson_max), watson_max )
)
return dlseguid(w, c, offset = 0)
## Back to using 'crick' again; maybe more clear?
def dcseguid18(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
crick_min = rc(watson_min)
crick_max = rc(watson_max)
## Keep the "minimum" of the two variants
w, c = min(
(watson_min, crick_min ),
(crick_max , watson_max)
)
return dlseguid(w, c, offset = 0)
Then, my gut feeling tells me that the above is the same as:
def dcseguid19(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
crick_min = rc(watson_min)
crick_max = rc(watson_max)
## Keep the "minimum" of the two variants
if watson_min < crick_max:
w = (watson_min, crick_min)
else
w = (crick_max, watson_max)
return dlseguid(w, c, offset = 0)
If that's true, we can continue as:
def dcseguid20(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
crick_min = rc(watson_min)
crick_max = rc(watson_max)
## Keep the "minimum" of the two variants
if watson_min < crick_max:
w = watson_min
else
w = crick_max
c = rc(w)
return dlseguid(w, c, offset = 0)
def dcseguid21(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
crick_min = rc(watson_min)
crick_max = rc(watson_max)
## Keep the "minimum" of the two variants
if watson_min < crick_max:
w = watson_min
else
w = crick_max
return dlseguid(watson = w, crick = rc(w), offset = 0)
def dcseguid22(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
watson_max = rotate_to_max(watson)
## Keep the "minimum" of the two variants
if watson_min < rc(watson_max):
w = watson_min
else
w = rc(watson_max)
return dlseguid(watson = w, crick = rc(w), offset = 0)
And, of course, if we move back to using only minimal rotations:
def dcseguid23(watson: str, crick: str) -> str:
watson_min = rotate_to_min(watson)
crick_min = rotate_to_min(crick)
## Keep the "minimum" of the two variants
if watson_min < crick_min:
w = watson_min
else
w = crick_min
return dlseguid(watson = w, crick = rc(w), offset = 0)
which is basically back to the beginning; your original dcseguid()
. Interesting exercise.
Even if my gut feeling would be wrong, you could make the latter then definition of dcseguid()
.
ACTION:
Issue #33 concludes this.
From brainstorming during 2023-12-08 call on:
https://github.com/MetabolicEngineeringGroupCBMA/seguid/blob/2ed17694f309405736183afe2b94ad99fe967b36/Python/seguid.py#L494-L517
Jotter