ContentMine / phylotree

A repository for ami-phylotree development
0 stars 0 forks source link

suggested OCR correction rules #24

Open rossmounce opened 9 years ago

rossmounce commented 9 years ago
# Leading-character OCR fixes for tip labels in every *.nwk file of the
# current directory. Each expression is anchored on the '(' or ',' in front
# of a label and on the ':' that terminates it. The expressions run in the
# order listed, inside a single in-place pass (GNU sed -i):
#   1. 8char label, 1st char 8 or 3 -> B (last four chars must be digits)
#   2. 8char label, 2nd char 8 or 3 -> B when the 1st char is a letter
#   3. 8char label, 2nd char 8 or 3 -> B for any alphanumeric label
#      NOTE(review): this rule was captioned "5 -> B" although the pattern
#      matches [38]; the pattern is kept as is - confirm which was intended.
#   4. 6char label, 1st char 8 or 3 -> B
#   5. 8char label, 1st char 0 -> D
#   6. 6char label, 1st char 0 -> D
# Group \2 always carries the trailing ':' itself, so no replacement may
# append another one; doing so would write '::' and corrupt the Newick tree.
sed -i \
  -e 's/\([(,]\)[38]\([A-Z0-9][A-Z0-9][A-Z0-9][0-9][0-9][0-9][0-9]:\)/\1B\2/g' \
  -e 's/\([(,][A-Z]\)[38]\([A-Z0-9][A-Z0-9][A-Z0-9][0-9][0-9][0-9]:\)/\1B\2/g' \
  -e 's/\([(,][A-Z0-9]\)[38]\([A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9]:\)/\1B\2/g' \
  -e 's/\([(,]\)[83]\([0-9][0-9][0-9][0-9][0-9]:\)/\1B\2/g' \
  -e 's/\([(,]\)0\([A-Z0-9][A-Z0-9][0-9][0-9][0-9][0-9][0-9]:\)/\1D\2/g' \
  -e 's/\([(,]\)0\([0-9][0-9][0-9][0-9][0-9]:\)/\1D\2/g' \
  *.nwk
# Second-character fixes for 8char labels, applied to every *.nwk file in
# one in-place sed pass. In order:
#   D followed by O or 0, then six digits      -> DQ
#   G followed by O or 0, then six [A-Z0-9]    -> GQ
#   A followed by 1, then six digits           -> AJ
sed -i \
  -e 's/\([(,]D\)[O0]\([0-9][0-9][0-9][0-9][0-9][0-9]:\)/\1Q\2/g' \
  -e 's/\([(,]G\)[O0]\([A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9]:\)/\1Q\2/g' \
  -e 's/\([(,]A\)1\([0-9][0-9][0-9][0-9][0-9][0-9]:\)/\1J\2/g' \
  *.nwk
# 2 <-> Z fixes for 6char labels (one letter followed by five digits),
# applied to every *.nwk file in one in-place sed pass. In order:
#   1. 1st char 2 -> Z when followed by five digits
#   2-6. Z -> 2 at label positions 2, 3, 4, 5 and 6 respectively, when the
#        label starts with a letter and every other position is a digit
# Group \2 already ends with the ':' that closes the label, so each
# replacement re-emits the group only; appending a further ':' would write
# '::' between the label and its branch length and corrupt the Newick tree.
# In the replacements, '\12' is back-reference \1 followed by a literal 2.
sed -i \
  -e 's/\([(,]\)2\([0-9][0-9][0-9][0-9][0-9]:\)/\1Z\2/g' \
  -e 's/\([(,][A-Z]\)Z\([0-9][0-9][0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][0-9]\)Z\([0-9][0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9]\)Z\([0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9]\)Z\([0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9][0-9]\)Z\(:\)/\12\2/g' \
  *.nwk
# SAFE: Z -> 2 inside the digit run of 8char labels (two letters followed by
# six digits), applied to every *.nwk file in one in-place sed pass. There is
# one expression per position of the misread character, for positions 4 to 8;
# position 3 (directly after the two letters) is left alone.
# In the replacements, '\12' is back-reference \1 followed by a literal 2.
sed -i \
  -e 's/\([(,][A-Z][A-Z][0-9]\)Z\([0-9][0-9][0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9]\)Z\([0-9][0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9]\)Z\([0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9]\)Z\([0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9]\)Z\(:\)/\12\2/g' \
  *.nwk
# Letter O -> digit 0 fixes, applied to every *.nwk file in one in-place sed
# pass. In order:
#   1.    SAFE: 8char label, "OO" at positions 3-4 followed by four digits -> 00
#   2.    8char label, O at position 4 (not tagged SAFE by the rule's author)
#   3-6.  SAFE: 8char label, O at positions 5, 6, 7 and 8
#   7-11. SAFE: 6char label (one letter + five digits), O at positions 2 to 6
# In the replacements, '\10' is back-reference \1 followed by a literal 0
# (and '\100' is \1 followed by "00").
sed -i \
  -e 's/\([(,][A-Z][A-Z]\)OO\([0-9][0-9][0-9][0-9]:\)/\100\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9]\)O\([0-9][0-9][0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9]\)O\([0-9][0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9]\)O\([0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9]\)O\([0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9]\)O\(:\)/\10\2/g' \
  -e 's/\([(,][A-Z]\)O\([0-9][0-9][0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][0-9]\)O\([0-9][0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9]\)O\([0-9][0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9]\)O\([0-9]:\)/\10\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9][0-9]\)O\(:\)/\10\2/g' \
  *.nwk
# Letter B -> digit 8 fixes, applied to every *.nwk file in one in-place sed
# pass. In order:
#   1.    8char label (two letters + six digits), B at position 4
#         (not tagged SAFE by the rule's author)
#   2-5.  SAFE: 8char label, B at positions 5, 6, 7 and 8
#   6-10. SAFE: 6char label (one letter + five digits), B at positions 2 to 6
# In the replacements, '\18' is back-reference \1 followed by a literal 8.
sed -i \
  -e 's/\([(,][A-Z][A-Z][0-9]\)B\([0-9][0-9][0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9]\)B\([0-9][0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9]\)B\([0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9]\)B\([0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9]\)B\(:\)/\18\2/g' \
  -e 's/\([(,][A-Z]\)B\([0-9][0-9][0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][0-9]\)B\([0-9][0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9]\)B\([0-9][0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9]\)B\([0-9]:\)/\18\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9][0-9]\)B\(:\)/\18\2/g' \
  *.nwk
# Letter G -> digit 6 fixes, applied to every *.nwk file in one in-place sed
# pass. In order:
#   1.    8char label (two letters + six digits), G at position 4
#         (not tagged SAFE by the rule's author)
#   2-5.  SAFE: 8char label, G at positions 5, 6, 7 and 8
#   6-10. SAFE: 6char label (one letter + five digits), G at positions 2 to 6
# In the replacements, '\16' is back-reference \1 followed by a literal 6.
sed -i \
  -e 's/\([(,][A-Z][A-Z][0-9]\)G\([0-9][0-9][0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9]\)G\([0-9][0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9]\)G\([0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9]\)G\([0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9]\)G\(:\)/\16\2/g' \
  -e 's/\([(,][A-Z]\)G\([0-9][0-9][0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][0-9]\)G\([0-9][0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9]\)G\([0-9][0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9]\)G\([0-9]:\)/\16\2/g' \
  -e 's/\([(,][A-Z][0-9][0-9][0-9][0-9]\)G\(:\)/\16\2/g' \
  *.nwk
# SAFE: two further single-character fixes for 8char labels, applied to every
# *.nwk file in one in-place sed pass. In order:
#   E -> 2 at position 3 (after two letters; then one [A-Z0-9] and four digits)
#   S -> 6 at position 4 (after two letters and one [0-9A-Z]; then four digits)
# '\12' / '\16' are back-reference \1 followed by a literal 2 / 6.
sed -i \
  -e 's/\([(,][A-Z][A-Z]\)E\([A-Z0-9][0-9][0-9][0-9][0-9]:\)/\12\2/g' \
  -e 's/\([(,][A-Z][A-Z][0-9A-Z]\)S\([0-9][0-9][0-9][0-9]:\)/\16\2/g' \
  *.nwk