Open RemingtonRohel opened 3 months ago
Okay, so I used this code to quickly check all of the read and write methods:
# Read and write test for pyDARNio with darn-dmap dependency
import pydarnio
import time

# ====================================== IQDAT ======================================
try:
    iq_in_file = '/Users/carley/Documents/data/iq/20160303.1800.17.cly.iqdat'
    iq_out_file = '/Users/carley/Documents/data/write_files/20160303.1800.17.cly.iqdat'
    t1 = time.time()
    iq_data = pydarnio.read_iqdat(iq_in_file)
    t2 = time.time()
    print('------ IQ ------')
    print(iq_data[0].keys())
    t3 = time.time()
    pydarnio.write_iqdat(iq_data, iq_out_file)
    t4 = time.time()
    print('IQ Read took: ', t2-t1, ' seconds')
    print('IQ Write took: ', t4-t3, ' seconds')
    print('SUCCESS IQ')
except Exception as e:
    print('FAIL IQ')
    print(e)

# ====================================== RAWACF ======================================
try:
    raw_in_file = '/Users/carley/Documents/data/rawacf/20200713.0200.06.rkn.rawacf'
    raw_out_file = '/Users/carley/Documents/data/write_files/20200713.0200.06.rkn.rawacf'
    t1 = time.time()
    raw_data = pydarnio.read_rawacf(raw_in_file)
    t2 = time.time()
    print('------ RAWACF ------')
    print(raw_data[0].keys())
    t3 = time.time()
    pydarnio.write_rawacf(raw_data, raw_out_file)
    t4 = time.time()
    print('RAWACF Read took: ', t2-t1, ' seconds')
    print('RAWACF Write took: ', t4-t3, ' seconds')
    print('SUCCESS RAWACF')
except Exception as e:
    print('FAIL RAWACF')
    print(e)

# ====================================== GRID ======================================
try:
    grid_in_file = '/Users/carley/Documents/data/grids/20150308.south.grid2'
    grid_out_file = '/Users/carley/Documents/data/write_files/20150308.south.grid2'
    t1 = time.time()
    grid_data = pydarnio.read_grid(grid_in_file)
    t2 = time.time()
    print('------ GRID ------')
    print(grid_data[0].keys())
    t3 = time.time()
    pydarnio.write_grid(grid_data, grid_out_file)
    t4 = time.time()
    print('GRID Read took: ', t2-t1, ' seconds')
    print('GRID Write took: ', t4-t3, ' seconds')
    print('SUCCESS GRID')
except Exception as e:
    print('FAIL GRID')
    print(e)

# ====================================== MAP ======================================
try:
    map_in_file = '/Users/carley/Documents/data/maps/20210101.n.map'
    map_out_file = '/Users/carley/Documents/data/write_files/20210101.n.map'
    t1 = time.time()
    map_data = pydarnio.read_map(map_in_file)
    t2 = time.time()
    print('------ MAP ------')
    print(map_data[0].keys())
    t3 = time.time()
    pydarnio.write_map(map_data, map_out_file)
    t4 = time.time()
    print('MAP Read took: ', t2-t1, ' seconds')
    print('MAP Write took: ', t4-t3, ' seconds')
    print('SUCCESS MAP')
except Exception as e:
    print('FAIL MAP')
    print(e)

# ====================================== SND ======================================
try:
    snd_in_file = '/Users/carley/Documents/data/snd/20230404.00.ice.snd'
    snd_out_file = '/Users/carley/Documents/data/write_files/20230404.00.ice.snd'
    t1 = time.time()
    snd_data = pydarnio.read_snd(snd_in_file)
    t2 = time.time()
    print('------ SND ------')
    print(snd_data[0].keys())
    t3 = time.time()
    pydarnio.write_snd(snd_data, snd_out_file)
    t4 = time.time()
    print('SND Read took: ', t2-t1, ' seconds')
    print('SND Write took: ', t4-t3, ' seconds')
    print('SUCCESS SND')
except Exception as e:
    print('FAIL SND')
    print(e)

# ====================================== FITACF ======================================
try:
    fit_in_file = '/Users/carley/Documents/data/20211102.0000.00.rkn.a.fitacf'
    fit_out_file = '/Users/carley/Documents/data/write_files/20211102.0000.00.rkn.a.fitacf'
    t1 = time.time()
    fit_data = pydarnio.read_fitacf(fit_in_file)
    t2 = time.time()
    print('------ FITACF ------')
    print(fit_data[0].keys())
    t3 = time.time()
    pydarnio.write_fitacf(fit_data, fit_out_file)
    t4 = time.time()
    print('FITACF Read took: ', t2-t1, ' seconds')
    print('FITACF Write took: ', t4-t3, ' seconds')
    print('SUCCESS FITACF')
except Exception as e:
    print('FAIL FITACF')
    print(e)
I get this output:
------ IQ ------
dict_keys(['radar.revision.major', 'radar.revision.minor', 'origin.code', 'origin.time', 'origin.command', 'cp', 'stid', 'time.yr', 'time.mo', 'time.dy', 'time.hr', 'time.mt', 'time.sc', 'time.us', 'txpow', 'nave', 'atten', 'lagfr', 'smsep', 'ercod', 'stat.agc', 'stat.lopwr', 'noise.search', 'noise.mean', 'channel', 'bmnum', 'bmazm', 'scan', 'offset', 'rxrise', 'intt.sc', 'intt.us', 'txpl', 'mpinc', 'mppul', 'mplgs', 'nrang', 'frang', 'rsep', 'xcf', 'tfreq', 'mxpwr', 'lvmax', 'iqdata.revision.major', 'iqdata.revision.minor', 'combf', 'seqnum', 'chnnum', 'smpnum', 'skpnum', 'ptab', 'ltab', 'tsc', 'tus', 'tatten', 'tnoise', 'toff', 'tsze', 'data'])
IQ Read took: 0.4273972511291504 seconds
IQ Write took: 2.983741044998169 seconds
SUCCESS IQ
------ RAWACF ------
dict_keys(['radar.revision.major', 'radar.revision.minor', 'origin.code', 'origin.time', 'origin.command', 'cp', 'stid', 'time.yr', 'time.mo', 'time.dy', 'time.hr', 'time.mt', 'time.sc', 'time.us', 'txpow', 'nave', 'atten', 'lagfr', 'smsep', 'ercod', 'stat.agc', 'stat.lopwr', 'noise.search', 'noise.mean', 'channel', 'bmnum', 'bmazm', 'scan', 'offset', 'rxrise', 'intt.sc', 'intt.us', 'txpl', 'mpinc', 'mppul', 'mplgs', 'nrang', 'frang', 'rsep', 'xcf', 'tfreq', 'mxpwr', 'lvmax', 'rawacf.revision.major', 'rawacf.revision.minor', 'combf', 'thr', 'ptab', 'ltab', 'slist', 'pwr0', 'acfd', 'xcfd'])
RAWACF Read took: 0.1154787540435791 seconds
RAWACF Write took: 0.7436809539794922 seconds
SUCCESS RAWACF
------ GRID ------
dict_keys(['start.year', 'start.month', 'start.day', 'start.hour', 'start.minute', 'start.second', 'end.year', 'end.month', 'end.day', 'end.hour', 'end.minute', 'end.second', 'stid', 'channel', 'nvec', 'freq', 'major.revision', 'minor.revision', 'program.id', 'noise.mean', 'noise.sd', 'gsct', 'v.min', 'v.max', 'p.min', 'p.max', 'w.min', 'w.max', 've.min', 've.max', 'vector.mlat', 'vector.mlon', 'vector.kvect', 'vector.stid', 'vector.channel', 'vector.index', 'vector.vel.median', 'vector.vel.sd', 'vector.pwr.median', 'vector.pwr.sd', 'vector.wdt.median', 'vector.wdt.sd'])
GRID Read took: 0.0359339714050293 seconds
GRID Write took: 0.5599510669708252 seconds
SUCCESS GRID
FAIL MAP
Unsupported fields ["IMF.Kp"], fields supported are ["start.year", "start.month", "start.day", "start.hour", "start.minute", "start.second", "end.year", "end.month", "end.day", "end.hour", "end.minute", "end.second", "map.major.revision", "map.minor.revision", "doping.level", "model.wt", "error.wt", "IMF.flag", "hemisphere", "fit.order", "latmin", "chi.sqr", "chi.sqr.dat", "rms.err", "lon.shft", "lat.shft", "mlt.start", "mlt.end", "mlt.av", "pot.drop", "pot.drop.err", "pot.max", "pot.max.err", "pot.min", "pot.min.err", "source", "IMF.delay", "IMF.Bx", "IMF.By", "IMF.Bz", "IMF.Vx", "IMF.tilt", "IMT.Kp", "model.angle", "model.level", "model.tilt", "model.name", "noigrf", "stid", "channel", "nvec", "freq", "major.revision", "minor.revision", "program.id", "noise.mean", "noise.sd", "gsct", "v.min", "v.max", "p.min", "p.max", "w.min", "w.max", "ve.min", "ve.max", "vector.mlat", "vector.mlon", "vector.kvect", "vector.stid", "vector.channel", "vector.index", "vector.vel.median", "vector.vel.sd", "vector.pwr.median", "vector.pwr.sd", "vector.wdt.median", "vector.wdt.sd", "N", "N+1", "N+2", "N+3", "model.mlat", "model.mlon", "model.kvect", "model.vel.median", "boundary.mlat", "boundary.mlon"]: record 0
------ SND ------
dict_keys(['radar.revision.major', 'radar.revision.minor', 'origin.code', 'origin.time', 'origin.command', 'cp', 'stid', 'time.yr', 'time.mo', 'time.dy', 'time.hr', 'time.mt', 'time.sc', 'time.us', 'nave', 'lagfr', 'smsep', 'noise.search', 'noise.mean', 'channel', 'bmnum', 'bmazm', 'scan', 'rxrise', 'intt.sc', 'intt.us', 'nrang', 'frang', 'rsep', 'xcf', 'tfreq', 'noise.sky', 'combf', 'fitacf.revision.major', 'fitacf.revision.minor', 'snd.revision.major', 'snd.revision.minor', 'slist', 'qflg', 'gflg', 'v', 'v_e', 'p_l', 'w_l', 'x_qflg', 'phi0', 'phi0_e'])
/Users/carley/Documents/testing/specific_testing_scripts/dmap-read.py:91: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
pydarnio.write_snd(snd_data, snd_out_file)
FAIL SND
Corrupted records: [(68, InvalidRecord("Field slist is a scalar, expected vector")), (488, InvalidRecord("Field slist is a scalar, expected vector"))]
------ FITACF ------
dict_keys(['radar.revision.major', 'radar.revision.minor', 'origin.code', 'origin.time', 'origin.command', 'cp', 'stid', 'time.yr', 'time.mo', 'time.dy', 'time.hr', 'time.mt', 'time.sc', 'time.us', 'txpow', 'nave', 'atten', 'lagfr', 'smsep', 'ercod', 'stat.agc', 'stat.lopwr', 'noise.search', 'noise.mean', 'channel', 'bmnum', 'bmazm', 'scan', 'offset', 'rxrise', 'intt.sc', 'intt.us', 'txpl', 'mpinc', 'mppul', 'mplgs', 'mplgexs', 'ifmode', 'nrang', 'frang', 'rsep', 'xcf', 'tfreq', 'mxpwr', 'lvmax', 'combf', 'fitacf.revision.major', 'fitacf.revision.minor', 'noise.sky', 'noise.lag0', 'noise.vel', 'ptab', 'ltab', 'pwr0', 'slist', 'nlag', 'qflg', 'gflg', 'p_l', 'p_l_e', 'p_s', 'p_s_e', 'v', 'v_e', 'w_l', 'w_l_e', 'w_s', 'w_s_e', 'sd_l', 'sd_s', 'sd_phi', 'x_qflg', 'x_gflg', 'x_p_l', 'x_p_l_e', 'x_p_s', 'x_p_s_e', 'x_v', 'x_v_e', 'x_w_l', 'x_w_l_e', 'x_w_s', 'x_w_s_e', 'phi0', 'phi0_e', 'elv', 'elv_low', 'elv_high', 'x_sd_l', 'x_sd_s', 'x_sd_phi'])
/Users/carley/Documents/testing/specific_testing_scripts/dmap-read.py:111: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
pydarnio.write_fitacf(fit_data, fit_out_file)
FAIL FITACF
Corrupted records: [(301, InvalidRecord("Field slist is a scalar, expected vector"))]
To summarise: every fitacf file I tried failed on write with the corrupted slist error (plus the DeprecationWarning on NumPy >= 1.25), and SND failed the same way on write; reading was fine for both. I printed the slist I was trying to write and it is not a scalar, so I don't know why that error is raised. Reading a map file failed on an unsupported field. Everything else read and wrote fine.
NumPy versions 1.26.4, 1.25.0, and 1.24.4 were tested.
Speedy reading is excellent though!
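In case it helps with debugging, here is a minimal sketch of how the offending records could be inspected, assuming the read functions return a list of dicts of NumPy arrays. The record index 301 comes from the FITACF error above (the same check applies to SND records 68 and 488), and the path is a placeholder:

```python
# Sketch: check whether 'slist' in a failing record is a 0-d or length-1
# array, which a writer might be treating as a scalar.
import pydarnio

records = pydarnio.read_fitacf('/path/to/20211102.0000.00.rkn.a.fitacf')  # placeholder path

for idx in (301,):  # record index reported as corrupted above
    slist = records[idx].get('slist')
    print(idx,
          type(slist),
          getattr(slist, 'ndim', None),    # 0 here would explain "expected vector"
          getattr(slist, 'shape', None),
          getattr(slist, 'dtype', None))
```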
I ran the same tests as above and added a generic read_dmap test and a read_fitacf test with a bz2 input:
# ====================================== GENERIC DMAP ======================================
try:
    fit_in_file = '/Users/carley/Documents/data/20211102.0000.00.rkn.a.fitacf'
    fit_out_file = '/Users/carley/Documents/data/write_files/20211102.0000.00.rkn.dmap.fitacf'
    t1 = time.time()
    fit_data = pydarnio.read_dmap(fit_in_file)
    t2 = time.time()
    print('------ Generic DMAP ------')
    t3 = time.time()
    pydarnio.write_fitacf(fit_data, fit_out_file)
    t4 = time.time()
    print('Generic DMAP Read took: ', t2-t1, ' seconds')
    print('Generic DMAP Write took: ', t4-t3, ' seconds')
    print('SUCCESS Generic DMAP')
except Exception as e:
    print('FAIL Generic DMAP')
    print(e)

# ====================================== BZ2 ======================================
try:
    fit_in_file = '/Users/carley/Documents/data/20211102.0000.00.rkn.a.fitacf.bz2'
    t1 = time.time()
    fit_data = pydarnio.read_fitacf(fit_in_file)
    t2 = time.time()
    print('------ Generic BZ2 ------')
    print('BZ2 Read took: ', t2-t1, ' seconds')
    print('SUCCESS BZ2')
except Exception as e:
    print('FAIL BZ2')
    print(e)
Both succeeded:
------ Generic DMAP ------
Generic DMAP Read took: 0.05032610893249512 seconds
Generic DMAP Write took: 0.17098283767700195 seconds
SUCCESS Generic DMAP
------ Generic BZ2 ------
BZ2 Read took: 0.21122193336486816 seconds
SUCCESS BZ2
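Just as a cross-check (not part of the tests above), the compressed file can also be decompressed by hand with the standard-library bz2 module and then read as a plain file; a sketch, assuming read_fitacf accepts an uncompressed path (paths are placeholders):

```python
# Sketch: decompress a .fitacf.bz2 manually, then read the plain file,
# to compare against passing the .bz2 path straight to read_fitacf.
import bz2
import pydarnio

compressed = '/path/to/20211102.0000.00.rkn.a.fitacf.bz2'  # placeholder path
plain = compressed[:-len('.bz2')]

with bz2.open(compressed, 'rb') as f_in, open(plain, 'wb') as f_out:
    f_out.write(f_in.read())

records = pydarnio.read_fitacf(plain)
print(len(records), 'records')
```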
I'm still having an issue with the map file, though, where it expects vector.mlat:
Field vector.mlat missing: record 0
It's not unusual for some records to have none of the 'vector' fields, giving partial records (especially the first records, for some reason, though I tested another file and its 300th record was also missing vector.mlat). I'm assuming the field just isn't marked as optional. In superdarn_formats.py in pyDARNio these fields are called 'partial fields' for some reason, but they're essentially the same as extra fields in that they might not be present in the file, and that's fine. https://github.com/SuperDARN/pyDARNio/blob/543ee3a23c3310b63b9fc858975a139217b17adc/pydarnio/dmap/superdarn_formats.py#L329
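As a stopgap while those fields are treated as required, the generic reader (which doesn't enforce the format) can show which records actually lack them; a sketch, assuming read_dmap returns a list of dicts (path is a placeholder):

```python
# Sketch: use the generic DMAP reader to find map records that carry
# none of the 'vector.*' partial fields.
import pydarnio

records = pydarnio.read_dmap('/path/to/20210101.n.map')  # placeholder path

missing = [i for i, rec in enumerate(records)
           if not any(key.startswith('vector.') for key in rec)]
print('records without vector fields:', missing)
```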
I've fixed the most recent bug that @carleyjmartin encountered. This branch is ready for a PR as soon as the darn-dmap dependency supports numpy>=2.
This may be a naive question, but it looks like all of the native Python dmap code has been removed - where does the new source code live?
Not at all, I didn't realize I hadn't linked it anywhere! The source code is auto-generated from this repository. The repository is written entirely in Rust and uses a tool to generate Python bindings. I've used a GitHub Action that builds wheel files for a wide range of target OSes and CPU architectures, so it should be a simple pip install for anyone who wants to use it. It should be as near to a one-to-one replacement as possible, other than a slightly different pyDARNio interface, since the SDarnRead and SDarnWrite classes are now gone.
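For anyone migrating, the change looks roughly like this for a FITACF round trip (the new function names match the test script above; the old class-based calls are from pyDARNio v1.x as I remember them, so check the docs if they don't match exactly):

```python
import pydarnio

# Old class-based interface (pyDARNio v1.x), from memory:
reader = pydarnio.SDarnRead('in.fitacf')
records = reader.read_fitacf()
writer = pydarnio.SDarnWrite(records)
writer.write_fitacf('out.fitacf')

# New function-based interface on this branch:
records = pydarnio.read_fitacf('in.fitacf')
pydarnio.write_fitacf(records, 'out.fitacf')
```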
Also, I did conduct some benchmarking to quantify the improved performance.
- OS: openSUSE Leap 15.3
- Python: 3.8.18
- pyDARNio: v1.3
- CPU: Intel(R) Core(TM) i7-8700K @ 3.70 GHz, 12 cores
Read timings:

| File type | Size | Number of records | pyDARNio v1.3 | darn-dmap | Speedup |
|---|---|---|---|---|---|
| rawacf | 853 MB | 30592 | 55.67 s | 2.65 s | 21.0x |
| fitacf | 147 MB | 30592 | 116.82 s | 3.45 s | 33.9x |
| rawacf | 52.3 MB | 1423 | 2.79 s | 372.8 ms | 7.5x |
| fitacf | 7.4 MB | 1423 | 5.60 s | 410.0 ms | 13.7x |
| iqdat | 12.2 MB | 80 | 494.4 ms | 274.0 ms | 1.8x |
| grid | 169.6 kB | 58 | 457.2 ms | 263.2 ms | 1.7x |
| map | 15.8 MB | 717 | 2.94 s | 354.0 ms | 8.3x |
| snd | 1.7 kB | 2 | 340.8 ms | 254.2 ms | 1.3x |
| fitacf.bz2 | 3.1 MB | 1423 | 5.85 s | 596.7 ms | 9.8x |
The conclusion I drew from this is that the speed boost is most noticeable for files with many records and for file types with more fields per record.
Write timings:

| File type | Size | Number of records | pyDARNio v1.3 | darn-dmap | Speedup |
|---|---|---|---|---|---|
| rawacf | 853 MB | 30592 | 33.0 s | 17.6 s | 1.9x |
| fitacf | 147 MB | 30592 | 55.9 s | 13.1 s | 4.3x |
| rawacf | 52.3 MB | 1423 | 1.45 s | 757.4 ms | 1.9x |
| fitacf | 7.4 MB | 1423 | 2.67 s | 623 ms | 4.3x |
| iqdat | 12.2 MB | 80 | 99.53 ms | 74.71 ms | 1.3x |
| grid | 169.6 kB | 58 | 56.10 ms | 7.00 ms | 8.0x |
| map | 15.8 MB | 717 | 1.357 s | 638 ms | 2.1x |
| snd | 1.7 kB | 2 | 1.70 ms | 0.80 ms | 2.1x |
The speed boost with darn-dmap is more moderate for writing operations.
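For anyone who wants to repeat this on their own files, here is a minimal sketch of a wall-clock comparison on a single file (not necessarily the exact harness used for the tables; the paths and repeat count are placeholders):

```python
# Sketch: time repeated reads and writes of one file with the new interface.
import statistics
import time
import pydarnio

in_file = '/path/to/file.rawacf'    # placeholder path
out_file = '/path/to/out.rawacf'    # placeholder path
repeats = 5

read_times, write_times = [], []
for _ in range(repeats):
    t0 = time.perf_counter()
    data = pydarnio.read_rawacf(in_file)
    read_times.append(time.perf_counter() - t0)

    t0 = time.perf_counter()
    pydarnio.write_rawacf(data, out_file)
    write_times.append(time.perf_counter() - t0)

print(f'read:  {statistics.median(read_times):.3f} s (median of {repeats})')
print(f'write: {statistics.median(write_times):.3f} s (median of {repeats})')
```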
@egthomas do you have any interest in reviewing this PR? I've included the srng field in the supported fields, so it somewhat supersedes #75. I'd like to get someone from outside SuperDARN Canada to review as well, since it's typically Carley and Draven that I get to review pyDARNio PRs.
Scope
This PR is a major change to the API, completely changing the DMAP I/O workflow.
- SDarnRead and SDarnWrite are gone, replaced by one-off reading and writing functions for each specific file type.
- DMAP I/O is now handled by the darn-dmap dependency.
- The pyyaml and pathlib2 dependencies and the setup.py file are removed.

issue: N/A
Approval
Number of approvals: 2
Test
Please test the following DMAP I/O functions (a round-trip sketch follows the list):
- read_iqdat
- read_rawacf
- read_fitacf
- read_grid
- read_map
- read_snd
- read_dmap
- write_iqdat
- write_rawacf
- write_fitacf
- write_grid
- write_map
- write_snd
- write_dmap
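A compact way to exercise each read/write pair (a sketch only; the input paths are placeholders and need real data of each type):

```python
# Sketch: round-trip one file of each type through the new functions.
import pydarnio

test_files = {                        # placeholder paths
    'iqdat':  '/path/to/file.iqdat',
    'rawacf': '/path/to/file.rawacf',
    'fitacf': '/path/to/file.fitacf',
    'grid':   '/path/to/file.grid2',
    'map':    '/path/to/file.map',
    'snd':    '/path/to/file.snd',
    'dmap':   '/path/to/file.fitacf',   # generic read/write works on any DMAP file
}

for ftype, in_path in test_files.items():
    read_fn = getattr(pydarnio, f'read_{ftype}')
    write_fn = getattr(pydarnio, f'write_{ftype}')
    try:
        records = read_fn(in_path)
        write_fn(records, f'/tmp/roundtrip.{ftype}')   # placeholder output path
        print(f'{ftype}: OK ({len(records)} records)')
    except Exception as err:
        print(f'{ftype}: FAIL - {err}')
```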