astronomy-commons / axs

Astronomy eXtensions for Spark: Fast, Scalable, Analytics of Billion+ row catalogs
https://axs.readthedocs.io/
BSD 3-Clause "New" or "Revised" License

region query fails with `spans_prime_mer=True` #8

Open danielsf opened 5 years ago

danielsf commented 5 years ago

The command

ztf_mar19.region(ra1=340.0, ra2=1.0, dec1=10.0, dec2=20.0, spans_prime_mer=True).count()

results in:

ERROR: Py4JError: An error occurred while calling o1569.or. Trace:
py4j.Py4JException: Method or([class java.lang.Double]) does not exist
    at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
    at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
    at py4j.Gateway.invoke(Gateway.java:274)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)

 [py4j.protocol]

---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
<ipython-input-88-79eee7240433> in <module>()
----> 1 ztf_mar19.region(ra1=340.0, ra2=1.0, dec1=10.0, dec2=20.0, spans_prime_mer=True).count()

/epyc/opt/spark-axs/python/axs/axsframe.py in region(self, ra1, dec1, ra2, dec2, spans_prime_mer)
    235             return wrap(self._df.where(self._df.zone.between(zone1, zone2) &
    236                                        (self._df.ra >= ra1 |
--> 237                                         self._df.ra <= ra2) &
    238                                        self._df.dec.between(dec1, dec2)), self._table_info)
    239         else:

/epyc/opt/spark-axs/python/pyspark/sql/column.py in _(self, other)
    113     def _(self, other):
    114         jc = other._jc if isinstance(other, Column) else other
--> 115         njc = getattr(self._jc, name)(jc)
    116         return Column(njc)
    117     _.__doc__ = doc

/epyc/opt/spark-axs/python/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/epyc/opt/spark-axs/python/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/epyc/opt/spark-axs/python/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    330                 raise Py4JError(
    331                     "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
--> 332                     format(target_id, ".", name, value))
    333         else:
    334             raise Py4JError(

Py4JError: An error occurred while calling o1569.or. Trace:
py4j.Py4JException: Method or([class java.lang.Double]) does not exist
    at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
    at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
    at py4j.Gateway.invoke(Gateway.java:274)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)

The same query runs fine with `spans_prime_mer=False`.

After discussing with @ctslater, it seems likely that this block

    235             return wrap(self._df.where(self._df.zone.between(zone1, zone2) &
    236                                        (self._df.ra >= ra1 |
--> 237                                         self._df.ra <= ra2) &
    238                                        self._df.dec.between(dec1, dec2)), self._table_info)
    239         else:

needs parentheses around `self._df.ra >= ra1` and `self._df.ra <= ra2`. In Python, the bitwise `|` operator binds more tightly than the comparison operators, so the expression is parsed as `self._df.ra >= (ra1 | self._df.ra) <= ra2`; the `ra1 | self._df.ra` subexpression falls through to `Column.__ror__`, which asks Py4J for a Java method `or(Double)` that does not exist, producing the error above.
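
Concretely, the fixed block would look like this (the same code as above, with only the parentheses added):

```python
return wrap(self._df.where(self._df.zone.between(zone1, zone2) &
                           ((self._df.ra >= ra1) |   # comparisons now evaluate
                            (self._df.ra <= ra2)) &  # before the bitwise OR
                           self._df.dec.between(dec1, dec2)), self._table_info)
```

As a sanity check outside AXS, the precedence behavior can be reproduced on a toy DataFrame (made-up values, not the ZTF table):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Toy catalog: two rows inside the wrapped RA range [340, 360) U [0, 1], one outside.
df = spark.createDataFrame([(350.0, 15.0), (0.5, 15.0), (100.0, 15.0)],
                           ["ra", "dec"])
ra1, ra2 = 340.0, 1.0

# df.where(df.ra >= ra1 | df.ra <= ra2)    # raises the Py4JError from this report
print(df.where((df.ra >= ra1) | (df.ra <= ra2)).count())  # prints 2
```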