pwayner opened this issue 4 years ago (Open)
Thanks for posting this. It appears that the input data you are providing to the E2E is producing a model that cannot be solved.
The key line is:
Infeasible model
What input data are you using, what value of epsilon are you using, and are you running the stock E2E reader or a modified one?
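For debugging: the AttributeError at the bottom of your trace ("Unable to retrieve attribute 'ObjVal'") is what gurobipy raises when ObjVal is read from a model that has no solution, which matches the "Infeasible model" line. A minimal sketch of how you might localize the problem -- this is not E2E code; the helper name and output path are made up, and m stands for the gurobipy model that l2geoimp builds -- is to check the solver status before reading ObjVal and dump an IIS when the model is infeasible:

from gurobipy import GRB

def explain_objval_failure(m, ilp_path="infeasible_model.ilp"):
    """Hypothetical helper (not part of the E2E): call it right after
    m.optimize() to see why reading m.ObjVal would fail."""
    if m.Status == GRB.OPTIMAL:
        print("objective:", m.ObjVal)      # ObjVal only exists once a solution is found
    elif m.Status == GRB.INFEASIBLE:
        m.computeIIS()                     # find an Irreducible Inconsistent Subsystem
        m.write(ilp_path)                  # the .ilp file lists the clashing constraints
        for c in m.getConstrs():
            if c.IISConstr:
                print("in conflict:", c.ConstrName)
    else:
        print("solver status:", m.Status)  # e.g. INF_OR_UNBD, SUBOPTIMAL, TIME_LIMIT

Dropping a call like that into l2geoimp just before the ObjVal read (or running it on a saved copy of the failing model) should tell you which constraints cannot all be satisfied at once, which usually points back at the offending records in the input data.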
I'm starting with a new data set and getting failures. Any thoughts on what might be causing this? Is there a strategy for debugging the data -- which could be glitchy -- and figuring out what is triggering the failure?
Solved in 0 iterations and 0.00 seconds
Infeasible model
20/03/22 10:25:21 ERROR Executor: Exception in task 58.0 in stage 99.0 (TID 650)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
    return f(*args, **kwargs)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 154, in <lambda>
    nodes_dict[levels_reversed[i+1]] = parent_child_rdd.map(lambda nodes: geoimp_wrapper(config,nodes,feas_dict[levels_reversed[i]],min_schema_dims)).flatMap(lambda children: tuple([child for child in children])).persist()
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 516, in geoimp_wrapper
    constraints=constraints_comb,identifier=parent_geocode, parent_constraints = parent_constraints, min_schema_add_over_dims=min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 560, in L2geoimp_wrapper
    min_schema_add_over_dims = min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 257, in l2geoimp
    obj_val = m.ObjVal
  File "model.pxi", line 287, in gurobipy.Model.__getattr__ (../../src/python/gurobipy.c:50351)
  File "model.pxi", line 1589, in gurobipy.Model.getAttr (../../src/python/gurobipy.c:65331)
  File "model.pxi", line 4028, in gurobipy.Model.__gettypedattr (../../src/python/gurobipy.c:99455)
AttributeError: b"Unable to retrieve attribute 'ObjVal'"
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
    at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:123)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

20/03/22 10:25:21 ERROR TaskSetManager: Task 58 in stage 99.0 failed 1 times; aborting job

Traceback (most recent call last):
  File "/home/pcw/Census/census2020-das-e2e/das_framework/driver.py", line 742, in main
    raise(e)
  File "/home/pcw/Census/census2020-das-e2e/das_framework/driver.py", line 739, in main
    data = das.run()
  File "/home/pcw/Census/census2020-das-e2e/das_framework/driver.py", line 614, in run
    written_data = self.runWriter(privatized_data)
  File "/home/pcw/Census/census2020-das-e2e/das_framework/driver.py", line 562, in runWriter
    written_data = self.writer.write(privatized_data)
  File "/home/pcw/Census/census2020-das-e2e/programs/writer/e2e_1940_writer.py", line 221, in write
    unit_data = unit_data.flatMapValues(unit_to_list).flatMap(expand_unit).zipWithIndex().map(lambda row: (row[0][1], (row[0][0], row[1], row[0][2]))).persist()
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/rdd.py", line 2174, in zipWithIndex
    nums = self.mapPartitions(lambda it: [sum(1 for i in it)]).collect()
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/rdd.py", line 816, in collect
    sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
  File "/home/pcw/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1257, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/home/pcw/.local/lib/python3.6/site-packages/py4j/protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 58 in stage 99.0 failed 1 times, most recent failure: Lost task 58.0 in stage 99.0 (TID 650, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
    return f(*args, **kwargs)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 154, in <lambda>
    nodes_dict[levels_reversed[i+1]] = parent_child_rdd.map(lambda nodes: geoimp_wrapper(config,nodes,feas_dict[levels_reversed[i]],min_schema_dims)).flatMap(lambda children: tuple([child for child in children])).persist()
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 516, in geoimp_wrapper
    constraints=constraints_comb,identifier=parent_geocode, parent_constraints = parent_constraints, min_schema_add_over_dims=min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 560, in L2geoimp_wrapper
    min_schema_add_over_dims = min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 257, in l2geoimp
    obj_val = m.ObjVal
  File "model.pxi", line 287, in gurobipy.Model.__getattr__ (../../src/python/gurobipy.c:50351)
  File "model.pxi", line 1589, in gurobipy.Model.getAttr (../../src/python/gurobipy.c:65331)
  File "model.pxi", line 4028, in gurobipy.Model.__gettypedattr (../../src/python/gurobipy.c:99455)
AttributeError: b"Unable to retrieve attribute 'ObjVal'"
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
    at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:123)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
    at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:166)
    at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/home/pcw/.local/lib/python3.6/site-packages/pyspark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
    return f(*args, **kwargs)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 154, in <lambda>
    nodes_dict[levels_reversed[i+1]] = parent_child_rdd.map(lambda nodes: geoimp_wrapper(config,nodes,feas_dict[levels_reversed[i]],min_schema_dims)).flatMap(lambda children: tuple([child for child in children])).persist()
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/topdown_engine.py", line 516, in geoimp_wrapper
    constraints=constraints_comb,identifier=parent_geocode, parent_constraints = parent_constraints, min_schema_add_over_dims=min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 560, in L2geoimp_wrapper
    min_schema_add_over_dims = min_schema_add_over_dims)
  File "/home/pcw/Census/census2020-das-e2e/programs/engine/geoimpgbopt.py", line 257, in l2geoimp
    obj_val = m.ObjVal
  File "model.pxi", line 287, in gurobipy.Model.__getattr__ (../../src/python/gurobipy.c:50351)
  File "model.pxi", line 1589, in gurobipy.Model.getAttr (../../src/python/gurobipy.c:65331)
  File "model.pxi", line 4028, in gurobipy.Model.__gettypedattr (../../src/python/gurobipy.c:99455)
AttributeError: b"Unable to retrieve attribute 'ObjVal'"
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
    at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:349)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:123)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more