When concatenating a DList created before a group-by-key (GBK) with one created after the GBK, Scoobi doesn't output any data. Example spec below:
import org.specs2._
import com.nicta.scoobi.Scoobi._
import com.nicta.scoobi.testing.mutable._
import com.nicta.scoobi.testing.SimpleJobs
/**
 * Reproduction spec for a bug report: a DList built before a `groupBy` is
 * concatenated with a DList derived after that `groupBy`, and the
 * concatenation is persisted together with another post-`groupBy` DList.
 *
 * The DList operations are intentionally kept in this exact order and shape,
 * because the reported failure depends on the computation graph they form.
 */
class GraphBugSpec extends HadoopSpecification with SimpleJobs {

  // Opt out of cluster execution for this spec.
  override def isCluster = false

  "Concat breaks graph" >> { implicit sc: ScoobiConfiguration =>
    val numbers = DList(1, 2, 3, 2)

    // First validation pass (before the group-by): values below 2 become errors.
    val firstPass: DList[Either[String, Int]] =
      numbers.map(i => if (i < 2) Left("number too low") else Right(i))
    val firstErrors: DList[String] = firstPass.collect { case Left(e) => e }
    val accepted: DList[Int] = firstPass.collect { case Right(i) => i }

    // Second validation pass (after the group-by): a key is kept only when
    // its group holds more than one entry; otherwise it becomes an error.
    val secondPass: DList[Either[String, Int]] =
      accepted.groupBy(identity).collect {
        case (k, vs) if vs.size > 1 => Right(k)
        case (k, vs)                => Left(s"Too few entries in group $k")
      }
    val secondErrors = secondPass.collect { case Left(e) => e }
    val survivors = secondPass.collect { case Right(i) => i }

    // Persisting the three lists separately works:
    //persist(firstErrors, secondErrors, survivors)
    // Persisting the pre-/post-group-by concatenation is what triggers the bug.
    persist(firstErrors ++ secondErrors, survivors)
    survivors.run.toList must_== List(2) // fails because list is empty
  }
}
When concatenating a DList created before a group-by-key (GBK) with one created after the GBK, Scoobi doesn't output any data. See the example spec above.