Apache Iceberg
https://iceberg.apache.org/
Apache License 2.0

Spark 3.5: Fix flaky test due to temp directory not empty during delete #11470

Closed. manuzhang closed this 2 weeks ago

manuzhang commented 2 weeks ago

Follow-up of #10811 to also ignore other types of FileSystemException, such as DirectoryNotEmptyException.

TestDataFrameWrites > testFaultToleranceOnWrite() > format = parquet FAILED
    java.nio.file.DirectoryNotEmptyException: /tmp/junit-7768099913831474039/parquet/test
        at java.base/sun.nio.fs.UnixFileSystemProvider.implDelete(UnixFileSystemProvider.java:289)
        at java.base/sun.nio.fs.AbstractFileSystemProvider.delete(AbstractFileSystemProvider.java:104)
        at java.base/java.nio.file.Files.delete(Files.java:1152)
        at org.apache.commons.io.FileUtils.delete(FileUtils.java:1222)
        at org.apache.commons.io.FileUtils.deleteDirectory(FileUtils.java:1242)
        at org.apache.iceberg.spark.source.TestDataFrameWrites.testFaultToleranceOnWrite(TestDataFrameWrites.java:427)
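
For context, a minimal sketch of what the description above amounts to; the class and method names below are hypothetical and this is not the actual patch. The point is that catching the broader FileSystemException covers both NoSuchFileException and DirectoryNotEmptyException, either of which can surface while aborted Spark write tasks race with the cleanup of the test table folder.

// Sketch only: illustrative helper, not part of the actual change.
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystemException;
import org.apache.commons.io.FileUtils;

public class TableFolderCleanup {
  /** Deletes a test table folder, retrying while concurrent task aborts race with the delete. */
  public static void deleteWithRetry(File location) throws IOException {
    while (location.exists()) {
      try {
        FileUtils.deleteDirectory(location);
      } catch (FileSystemException e) {
        // Covers NoSuchFileException and DirectoryNotEmptyException thrown when
        // aborted Spark write tasks add or remove files under the directory
        // while it is being deleted; loop and try again until it is gone.
      }
    }
  }
}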

cc @Fokko @nastra

nastra commented 2 weeks ago

I'd rather try and fix it slightly differently:

+  @TempDir private File location;
+
   private static SparkSession spark = null;
   private static JavaSparkContext sc = null;

@@ -140,14 +141,12 @@ public class TestDataFrameWrites extends ParameterizedAvroDataTest {

   @Override
   protected void writeAndValidate(Schema schema) throws IOException {
-    File location = createTableFolder();
     Table table = createTable(schema, location);
     writeAndValidateWithLocations(table, location, new File(location, "data"));
   }

   @TestTemplate
   public void testWriteWithCustomDataLocation() throws IOException {
-    File location = createTableFolder();
     File tablePropertyDataLocation = temp.resolve("test-table-property-data-dir").toFile();
     Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location);
     table
@@ -157,13 +156,6 @@ public class TestDataFrameWrites extends ParameterizedAvroDataTest {
     writeAndValidateWithLocations(table, location, tablePropertyDataLocation);
   }

-  private File createTableFolder() throws IOException {
-    File parent = temp.resolve("parquet").toFile();
-    File location = new File(parent, "test");
-    assertThat(location.mkdirs()).as("Mkdir should succeed").isTrue();
-    return location;
-  }
-
   private Table createTable(Schema schema, File location) {
     HadoopTables tables = new HadoopTables(CONF);
     return tables.create(schema, PartitionSpec.unpartitioned(), location.toString());
@@ -397,7 +389,6 @@ public class TestDataFrameWrites extends ParameterizedAvroDataTest {

   @TestTemplate
   public void testFaultToleranceOnWrite() throws IOException {
-    File location = createTableFolder();
     Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields());
     Table table = createTable(schema, location);

@@ -421,13 +412,5 @@ public class TestDataFrameWrites extends ParameterizedAvroDataTest {

     assertThat(snapshotBeforeFailingWrite).isEqualTo(snapshotAfterFailingWrite);
     assertThat(resultBeforeFailingWrite).isEqualTo(resultAfterFailingWrite);
-
-    while (location.exists()) {
-      try {
-        FileUtils.deleteDirectory(location);
-      } catch (NoSuchFileException e) {
-        // ignore NoSuchFileException when a file is already deleted
-      }
-    }

@manuzhang can you try and see whether that fixes the issue?
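
For reference, a minimal, self-contained illustration of the @TempDir approach suggested above (not Iceberg code; the test class and file names are made up, and the field declaration simply mirrors the one in the diff): JUnit 5 injects a fresh directory into the annotated field before each test and deletes it recursively afterwards, so the createTableFolder() helper and the manual delete-retry loop are no longer needed.

// Illustrative test only; names are hypothetical.
import static org.assertj.core.api.Assertions.assertThat;

import java.io.File;
import java.nio.file.Files;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

class TempDirUsageTest {
  // JUnit creates a fresh directory before each test and removes it on teardown.
  @TempDir private File location;

  @Test
  void writesUnderManagedTempDir() throws Exception {
    File data = new File(location, "data");
    assertThat(data.mkdirs()).as("Mkdir should succeed").isTrue();
    Files.writeString(new File(data, "part-00000.txt").toPath(), "row");
    assertThat(new File(data, "part-00000.txt")).exists();
    // No explicit cleanup: the extension deletes `location` recursively after the test.
  }
}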