istt / nifi

Apache NiFi
https://nifi.apache.org/
Apache License 2.0
0 stars 0 forks source link

Compile NIFI for Mapr #1

Open dinhtrung opened 3 years ago

dinhtrung commented 3 years ago

How to compile NiFi with MapR support

  1. Clone the latest nifi project from https://github.com/apache/nifi
$ git clone git@github.com:apache/nifi.git 
  2. Patch nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/pom.xml with the following changes:
@@ -69,6 +68,11 @@
             <scope>provided</scope>
         </dependency>
         <dependency>
+           <groupId>com.mapr.hadoop</groupId>
+           <artifactId>maprfs</artifactId>
+           <version>${mapr.version}</version>
+       </dependency>
+        <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-distributed-cache-client-service-api</artifactId>
         </dependency>
@@ -106,16 +109,6 @@
             <artifactId>nifi-record</artifactId>
             <scope>compile</scope>
         </dependency>
-        <dependency>
-            <groupId>com.mapr.hadoop</groupId>
-            <artifactId>maprfs</artifactId>
-            <version>6.2.0.0-mapr</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-mapreduce-client-contrib</artifactId>
-            <version>${hadoop.version}</version>
-        </dependency>
     </dependencies>

     <build>
  3. Patch the ListHDFS.java and PutHDFS.java sources under nifi-hdfs-processors with the following changes:
diff --git a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/ListHDFS.java b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/ListHDFS.java
index 02209e3ee0..b8699a678d 100644
--- a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/ListHDFS.java
+++ b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/ListHDFS.java
@@ -550,7 +550,7 @@ public class ListHDFS extends AbstractHadoopProcessor {
         values.put(IS_DIRECTORY, fileStatus.isDirectory());
         values.put(IS_SYM_LINK, fileStatus.isSymlink());
         values.put(IS_ENCRYPTED, fileStatus.isEncrypted());
-        values.put(IS_ERASURE_CODED, fileStatus.isErasureCoded());
+        // values.put(IS_ERASURE_CODED, fileStatus.isErasureCoded());

         return new MapRecord(getRecordSchema(), values);
     }
diff --git a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/PutHDFS.java b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/PutHDFS.java
index fbcd3b1457..493ed25224 100644
--- a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/PutHDFS.java
+++ b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/processors/hadoop/PutHDFS.java
@@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsCreateModes;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -329,13 +328,12 @@ public class PutHDFS extends AbstractHadoopProcessor {
                                 } else {
                                   final EnumSet<CreateFlag> cflags = EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE);

-                                  final Boolean ignoreLocality = context.getProperty(IGNORE_LOCALITY).asBoolean();
-                                  if (ignoreLocality) {
-                                    cflags.add(CreateFlag.IGNORE_CLIENT_LOCALITY);
-                                  }
+                                //   final Boolean ignoreLocality = context.getProperty(IGNORE_LOCALITY).asBoolean();
+                                //   if (ignoreLocality) {
+                                //     cflags.add(CreateFlag.IGNORE_CLIENT_LOCALITY);
+                                //   }

-                                  fos = hdfs.create(tempCopyFile, FsCreateModes.applyUMask(FsPermission.getFileDefault(),
-                                      FsPermission.getUMask(hdfs.getConf())), cflags, bufferSize, replication, blockSize,
+                                  fos = hdfs.create(tempCopyFile, FsPermission.getFileDefault(), cflags, bufferSize, replication, blockSize,
                                       null, null);
                                 }
  4. Compile NiFi from source:
$ cd nifi
$ mvn -Pmapr -T 2.0C -DskipTests=true -Dmapr.version=6.2.0.0-mapr -Dhadoop.version=2.7.5.0-mapr-710 clean versions:set -DnewVersion=1.14.0-mapr-710 package install 

If everything works, the complete NiFi binaries will be located in the nifi-assembly/target/ folder, ready to deploy.

dinhtrung commented 3 years ago

1 - Install MapR dependencies:

$ cat /etc/yum.repos.d/mapr_core.repo 
[MapR_Core]
baseurl = https://package.mapr.com/releases/v6.2.0/redhat
enabled = 1
gpgcheck = 1
name = MapR Core Components
protect = 1

$ cat /etc/yum.repos.d/mapr_ecosystem.repo 
[MapR_Ecosystem]
baseurl = https://package.mapr.com/releases/MEP/MEP-7.1.0/redhat
enabled = 1
gpgcheck = 1
name = MapR Ecosystem Components
protect = 1

$ yum -y install mapr-client mapr-hadoop-client

2 - Configure NiFi to load the MapR native library:

Add the native library path to the NiFi bootstrap.conf:

@@ -73,6 +73,7 @@
 # It is needed when a custom jar (eg. JDBC driver) has been configured on a component in the flow and this custom jar depends on a native library
 # and tries to load it by its absolute path (java.lang.System.load(String filename) method call).
 # Use this Java Agent only if you get "Native Library ... already loaded in another classloader" errors otherwise!
+java.arg.18=-Djava.library.path=/opt/mapr/lib
 #java.arg.18=-javaagent:./lib/aspectj/aspectjweaver-1.9.6.jar
 #java.arg.19=-Daj.weaving.loadersToSkip=sun.misc.Launcher$AppClassLoader,jdk.internal.loader.ClassLoaders$AppClassLoader,org.eclipse.jetty.webapp.WebAppClassLoader,\
 #            org.apache.jasper.servlet.JasperLoader,org.jvnet.hk2.internal.DelegatingClassLoader,org.apache.nifi.nar.NarClassLoader

3 - Under the Hadoop Processor settings, add the following settings: