Open Fahad021 opened 6 months ago
# Configuration for the HDFS old-file cleanup run.
# HDFS directory whose direct entries are inspected.
HDFS_DIR="/path/to/your/directory"
# Scratch file that collects the paths selected for deletion.
OLD_FILES_LIST="old_files.txt"
# One log file per run; the timestamp in the name keeps runs apart.
LOG_FILE="delete_oldfiles$(date +%Y-%m-%d_%H-%M-%S).log"

# Run header. Each message must be its own statement: when they share a line,
# the following "echo" and its text become extra file operands of tee instead
# of being printed and logged.
echo "Starting script at $(date)" | tee -a "$LOG_FILE"
echo "HDFS Directory: $HDFS_DIR" | tee -a "$LOG_FILE"
echo "--------------------------------------" | tee -a "$LOG_FILE"
#######################################
# Decide whether a modification timestamp is more than six months old.
# Arguments: $1 - modification time in whole seconds since the Unix epoch
# Returns:   0 if the timestamp is strictly older than "6 months ago",
#            1 otherwise (including empty or non-numeric input)
#######################################
is_file_old() {
  local file_date_seconds=$1
  local cutoff_seconds

  # Anything that is not a plain non-negative integer is reported as
  # "not old", so a malformed timestamp can never mark a file for deletion.
  case "$file_date_seconds" in
    '' | *[!0-9]*) return 1 ;;
  esac

  # Relative-date syntax as accepted by GNU date.
  cutoff_seconds=$(date +%s --date='6 months ago') || return 1

  if [ "$file_date_seconds" -lt "$cutoff_seconds" ]; then
    return 0 # True, file is older
  fi
  return 1 # False, file is not older
}
# Scan the listing of $HDFS_DIR and record every file older than six months
# in $OLD_FILES_LIST, logging each hit.
echo "Checking for files older than 6 months in $HDFS_DIR..." | tee -a "$LOG_FILE"

# Start from an empty list so entries left behind by an interrupted earlier
# run are not deleted without a fresh age check.
: > "$OLD_FILES_LIST"

# Listing columns: permissions, replication, owner, group, size, date, time,
# path. Reading into eight variables leaves the whole remainder of the line in
# file_path, so paths containing spaces stay intact.
hadoop fs -ls "$HDFS_DIR" | while read -r perms _ _ _ _ _ _ file_path; do
  # The "Found N items" header line has no eighth column.
  [ -n "$file_path" ] || continue

  # Directories cannot be removed by a plain -rm; only files are collected.
  case "$perms" in
    d*) continue ;;
  esac

  # stdin is detached so the command cannot consume the listing being read.
  mod_time_ms=$(hadoop fs -stat "%Y" "$file_path" </dev/null) || continue
  case "$mod_time_ms" in
    '' | *[!0-9]*) continue ;;
  esac

  # -stat "%Y" reports milliseconds since the epoch, while is_file_old
  # compares against `date +%s` seconds; convert before the check.
  file_mod_time=$((mod_time_ms / 1000))

  if is_file_old "$file_mod_time"; then
    printf '%s\n' "$file_path" >> "$OLD_FILES_LIST"
    echo "Found old file: $file_path" | tee -a "$LOG_FILE"
  fi
done
# Delete every path recorded in $OLD_FILES_LIST and log the outcome of each
# removal; report when there is nothing to do.
if [ -s "$OLD_FILES_LIST" ]; then
  echo "Files older than 6 months found. Starting deletion process..." | tee -a "$LOG_FILE"

  # IFS= and -r keep leading whitespace and backslashes in paths intact.
  while IFS= read -r file; do
    # Tolerate blank lines in the list.
    [ -n "$file" ] || continue

    echo "Deleting $file..." | tee -a "$LOG_FILE"
    # stdin is detached so the command cannot consume the rest of the list.
    if hadoop fs -rm "$file" </dev/null; then
      echo "Successfully deleted $file" | tee -a "$LOG_FILE"
    else
      echo "Failed to delete $file" | tee -a "$LOG_FILE"
    fi
  done < "$OLD_FILES_LIST"

  echo "Deletion process completed." | tee -a "$LOG_FILE"
else
  echo "No files older than 6 months found. No action required." | tee -a "$LOG_FILE"
fi
# Remove the scratch list. -f keeps this quiet when the list does not exist,
# and -- protects against a name that starts with a dash.
rm -f -- "$OLD_FILES_LIST"

# Run footer. Kept as separate statements so the messages are printed and
# logged rather than handed to rm as file operands.
echo "Script execution completed at $(date)" | tee -a "$LOG_FILE"
echo "--------------------------------------" | tee -a "$LOG_FILE"
# Configuration for the date-string variant of the HDFS cleanup.
# HDFS directory whose direct entries are inspected.
HDFS_DIR="/path/to/your/directory"
# One log file per run; the timestamp in the name keeps runs apart.
LOG_FILE="delete_oldfiles$(date +%Y-%m-%d_%H-%M-%S).log"

# Each message must be its own statement: when they share a line, the second
# "echo" and its text become extra file operands of tee.
echo "Starting script at $(date)" | tee -a "$LOG_FILE"
echo "Checking for files older than 6 months in $HDFS_DIR..." | tee -a "$LOG_FILE"

# Cutoff as YYYY-MM-DD so it can be compared as a string against the date
# column of the listing. Relative-date syntax as accepted by GNU date.
CUTOFF_DATE=$(date -d '6 months ago' +%Y-%m-%d)
# Walk the listing of $HDFS_DIR and delete every file whose modification date
# is earlier than $CUTOFF_DATE, logging the outcome of each removal.
#
# Listing columns: permissions, replication, owner, group, size, date, time,
# path. Reading into eight variables leaves the whole remainder of the line in
# file_path, so paths containing spaces stay intact.
hdfs dfs -ls "$HDFS_DIR" | while read -r perms _ _ _ _ mod_date _ file_path; do
  # The "Found N items" header line has no path column. Without this guard its
  # empty date sorts before any cutoff and a removal of "" would be attempted.
  [[ -n "$file_path" ]] || continue

  # Only act on rows whose date column really is YYYY-MM-DD.
  [[ "$mod_date" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] || continue

  # Directories cannot be removed by a plain -rm; only files are handled.
  if [[ "$perms" == d* ]]; then
    continue
  fi

  # Check if the modification date is older than the cutoff date.
  # ISO dates order correctly under plain string comparison.
  if [[ "$mod_date" < "$CUTOFF_DATE" ]]; then
    echo "Deleting old file: $file_path" | tee -a "$LOG_FILE"
    # stdin is detached so the command cannot consume the listing being read.
    if hdfs dfs -rm "$file_path" </dev/null; then
      echo "Successfully deleted: $file_path" | tee -a "$LOG_FILE"
    else
      echo "Failed to delete: $file_path" | tee -a "$LOG_FILE"
    fi
  fi
done

echo "Deletion process completed." | tee -a "$LOG_FILE"
-- Return every column of your_first_table plus one derived code per client
-- number column. For each of the three slots: an empty client number gives an
-- empty string, a joined row with a non-null client_code gives that code, and
-- every other case gives N.
SELECT
    src.*,
    CASE
        WHEN src.client_no_1 = '' THEN ''
        WHEN c1.client_code IS NOT NULL THEN c1.client_code
        ELSE 'N'
    END AS code_client_1,
    CASE
        WHEN src.client_no_2 = '' THEN ''
        WHEN c2.client_code IS NOT NULL THEN c2.client_code
        ELSE 'N'
    END AS code_client_2,
    CASE
        WHEN src.client_no_3 = '' THEN ''
        WHEN c3.client_code IS NOT NULL THEN c3.client_code
        ELSE 'N'
    END AS code_client_3
FROM your_first_table src
-- The lookup table is joined once per client number column.
LEFT JOIN your_second_table c1
    ON src.client_no_1 = c1.client_no
LEFT JOIN your_second_table c2
    ON src.client_no_2 = c2.client_no
LEFT JOIN your_second_table c3
    ON src.client_no_3 = c3.client_no