User Tools

Site Tools


linux:parallel_rsync

This is an old revision of the document!


Description

This set of scripts parallelizes the transfer of a huge directory tree while respecting a maximum number of simultaneous transfers.

Instructions

Pre-Reqs

  • gnu screen
  • rsync
  • ssh

psync

This script will:

  • Check if the directory to transfer exists
  • Calculate the list of directories to transfer at a depth of ${MAXDEPTH}
  • Transfer in parallel, non-recursively, the upper directories from depth 0 (the target itself) to depth ${MAXDEPTH}-1 (a progress message is shown every 100 directories)
  • Transfer in parallel, recursively, the directories at depth ${MAXDEPTH} (a message is shown for each folder)
  • Note that ${MAXPARALEL} is a soft limit: the “check_max_processes()” function only polls the number of running rsync processes once per second (“sleep 1”), so the limit can be briefly exceeded.
#!/bin/bash
# psync - launch parallel rsync transfers for a directory tree.
#
# Usage: psync /path/to/run
#
# Sets two globals that the functions below read:
#   TARGET - directory tree to transfer (first positional argument)
#   LOGDIR - <directory of this script>/<basename of TARGET>; it is wiped and
#            recreated on every run, with a "transferlogs" subdirectory.

# Quoted test: an argument containing whitespace must not break the check.
[[ -z "$1" ]] && echo "Usage: $0 /path/to/run" && exit 1

TARGET="$1"

[[ ! -d "${TARGET}" ]] && echo -e "$TARGET\n not a directory" && exit 1

LOGDIR="$(dirname "$0")/$(basename "${TARGET}")"
# Any log directory left over from a previous run is removed before starting.
[[ -d "${LOGDIR}" ]] && echo "Cleanup" && rm -fr -- "${LOGDIR}"
# Without the log directory the transfers have nowhere to write; stop here.
mkdir -p -- "${LOGDIR}/transferlogs" || exit 1

#######################################
# Block until the number of running rsync processes is no longer above a limit.
# Polls `ps` once per second and prints a "." to stdout for every poll that
# finds too many processes.
# Arguments: $1 - maximum number of rsync processes tolerated
# Outputs:   one "." per waiting iteration
# Notes:     the comparison is "greater than", and the count is only sampled
#            between sleeps, so the limit is a soft one.
#######################################
check_max_processes()
{
    local MAXPARALEL=$1
    local running

    while :; do
        # Count ps lines whose command column starts with "rsync": the
        # pattern anchors on the ":SS " tail of the preceding TIME column.
        # grep -E replaces the obsolescent egrep, which warns on stderr
        # with recent GNU grep; -c replaces the extra "wc -l" process.
        running=$(ps waux | grep -Ec ":[0-9]{2} rsync")
        [ "${running}" -gt "${MAXPARALEL}" ] || break
        printf "%s" .
        sleep 1
    done
}

#######################################
# Launch one detached screen session per directory of the tree under TARGET,
# each running the launch_rsync.sh helper, throttled by check_max_processes.
#
# Phase 1: every directory at depth 0 .. MAXDEPTH-1 is handed to the helper
#          with "-nr" (announced as the NOT recursive copy).
# Phase 2: every directory at depth MAXDEPTH is handed to the helper with
#          "-r" (announced as the recursive rsyncs).
#
# Globals:   TARGET (read) - root of the tree to transfer
#            LOGDIR (read) - passed to the helper as its last argument
# Arguments: $1 is accepted for compatibility but not read; the tree is
#            always taken from TARGET.
# Outputs:   progress messages on stdout
#######################################
sync_this()
{
    local MAXDEPTH=3
    local MAXPARALEL=20
    local LAUCHRSYNC="/root/autosync/launch_rsync.sh"
    local -a DIRLIST=()
    local -a LEVEL=()
    local ITEM i x

    # Directory names are read NUL-delimited so that names containing
    # whitespace or glob characters stay intact.
    while IFS= read -r -d '' ITEM; do
        DIRLIST+=("${ITEM}")
    done < <(find "${TARGET}" -mindepth "${MAXDEPTH}" -maxdepth "${MAXDEPTH}" -type d -print0)

    echo "Copying files and directories NOT recursively"
    for ((i = 0; i < MAXDEPTH; i++)); do
        # Collect the whole level first; launching from inside the read loop
        # would let the launched commands inherit find's output as stdin.
        LEVEL=()
        while IFS= read -r -d '' ITEM; do
            LEVEL+=("${ITEM}")
        done < <(find "${TARGET}" -mindepth "${i}" -maxdepth "${i}" -type d -print0)

        x=0
        for ITEM in "${LEVEL[@]}"; do
            check_max_processes "${MAXPARALEL}"
            screen -S "${x}" -d -m "${LAUCHRSYNC}" -nr "${ITEM}" "nr_${x}" "${LOGDIR}"
            x=$((x + 1))
            # Progress line every 100 launches.
            if ((x % 100 == 0)); then
                printf "\n%s\n" "$x Directories Copied Not recursively"
            fi
        done
        echo "Deep $i DONE, going upper"
    done

    echo "Launching recursive rsyncs in deep ${MAXDEPTH}"
    for ((i = 0; i < ${#DIRLIST[@]}; i++)); do
        # Shown 1-based so the last message reads "N of N"; the session name
        # and log tag keep the 0-based index.
        printf "\n%s" "Launching rsync $((i + 1)) of ${#DIRLIST[@]}"
        check_max_processes "${MAXPARALEL}"
        screen -S "${i}" -d -m "${LAUCHRSYNC}" -r "${DIRLIST[$i]}" "r_${i}" "${LOGDIR}"
    done
}

# Quoted so a target path containing whitespace is passed as one argument.
sync_this "${TARGET}"
linux/parallel_rsync.1354816094.txt.gz · Last modified: 2012/12/06 17:48 by dodger