#!/bin/sh
#########################################################################
# This script is a generalized cleanup script.  The input is the        #
# directory to clean, the file template, and the number of most         #
# recent files to keep.  The fourth optional input is "r" or "l", to    #
# remove or list, respectively, the names of files or directories       #
# satisfying the template.  To remove files, the "r" option must be     #
# given as the fourth input.                                            #
#                                                                       #
# Log:                                                                  #
# K. Brill/HPC 20070612                                                 #
# K. Brill/HPC 20090820  Use /bin/rm -rf to avoid prompts               #
# K. Brill/HPC 20090917  Changes to allow 2-digit year (YY)             #
# K. Brill/HPC 20091119  Put gc: in output text                         #
# K. Brill/HPC 20091120  Always search for the date-time (YMDH)         #
#                        to avoid including other matching files        #
# K. Brill/HPC 20091207  Documentation; search for date-time            #
#                        (YMDH) for case when one string remains        #
#                        after removing template                        #
# K. Brill/HPC 20101028  Remove [ ] from tr string to translate         #
# K. Brill/HPC 20110809  Allow "=" to stand for any character in        #
#                        the template.                                  #
# K. Brill/HPC 20110810  Documentation update                           #
#########################################################################
#
# Usage:  gc path template number [r/l]
#
# Exit status:
#   0  usage text printed, directory does not exist, nothing to remove,
#      or all selected entries were listed/removed
#   1  invalid template, invalid keep number, no entry matches the
#      template, the directory cannot be entered, or an rm failed
#
# All messages are written to standard output.
#
# NOTE(review): the previous revision had a bare "exit" directly after
# the initial "whoami" call, which made every statement below it
# unreachable.  It is removed here so the script performs its documented
# function.  If that "exit" was a deliberate kill switch, put it back
# immediately before the final gc_main call.

# Print the help text.  The here-document delimiter is quoted so that
# "$MYMODEL" and "$MODEL" in the examples are shown literally instead of
# being expanded.
gc_usage() {
  cat << 'EOF'

 gc is a general cleanup script.

 Use> gc path template number [r/l]

 Enter the following on the command line:

   1. The complete path to the directory to be cleaned.
   2. The file or directory name template with cycle year
      represented by YYYY or YY, month by MM, day by DD,
      hour by HH.  All forecast hours for a given YYYYMMDDHH
      cycle date-time are kept.  In a template, replace the
      forecast hour digits with F.  A series of "=" characters
      may be used anywhere in the template to replace a varying
      string of characters.  For example, the member name in an
      ensemble template is replace with "=" characters.
   3. The number of cycles to keep.
   4. Optionally enter either "r" or "l":
        r = remove the files or directories matching the template.
        l = ONLY list the files or directories matching the
            template that would otherwise be removed.  This is
            the default if no 4th argument is given.

 Example:

   gc $MYMODEL/mdl my_mdl_YYYYMMDDHHfFFF.grd 5 r

 Assumptions:

   A. Date-time groups are contiguous.  In other words, YYYY/YY,
      MM, DD, and HH cannot be separated by intevening characters.
   B. Upper case "Y", "M", "D", "H", or "F" cannot appear anywhere
      in the actual file names.
   C. The "=" character does not appear in the file name.
   D. Any varying string replaced by "=" characters is the same
      length in each file name, and each character in the varying
      string is replaced by an "=" character in the template.

 For example, here is a cleanup for the GEFS ensemble:

   gc $MODEL/gefs gep==_YYYYMMDDHHfFFF 3 r

 Note that "==" replaces the two-digit member number in the template
 in this case.  More than one string of "=" characters is allowed.

 Use> gc path template number [r/l]

EOF
}

# gc_has_single_run STRING CHAR LEN
# Succeed when STRING contains exactly LEN occurrences of CHAR and they
# are adjacent (one run of CHAR repeated LEN times).
gc_has_single_run() {
  gc_count=$(printf '%s' "$1" | tr -cd "$2" | wc -c)
  [ "$((gc_count))" -eq "$3" ] || return 1
  gc_run=$(printf "%${3}s" '' | tr ' ' "$2")
  case $1 in
    *"$gc_run"*) return 0 ;;
  esac
  return 1
}

# gc_check_template TEMPLATE
# Validate the date-time and forecast-hour placeholders.  Prints the
# matching "gc: ... template is not valid." message and returns 1 on the
# first problem found; returns 0 when the template is usable.
gc_check_template() {
  gc_ndigits=0

  # Year: one run of YYYY or one run of YY.
  case $1 in
    *Y*)
      if gc_has_single_run "$1" Y 4; then
        gc_ndigits=$((gc_ndigits + 4))
      elif gc_has_single_run "$1" Y 2; then
        gc_ndigits=$((gc_ndigits + 2))
      else
        echo "gc: Year template is not valid."
        return 1
      fi
      ;;
  esac

  # Month, day and hour: each optional, each exactly one two-letter run.
  for gc_pair in M:Month D:Day H:Hour; do
    gc_chr=${gc_pair%%:*}
    gc_lbl=${gc_pair#*:}
    case $1 in
      *"$gc_chr"*) ;;
      *) continue ;;
    esac
    if gc_has_single_run "$1" "$gc_chr" 2; then
      gc_ndigits=$((gc_ndigits + 2))
    else
      echo "gc: $gc_lbl template is not valid."
      return 1
    fi
  done

  if [ "$gc_ndigits" -eq 0 ]; then
    echo " "
    echo "gc: No valid date-time template was given."
    return 1
  fi

  # Forecast hour: if F is used at all there must be at least "FF".
  case $1 in
    *FF*) ;;
    *F*)
      echo "gc: Forecast hour template is not valid."
      return 1
      ;;
  esac
  return 0
}

# gc_select TEMPLATE
# Read entry names from standard input, one per line, and write
# "CYCLE NAME" for every name that satisfies TEMPLATE.  CYCLE is the
# year, month, day and hour digits concatenated in that order.
#
# The comparison is positional: a name must be exactly as long as the
# template; Y, M, D and H positions must hold a digit; "=" and F
# positions accept any character; every other position must equal the
# template character.  When the template has no F, names containing
# "f" followed by two digits are skipped (forecast-hour files).
#
# The template is passed through the environment rather than "awk -v"
# so that backslashes in it are not treated as escape sequences.
gc_select() {
  GC_TEMPLATE=$1 awk '
    BEGIN {
      tpl = ENVIRON["GC_TEMPLATE"]
      n = length(tpl)
      for (i = 1; i <= n; i++) t[i] = substr(tpl, i, 1)
      nofcst = (index(tpl, "F") == 0)
    }
    length($0) == n {
      if (nofcst && $0 ~ /f[0-9][0-9]/) next
      y = ""; m = ""; d = ""; h = ""
      for (i = 1; i <= n; i++) {
        c = substr($0, i, 1)
        k = t[i]
        if (k == "Y" || k == "M" || k == "D" || k == "H") {
          if (c !~ /^[0-9]$/) next
          if (k == "Y") y = y c
          else if (k == "M") m = m c
          else if (k == "D") d = d c
          else h = h c
        } else if (k != "=" && k != "F" && c != k) {
          next
        }
      }
      print y m d h " " $0
    }
  '
}

# gc_main path template number [r/l]
# Entry point; see the usage text for the meaning of the arguments.
# Changes the working directory to "path".
gc_main() {
  if [ $# -lt 3 ]; then
    gc_usage
    return 0
  fi

  qpath=$1
  template=$2
  nkeep=$3
  action=${4:-l}

  gc_check_template "$template" || return 1

  if [ ! -d "$qpath" ]; then
    # Exit status 0 is kept from the original behaviour for this case.
    printf 'gc: Directory %s does not exist.\n' "$qpath"
    return 0
  fi
  # Never continue after a failed cd: the removal below works on
  # relative names and would otherwise act on the caller's directory.
  # CDPATH is cleared so a relative path cannot resolve elsewhere.
  if ! CDPATH= cd -- "$qpath"; then
    printf 'gc: Cannot change to directory %s.\n' "$qpath"
    return 1
  fi

  # Candidate entries come from a glob (same non-hidden set and order
  # as "ls -1") so names with blanks or glob characters stay intact.
  # Names containing a newline cannot be handled line by line; skip them.
  gc_nl='
'
  matches=$(
    for fd in *; do
      [ -e "$fd" ] || [ -L "$fd" ] || continue
      case $fd in
        *"$gc_nl"*) continue ;;
      esac
      printf '%s\n' "$fd"
    done | gc_select "$template"
  )

  if [ -z "$matches" ]; then
    echo "gc: Template may not be valid or appropriate."
    return 1
  fi

  # Distinct cycles, newest first.
  cycles=$(printf '%s\n' "$matches" | cut -d ' ' -f 1 | LC_ALL=C sort -ru)
  nymdh=$(printf '%s\n' "$cycles" | grep -c .)

  # Only a non-negative integer is accepted; a negative value would
  # make the delete count exceed the number of cycles and remove all.
  case $nkeep in
    '' | *[!0-9]*)
      echo "gc: The keep number is not valid."
      return 1
      ;;
  esac

  # expr does decimal arithmetic, so a keep count such as 08 is not
  # misread as an octal constant.
  ndel=$(expr "$nymdh" - "$nkeep")
  if [ "$ndel" -le 0 ]; then
    echo "gc: There are no cycles to remove."
    return 0
  fi

  # The oldest ndel cycles, as a blank-delimited list " c1 c2 ... ".
  dcycles=" $(printf '%s\n' "$cycles" | tail -n "$ndel" | tr '\n' ' ') "

  gc_status=0
  while IFS= read -r gc_line; do
    cyc=${gc_line%% *}
    fd=${gc_line#* }
    case $dcycles in
      *" $cyc "*) ;;
      *) continue ;;
    esac
    if [ "$action" = r ]; then
      printf 'gc: Removing %s\n' "$fd"
      /bin/rm -rf -- "$fd" || gc_status=1
    else
      printf '%s would be removed.\n' "$fd"
    fi
  done << EOF
$matches
EOF
  return "$gc_status"
}

gc_main "$@"