comparison moefetch.sh @ 225:265a9ca47a19

- Replaced md5(sum) with openssl. Less platform dependent because the tool is the same across platforms.
- Preparation for name-safe file operations.
- Sanity against sed.
- Correctly encoded url (hopefully).
- getopts replacing fail hackjob with while and shift.
- Support for login (provided with openssl).
author edhoprima
date Fri, 13 Nov 2009 18:47:02 +0000
parents 0ac1805621d4
children f8be4a3d3b4a
comparison
equal deleted inserted replaced
224:0ac1805621d4 225:265a9ca47a19
16 # 16 #
17 # Lots of bugs here. Use with care 17 # Lots of bugs here. Use with care
18 # USE WITH CARE 18 # USE WITH CARE
19 # 19 #
20 # what it does: fetch every picture that has the specified TAGS. 20 # what it does: fetch every picture that has the specified TAGS.
21 # requirement: wget, libxslt, md5sum (or md5) 21 # requirement: wget, libxslt, openssl
22 22
23 # program additional paths for: cut, sed, wc, MD5(sum), wget, xsltproc, grep 23 # program additional paths for: cut, sed, wc, openssl, wget, xsltproc, grep
24 ADDITIONAL_PATH= 24 ADDITIONAL_PATH=
25
26 # custom md5 path with arguments, expected output: <32digit md5><space(s)><filename>
27 # Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
28 MD5=
29 25
30 # default server address. Danbooru only! I do not take responsibility of stupidity. 26 # default server address. Danbooru only! I do not take responsibility of stupidity.
31 DEFAULT_SITE="moe.imouto.org" 27 DEFAULT_SITE="moe.imouto.org"
32 28
33 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one. 29 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one.
45 ### - sanity checking 41 ### - sanity checking
46 ### WILL BE FOR 0.3 42 ### WILL BE FOR 0.3
47 43
48 # useless welcome message. Also version 44 # useless welcome message. Also version
49 Msg_Welcome() { 45 Msg_Welcome() {
50 MOEFETCHVERSION="0.3-beta1" 46 MOEFETCHVERSION="0.3-beta2"
51 echo "moefetch ${MOEFETCHVERSION} 47 echo "moefetch ${MOEFETCHVERSION}
52 Copyright (c) 2009 edogawaconan <me@myconan.net> 48 Copyright (c) 2009 edogawaconan <me@myconan.net>
53 " 49 "
54 } 50 }
51
52 get_md5() { cat -- "$1" | openssl dgst -md5; }
53 get_basename() { basename -- "$1"; }
54 get_filename() { get_basename "${1%.*}"; }
55 get_cleantags() { printf "%s " "$@" | sed -e 's/\&/%26/g;s/=/%3D/g'; }
56 Is_NotMD5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; }
57
55 58
56 # fatal error handler 59 # fatal error handler
57 Err_Fatal() { 60 Err_Fatal() {
58 echo " 61 echo "
59 Fatal error: ${1}" 62 Fatal error: ${1}"
87 Fetching XML file" 90 Fetching XML file"
88 tempnum=1000 91 tempnum=1000
89 iternum=1 92 iternum=1
90 > "${TEMP_PREFIX}-list" 93 > "${TEMP_PREFIX}-list"
91 while [ "${tempnum}" -ge 1000 ]; do 94 while [ "${tempnum}" -ge 1000 ]; do
92 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=1000&page=${_i}" -O "${TEMP_PREFIX}-xml" -e continue=off 95 wget "http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}" -O "${TEMP_PREFIX}-xml" -e continue=off
93 printf "Processing XML file... " 96 printf "Processing XML file... "
94 # xslt evilry 97 # xslt evilry
95 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" 98 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
96 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> 99 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
97 <xsl:output method="xml" indent="yes"/> 100 <xsl:output method="xml" indent="yes"/>
108 numfiles=$(echo $(wc -l < "${TEMP_PREFIX}-list")) 111 numfiles=$(echo $(wc -l < "${TEMP_PREFIX}-list"))
109 echo "${numfiles} file(s) available on server" 112 echo "${numfiles} file(s) available on server"
110 [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site." 113 [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
111 } 114 }
112 115
113 Is_NotMD5() {
114 echo "${*}" | sed -e 's/\([0-9a-f]\{32\}\..*\)//g'
115 }
116 116
117 Progress_Init() { 117 Progress_Init() {
118 _last="-" 118 _last="-"
119 printf "${_last}" 119 printf "${_last}"
120 } 120 }
143 } 143 }
144 144
145 # check tools availability 145 # check tools availability
146 Check_Tools() { 146 Check_Tools() {
147 # verify all programs required do indeed exist 147 # verify all programs required do indeed exist
148 #MD5 148 commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl"
149 if [ -z "${MD5}" ]; then
150 case "$(uname)" in
151 *BSD) MD5="md5 -r";;
152 Linux|SunOS) MD5="md5sum";;
153 *) Fatal_Err "No known md5 tool for this platform. Please specify manually";;
154 esac
155 fi
156 md5_command=$(echo ${MD5} | cut -d' ' -f1)
157 # basic tools
158 commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date ${md5_command}"
159 for cmd in ${commands} 149 for cmd in ${commands}
160 do 150 do
161 [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}" 151 [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}"
162 done 152 done
163 } 153 }
197 fi 187 fi
198 fi 188 fi
199 for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 189 for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
200 do 190 do
201 is_trash= 191 is_trash=
202 if [ -d "${trash}" ] || [ -n "$(Is_NotMD5 "$(basename "${trash}")")" ] || [ -z "$(grep "$(basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then 192 if [ -d "${trash}" ] || [ -n "$(Is_NotMD5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then
203 is_trash=1 193 is_trash=1
204 has_trash=1 194 has_trash=1
205 mv -f "${trash}" "${trash_dir}" || Err_Impossible 195 mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
206 trashes="${trashes} 196 trashes="${trashes}
207 $(basename "${trash}")" 197 $(get_basename "${trash}")"
208 fi 198 fi
209 Progress_Anim 199 Progress_Anim
210 done 200 done
211 rmdir "${trash_dir}" 2>/dev/null 201 rmdir "${trash_dir}" 2>/dev/null
212 Progress_Done 202 Progress_Done
226 > "${TEMP_PREFIX}-error" 216 > "${TEMP_PREFIX}-error"
227 > "${TEMP_PREFIX}-ok" 217 > "${TEMP_PREFIX}-ok"
228 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 218 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
229 do 219 do
230 if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then 220 if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then
231 if [ -n "$(Is_NotMD5 "$(basename "${file}")")" ] || [ -d "${file}" ]; then 221 if [ -n "$(Is_NotMD5 "${file}")" ] || [ -d "${file}" ]; then
232 files_notdanbooru="${files_notdanbooru} 222 files_notdanbooru="${files_notdanbooru}
233 $(basename "${file}")" 223 $(get_basename "${file}")"
234 has_err_filename=1 224 has_err_filename=1
235 else 225 else
236 if [ "$(${MD5} "${file}" | cut -d ' ' -f 1)" = "$(basename "${file}" | cut -d '.' -f 1)" ]; then 226 if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
237 echo "$(basename "${file}")" >> "${TEMP_PREFIX}-ok" 227 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok"
238 else 228 else
239 rm "${file}" || Err_Fatal "Error removing ${file}" 229 rm "${file}" || Err_Fatal "Error removing ${file}"
240 echo "$(basename "${file}")" >> "${TEMP_PREFIX}-error" 230 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error"
241 files_error="${files_error} 231 files_error="${files_error}
242 $(basename "${file}")" 232 $(get_basename "${file}")"
243 has_err_md5=1 233 has_err_md5=1
244 fi 234 fi
245 fi 235 fi
246 fi 236 fi
247 Progress_Anim 237 Progress_Anim
310 ;; 300 ;;
311 esac 301 esac
312 shift 302 shift
313 SITE= 303 SITE=
314 TAGS= 304 TAGS=
315 while [ "${1}" ]; do 305 x=1
316 case "$1" in 306 while getopts "s:(site)n(noclean)u:(user)p:(password)" opt
317 -s|--site) 307 do
318 shift 308 case "$opt" in
319 SITE="$1" 309 s) SITE="$OPTARG";;
320 ;; 310 n) NOCLEAN=1;;
321 -nc|--noclean) 311 p) LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1);;
322 NOCLEAN=1 312 u) LOGIN_USER="$OPTARG";;
323 ;;
324 *)
325 if [ "${TAGS}" ]; then
326 TAGS="$1 ${TAGS}"
327 else
328 TAGS="$1"
329 fi
330 ;;
331 esac 313 esac
332 shift 314 x=${OPTIND}
333 done 315 done
316 shift $(($x-1))
317 TAGS="$@"
334 [ -n "${SITE}" ] || SITE=${DEFAULT_SITE} 318 [ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
335 [ -n "${TAGS}" ] || Err_Fatal "No tag specified" 319 [ -n "${TAGS}" ] || Err_Fatal "No tag specified"
336 # Get base folder - default, current folder or fallback to ${HOME} 320 # Get base folder - default, current folder or fallback to ${HOME}
337 [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD} 321 [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
338 [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME} 322 [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}