comparison moefetch.sh @ 228:5d3a0645b504

- Restructured some things. - Removed -- from commands. Replaced with safe_path command. - Updated help. - All global variables should be initially set at init_globals command.
author edhoprima@gmail.com <edhoprima@gmail.com>
date Tue, 15 Dec 2009 04:39:46 +0000
parents 8b1f6f6b6a3b
children b03fef57b465
comparison
equal deleted inserted replaced
227:8b1f6f6b6a3b 228:5d3a0645b504
41 ### - sanity checking 41 ### - sanity checking
42 ### - MOAR comments 42 ### - MOAR comments
43 ### WILL BE FOR 0.3 43 ### WILL BE FOR 0.3
44 44
45 # useless welcome message. Also version 45 # useless welcome message. Also version
46 Msg_Welcome() { 46 msg_welcome() {
47 MOEFETCHVERSION="0.3-beta2" 47 echo "moefetch ${_version}
48 echo "moefetch ${MOEFETCHVERSION}
49 Copyright (c) 2009 edogawaconan <me@myconan.net> 48 Copyright (c) 2009 edogawaconan <me@myconan.net>
50 " 49 "
51 } 50 }
52 51
53 get_md5() { cat -- "$1" | openssl dgst -md5; } 52 # Sanitize path. Totally safe. Usage: cmd "$(safe_path "${filename}")"
54 get_basename() { basename /"$1"; } 53 safe_path()
54 {
55 # It all depends on the first character.
56 __start=$(printf "%s" "$*" | cut -c 1)
57 __path=
58 case "${__start}" in
59 .|/) __path="$*";; # . and / is safe. No change.
60 *) __path="./$*";; # Anything else must be prefixed with ./
61 esac
62 printf "%s" "${__path}" # Return.
63 }
64
65 # Checks md5. OpenSSL should be available on anything usable.
66 get_md5() { cat "$(safe_path "${1}")" | openssl dgst -md5; }
67
68 # Safely get basename.
69 get_basename() { basename "$(safe_path "${1}")"; }
70
71 # Safely get filename (basename without the extension).
55 get_filename() { get_basename "${1%.*}"; } 72 get_filename() { get_basename "${1%.*}"; }
56 get_cleantags() { printf "%s " "$@" | sed -e 's/\&/%26/g;s/=/%3D/g'; } 73
57 Is_NotMD5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; } 74 # Transformation for tag url.
75 get_cleantags() { printf "%s " "$*" | sed -e 's/\&/%26/g;s/=/%3D/g'; }
76
77 # Returns something if not an md5 value.
78 is_not_md5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; }
58 79
59 80
60 # fatal error handler 81 # fatal error handler
61 Err_Fatal() { 82 Err_Fatal() {
62 echo " 83 echo "
71 exit 1 92 exit 1
72 } 93 }
73 94
74 # help message 95 # help message
75 Err_Help() { 96 Err_Help() {
76 echo "moefetch.sh COMMAND [-s SITE_URL] TAGS 97 echo "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS
77 98
78 COMMAND: 99 COMMAND:
79 (quick)fetch: do a complete update. Add prefix quick to skip file checking 100 (quick)fetch:
80 check: get list of new files, clean up local folder and print total new files 101 Do a complete update. Add prefix quick to skip file checking
81 102 check:
82 -s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE} 103 Get list of new files, clean up local folder and print total new files
83 104
84 TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme" 105 OPTIONS:
106 -n:
107 Skip checking repository directory.
108 -p PASSWORD:
109 Specifies password for login.
110 -s SITE_URL:
111 Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}.
112 -u USERNAME:
113 Specifies username for login.
114 TAGS:
115 Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme."
85 exit 2 116 exit 2
86 } 117 }
87 118
88 # generate link by transforming xml 119 # generate link by transforming xml
89 Generate_Link() { 120 Generate_Link() {
90 echo " 121 echo "
91 Fetching XML file" 122 Fetching XML file"
92 tempnum=1000 123 __tempnum=1000
93 iternum=1 124 __iternum=1
94 > "${TEMP_PREFIX}-list" 125 > "${TEMP_PREFIX}-list"
95 while [ "${tempnum}" -ge 1000 ]; do 126 while [ "${__tempnum}" -ge 1000 ]; do
96 __url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}" 127 __url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${__iternum}"
97 [ ${_use_login} -eq 1 ] && __url="${__url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}" 128 [ ${_use_login} -eq 1 ] && __url="${__url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
98 wget "${__url}" -O "${TEMP_PREFIX}-xml" -e continue=off || Err_Fatal "Failed download catalog file" 129 wget "${__url}" -O "${TEMP_PREFIX}-xml" -e continue=off || Err_Fatal "Failed download catalog file"
99 printf "Processing XML file... " 130 printf "Processing XML file... "
100 # xslt evilry 131 # xslt evilry
101 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" 132 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
104 <xsl:template match="post"> 135 <xsl:template match="post">
105 <xsl:value-of select="@file_url" /> 136 <xsl:value-of select="@file_url" />
106 </xsl:template> 137 </xsl:template>
107 </xsl:stylesheet> 138 </xsl:stylesheet>
108 EOF 139 EOF
109 tempnum=$(echo $(wc -l < "${TEMP_PREFIX}-templist")) 140 __tempnum=$(echo $(wc -l < "${TEMP_PREFIX}-templist"))
110 iternum=$((iternum + 1)) 141 __iternum=$((__iternum + 1))
111 cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list" 142 cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
112 echo "${tempnum} file(s) available" 143 echo "${__tempnum} file(s) available"
113 done 144 done
114 numfiles=$(echo $(wc -l < "${TEMP_PREFIX}-list")) 145 numfiles=$(echo $(wc -l < "${TEMP_PREFIX}-list"))
115 echo "${numfiles} file(s) available on server" 146 echo "${numfiles} file(s) available on server"
116 [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site." 147 [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
117 } 148 }
118 149
119 150
120 Progress_Init() { 151 progress_init() {
121 _last="-" 152 _last="-"
122 printf "${_last}" 153 printf "${_last}"
123 } 154 }
124 155
125 Progress_Anim() { 156 progress_anim() {
126 case "${_last}" in 157 case "${_last}" in
127 /) _last="-";; 158 /) _last="-";;
128 -) _last=\\;; 159 -) _last=\\;;
129 \\) _last=\|;; 160 \\) _last=\|;;
130 \|) _last="/";; 161 \|) _last="/";;
131 esac 162 esac
132 printf "\b${_last}" 163 printf "\b${_last}"
133 } 164 }
134 165
135 Progress_Done() { printf "\bdone\n"; } 166 progress_done() { printf "\bdone\n"; }
136 167
137 # getting rid of ls (as per suggestion) 168 # getting rid of ls (as per suggestion)
138 Count_Files() { 169 Count_Files() {
139 numfiles=0 170 numfiles=0
140 for dircontent in "${*}/"* "${*}/".*; do 171 for dircontent in "${*}/"* "${*}/".*; do
176 207
177 # Do some cleanup 208 # Do some cleanup
178 Cleanup_Repository() { 209 Cleanup_Repository() {
179 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} 210 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
180 printf "Cleaning up repository folder... " 211 printf "Cleaning up repository folder... "
181 Progress_Init 212 progress_init
182 trash_dir="${BASE_DIR}/trash/${trash_dir}/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")" 213 trash_dir="${BASE_DIR}/trash/${trash_dir}/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")"
183 trashes="These files have been moved to ${trash_dir}:" 214 trashes="These files have been moved to ${trash_dir}:"
184 has_trash= 215 has_trash=
185 if [ ! -d "${trash_dir}" ]; then 216 if [ ! -d "${trash_dir}" ]; then
186 mkdir -p "${trash_dir}" || Err_Impossible 217 mkdir -p "${trash_dir}" || Err_Impossible
190 fi 221 fi
191 fi 222 fi
192 for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 223 for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
193 do 224 do
194 is_trash= 225 is_trash=
195 if [ -d "${trash}" ] || [ -n "$(Is_NotMD5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then 226 if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then
196 is_trash=1 227 is_trash=1
197 has_trash=1 228 has_trash=1
198 mv -f -- "${trash}" "${trash_dir}" || Err_Impossible 229 mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
199 trashes="${trashes} 230 trashes="${trashes}
200 $(get_basename "${trash}")" 231 $(get_basename "${trash}")"
201 fi 232 fi
202 Progress_Anim 233 progress_anim
203 done 234 done
204 rmdir "${trash_dir}" 2>/dev/null 235 rmdir "${trash_dir}" 2>/dev/null
205 Progress_Done 236 progress_done
206 [ -n "${has_trash}" ] && echo "${trashes}" 237 [ -n "${has_trash}" ] && echo "${trashes}"
207 } 238 }
208 239
209 # check files correctness 240 # check files correctness
210 Check_Files() { 241 Check_Files() {
211 if [ ! -n "${ISNEW}" ]; then 242 if [ ! -n "${ISNEW}" ]; then
212 [ -z "${NOCLEAN}" ] && Cleanup_Repository 243 [ -z "${NOCLEAN}" ] && Cleanup_Repository
213 printf "Checking for errors... " 244 printf "Checking for errors... "
214 Progress_Init 245 progress_init
215 files_error="These files do not match its md5:" 246 __files_error="These files do not match its md5:"
216 files_notdanbooru="These files are not checked:" 247 __files_notdanbooru="These files are not checked:"
217 has_err_filename= 248 __has_err_filename=
218 has_err_md5= 249 __has_err_md5=
219 > "${TEMP_PREFIX}-error" 250 > "${TEMP_PREFIX}-error"
220 > "${TEMP_PREFIX}-ok" 251 > "${TEMP_PREFIX}-ok"
221 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* 252 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
222 do 253 do
223 if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then 254 if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then
224 if [ -n "$(Is_NotMD5 "${file}")" ] || [ -d "${file}" ]; then 255 if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
225 files_notdanbooru="${files_notdanbooru} 256 __files_notdanbooru="${__files_notdanbooru}
226 $(get_basename "${file}")" 257 $(get_basename "${file}")"
227 has_err_filename=1 258 __has_err_filename=1
228 else 259 else
229 if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then 260 if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
230 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok" 261 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok"
231 else 262 else
232 rm "${file}" || Err_Fatal "Error removing ${file}" 263 rm "${file}" || Err_Fatal "Error removing ${file}"
233 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error" 264 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error"
234 files_error="${files_error} 265 __files_error="${__files_error}
235 $(get_basename "${file}")" 266 $(get_basename "${file}")"
236 has_err_md5=1 267 __has_err_md5=1
237 fi 268 fi
238 fi 269 fi
239 fi 270 fi
240 Progress_Anim 271 progress_anim
241 done 272 done
242 Progress_Done 273 progress_done
243 if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then 274 if [ ! -n "${__has_err_md5}" ] && [ ! -n "${__has_err_filename}" ]; then
244 echo "All files OK" 275 echo "All files OK"
245 else 276 else
246 if [ -n "${has_err_md5}" ]; then 277 if [ -n "${__has_err_md5}" ]; then
247 echo "${files_error}" 278 echo "${__files_error}"
248 echo "$(echo $(wc -l < "${TEMP_PREFIX}-error")) file(s) removed" 279 echo "$(echo $(wc -l < "${TEMP_PREFIX}-error")) file(s) removed"
249 fi 280 fi
250 [ -n "${has_err_filename}" ] && echo "${files_notdanbooru}" 281 [ -n "${__has_err_filename}" ] && echo "${__files_notdanbooru}"
251 fi 282 fi
252 echo "$(echo $(wc -l < "${TEMP_PREFIX}-ok")) file(s) available locally" 283 echo "$(echo $(wc -l < "${TEMP_PREFIX}-ok")) file(s) available locally"
253 284
254 printf "Generating list of new files... " 285 printf "Generating list of new files... "
255 Progress_Init 286 progress_init
256 cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist" 287 cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist"
257 while read -r is_ok; do 288 while read -r is_ok; do
258 grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist" 289 grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist"
259 cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible 290 cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible
260 Progress_Anim 291 progress_anim
261 done < "${TEMP_PREFIX}-ok" 292 done < "${TEMP_PREFIX}-ok"
262 Progress_Done 293 progress_done
263 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded" 294 echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded"
264 else 295 else
265 if [ -n "${ISQUICK}" ]; then 296 if [ -n "${ISQUICK}" ]; then
266 echo "Quick mode selected. Skipping check" 297 echo "Quick mode selected. Skipping check"
267 else 298 else
295 fi 326 fi
296 327
297 # misc variables 328 # misc variables
298 ISQUICK= 329 ISQUICK=
299 ISNEW= 330 ISNEW=
300 # variable to check whether a login is used or not
301 _use_login=0
302 331
303 # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message 332 # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message
304 [ $# -lt 2 ] && Err_Help 333 [ $# -lt 2 ] && Err_Help
305 case "$1" in 334 case "$1" in
306 check|fetch|quickfetch) 335 check|fetch|quickfetch)
346 TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g') 375 TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
347 SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g') 376 SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
348 TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}" 377 TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
349 } 378 }
350 379
380 # global variables goes here
381 init_globals()
382 {
383 _version="0.3-beta3" # version of this script
384 _use_login=0 # variable to check whether a login is used or not
385 }
386
351 main() 387 main()
352 { 388 {
389 #initialize global variables
390 init_globals
391 #print welcome message
392 msg_welcome
353 # initialization 393 # initialization
354 Msg_Welcome
355 init "$@" 394 init "$@"
356 Check_Tools 395 Check_Tools
357 Check_Folders 396 Check_Folders
358 397
359 398