ec-dotfiles (Mercurial): comparison of bin/moefetch @ 303:e4208bf9c585
Merge
| author | Edho Arief <edho@myconan.net> |
|---|---|
| date | Thu, 16 Feb 2012 17:48:04 +0700 |
| parents | b90ebadbfd5d |
| children | 21b86001b0c5 |
| 295:ce17ed77a7fa | 303:e4208bf9c585 |
|---|---|
| 1 #!/bin/sh | |
| 2 | |
| 3 # Copyright (c) 2009-2012, edogawaconan <edho@myconan.net> | |
| 4 # | |
| 5 # Permission to use, copy, modify, and/or distribute this software for any | |
| 6 # purpose with or without fee is hereby granted, provided that the above | |
| 7 # copyright notice and this permission notice appear in all copies. | |
| 8 # | |
| 9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| 10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| 16 # | |
| 17 # Lots of bugs here. Use with care | |
| 18 # USE WITH CARE | |
| 19 # | |
| 20 # what it does: fetch every picture that has the specified TAGS. | |
| 21 # requirements: wget, libxslt, openssl | |
| 22 | |
| 23 # additional PATH entries for the programs used: cut, sed, wc, openssl, wget, xsltproc, grep | |
| 24 ADDITIONAL_PATH= | |
| 25 | |
| 26 # default server address. Danbooru-based sites only! I do not take responsibility for stupidity. | |
| 27 DEFAULT_SITE="moe.imouto.org" | |
| 28 | |
| 29 # base directory. Make sure it's writable. I take no responsibility if you don't own the folder and files, as no check is done for this. | |
| 30 # Structure is ${BASE_DIR}/<TAGS> | |
| 31 # Absolute path only. | |
| 32 # Leave empty to use the folder you're running this from. | |
| 33 BASE_DIR= | |
| 34 | |
| 35 # not user-modifiable from here on | |
| 36 | |
| 37 useragent="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0) Gecko/20100101 Firefox/10.0" | |
| 38 | |
| 39 # useless welcome message. Also version | |
| 40 msg_welcome() { | |
| 41 echo "moefetch ${_version} | |
| 42 Copyright (c) 2009-2012 edogawaconan <edho@myconan.net> | |
| 43 " | |
| 44 } | |
| 45 | |
| 46 # Sanitize path. Totally safe. Usage: cmd "$(safe_path "${filename}")" | |
| 47 safe_path() | |
| 48 { | |
| 49 # It all depends on the first character. | |
| 50 start=$(printf "%s" "$*" | cut -c 1) | |
| 51 path= | |
| 52 case "${start}" in | |
| 53 .|/) path="$*";; # . and / are safe. No change. | |
| 54 *) path="./$*";; # Anything else must be prefixed with ./ | |
| 55 esac | |
| 56 printf "%s" "${path}" # Return. | |
| 57 } | |
| 58 | |
| 59 # Checks md5. OpenSSL should be available on anything usable. | |
| 60 get_md5() { cat "$(safe_path "${1}")" | openssl dgst -md5 | tail -n 1 | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'; } | |
| 61 | |
| 62 # Safely get basename. | |
| 63 get_basename() { basename "$(safe_path "${1}")"; } | |
| 64 | |
| 65 # Safely get filename (basename without the extension). | |
| 66 get_filename() { get_basename "${1%.*}"; } | |
| 67 | |
| 68 # Transformation for tag url. | |
| 69 get_cleantags() { printf "%s " "$*" | sed -e 's/\&/%26/g;s/=/%3D/g'; } | |
| 70 | |
| 71 # Returns something if not an md5 value. | |
| 72 is_not_md5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; } | |
| 73 | |
| 74 | |
| 75 # fatal error handler | |
| 76 Err_Fatal() { | |
| 77 echo " | |
| 78 Fatal error: ${1}" | |
| 79 exit 1 | |
| 80 } | |
| 81 | |
| 82 Err_Impossible() { | |
| 83 echo " | |
| 84 Impossible error. Or you modified the contents of the working directories while the script was running. | |
| 85 Please report to moefetch.googlecode.com if you see this message (complete with the entire run log)" | |
| 86 exit 1 | |
| 87 } | |
| 88 | |
| 89 # help message | |
| 90 Err_Help() { | |
| 91 echo "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS | |
| 92 | |
| 93 COMMAND: | |
| 94 (quick)fetch: | |
| 95 Do a complete update. Use the quick prefix to skip file checking | |
| 96 check: | |
| 97 Get the list of new files, clean up the local folder, and print the number of new files | |
| 98 | |
| 99 OPTIONS: | |
| 100 -n: | |
| 101 Skip cleaning up the repository directory. | |
| 102 -p PASSWORD: | |
| 103 Specify the password for login. | |
| 104 -s SITE_URL: | |
| 105 Specify the URL of the Danbooru-powered site you want to leech from. Default is ${DEFAULT_SITE}. | |
| 106 -u USERNAME: | |
| 107 Specify the username for login. | |
| 108 TAGS: | |
| 109 Tags you want to download, separated by spaces. Tag names follow the standard Danbooru tagging scheme." | |
| 110 exit 2 | |
| 111 } | |
| 112 | |
| 113 # generate the download list by transforming the XML | |
| 114 Generate_Link() { | |
| 115 echo " | |
| 116 Fetching XML file" | |
| 117 tempnum=1000 | |
| 118 iternum=1 | |
| 119 > "${TEMP_PREFIX}-list" | |
| 120 while [ "${tempnum}" -ge 1000 ]; do | |
| 121 url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}" | |
| 122 [ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}" | |
| 123 wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" --referer="http://${SITE}/post" --user-agent="${useragent}" -e continue=off || Err_Fatal "Failed to download the catalog file" | |
| 124 printf "Processing XML file... " | |
| 125 # xslt evilry | |
| 126 xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" | |
| 127 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | |
| 128 <xsl:output method="xml" indent="yes"/> | |
| 129 <xsl:template match="post"> | |
| 130 <xsl:value-of select="@file_url" /> | |
| 131 </xsl:template> | |
| 132 </xsl:stylesheet> | |
| 133 EOF | |
| 134 tempnum=$(grep -c . "${TEMP_PREFIX}-templist") | |
| 135 iternum=$((iternum + 1)) | |
| 136 cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list" | |
| 137 echo "${tempnum} file(s) available" | |
| 138 done | |
| 139 numfiles=$(grep -c . "${TEMP_PREFIX}-list") | |
| 140 echo "${numfiles} file(s) available on server" | |
| 141 [ "${numfiles}" -gt 0 ] || Err_Fatal "Error processing the list, or no files found for the specified tag(s) or site." | |
| 142 } | |
| 143 | |
| 144 | |
| 145 progress_init() { | |
| 146 _last="-" | |
| 147 printf "%s" "${_last}" # print the spinner character as data, not as a format string | |
| 148 } | |
| 149 | |
| 150 progress_anim() { | |
| 151 case "${_last}" in | |
| 152 /) _last="-";; | |
| 153 -) _last=\\;; | |
| 154 \\) _last=\|;; | |
| 155 \|) _last="/";; | |
| 156 esac | |
| 157 printf "\b%s" "${_last}" | |
| 158 } | |
| 159 | |
| 160 progress_done() { printf "\bdone\n"; } | |
| 161 | |
| 162 # getting rid of ls (as per suggestion) | |
| 163 Count_Files() { | |
| 164 numfiles=0 | |
| 165 for dircontent in "${*}/"* "${*}/".*; do | |
| 166 if [ -e "${dircontent}" ] && [ x"${dircontent}" != x"${*}/." ] && [ x"${dircontent}" != x"${*}/.." ]; then | |
| 167 numfiles=$((numfiles + 1)) | |
| 168 fi | |
| 169 done | |
| 170 echo $((numfiles)) | |
| 171 } | |
| 172 | |
| 173 # check tool availability | |
| 174 Check_Tools() { | |
| 175 # verify that all required programs exist | |
| 176 commands="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep date openssl basename" | |
| 177 for cmd in ${commands} | |
| 178 do | |
| 179 [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}" | |
| 180 done | |
| 181 } | |
| 182 | |
| 183 # verify that required folders exist and are writable | |
| 184 Check_Folders(){ | |
| 185 [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory." | |
| 186 for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do | |
| 187 if [ ! -d "${BASE_DIR}/${directory}" ]; then | |
| 188 mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible | |
| 189 fi | |
| 190 if [ ! -O "${BASE_DIR}/${directory}" ]; then | |
| 191 echo "You don't own ${BASE_DIR}/${directory}; applying globally writable permissions to it" | |
| 192 chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible | |
| 193 fi | |
| 194 done | |
| 195 [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1 | |
| 196 for i in error ok list newlist templist; do | |
| 197 touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen" | |
| 198 done | |
| 199 # | |
| 200 } | |
| 201 | |
| 202 # Do some cleanup | |
| 203 Cleanup_Repository() { | |
| 204 # operates on ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} | |
| 205 printf "Cleaning up repository folder... " | |
| 206 progress_init | |
| 207 trash_dir="${BASE_DIR}/trash/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")" | |
| 208 trashes="These files have been moved to ${trash_dir}:" | |
| 209 has_trash= | |
| 210 if [ ! -d "${trash_dir}" ]; then | |
| 211 mkdir -p "${trash_dir}" || Err_Impossible | |
| 212 else | |
| 213 if [ ! -O "${trash_dir}" ]; then | |
| 214 chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible | |
| 215 fi | |
| 216 fi | |
| 217 for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* | |
| 218 do | |
| 219 if [ -e "${trash}" ]; then | |
| 220 is_trash= | |
| 221 if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then | |
| 222 is_trash=1 | |
| 223 has_trash=1 | |
| 224 mv -f -- "${trash}" "${trash_dir}" || Err_Impossible | |
| 225 trashes="${trashes} | |
| 226 $(get_basename "${trash}")" | |
| 227 fi | |
| 228 fi | |
| 229 progress_anim | |
| 230 done | |
| 231 rmdir "${trash_dir}" 2>/dev/null | |
| 232 progress_done | |
| 233 [ -n "${has_trash}" ] && echo "${trashes}" | |
| 234 } | |
| 235 | |
| 236 # check file correctness | |
| 237 Check_Files() { | |
| 238 if [ ! -n "${ISNEW}" ]; then | |
| 239 [ -z "${NOCLEAN}" ] && Cleanup_Repository | |
| 240 printf "Checking for errors... " | |
| 241 progress_init | |
| 242 files_error="These files do not match their md5:" | |
| 243 files_notdanbooru="These files were not checked:" | |
| 244 has_err_filename= | |
| 245 has_err_md5= | |
| 246 > "${TEMP_PREFIX}-error" | |
| 247 > "${TEMP_PREFIX}-ok" | |
| 248 for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* | |
| 249 do | |
| 250 if [ -e "${file}" ]; then | |
| 251 if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then | |
| 252 files_notdanbooru="${files_notdanbooru} | |
| 253 $(get_basename "${file}")" | |
| 254 has_err_filename=1 | |
| 255 else | |
| 256 if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then | |
| 257 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok" | |
| 258 else | |
| 259 rm "${file}" || Err_Fatal "Error removing ${file}" | |
| 260 echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error" | |
| 261 files_error="${files_error} | |
| 262 $(get_basename "${file}")" | |
| 263 has_err_md5=1 | |
| 264 fi | |
| 265 fi | |
| 266 fi | |
| 267 progress_anim | |
| 268 done | |
| 269 progress_done | |
| 270 if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then | |
| 271 echo "All files OK" | |
| 272 else | |
| 273 if [ -n "${has_err_md5}" ]; then | |
| 274 echo "${files_error}" | |
| 275 echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed" | |
| 276 fi | |
| 277 [ -n "${has_err_filename}" ] && echo "${files_notdanbooru}" | |
| 278 fi | |
| 279 echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally" | |
| 280 | |
| 281 printf "Generating list of new files... " | |
| 282 progress_init | |
| 283 cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist" && cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-newlist" # seed newlist in case no local file passes the check | |
| 284 while read -r is_ok; do | |
| 285 grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist" | |
| 286 cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible | |
| 287 progress_anim | |
| 288 done < "${TEMP_PREFIX}-ok" | |
| 289 progress_done | |
| 290 echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded" | |
| 291 else | |
| 292 if [ -n "${ISQUICK}" ]; then | |
| 293 echo "Quick mode selected. Skipping check" | |
| 294 else | |
| 295 echo "Empty local repository" | |
| 296 fi | |
| 297 cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist" | |
| 298 fi | |
| 299 } | |
| 300 | |
| 301 # start downloading the images | |
| 302 Fetch_Images() { | |
| 303 if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then | |
| 304 echo "No new file" | |
| 305 else | |
| 306 printf "Downloading files... " | |
| 307 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Cannot change directory to ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | |
| 308 wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" --referer="http://${SITE}/post" --user-agent="${useragent}" | |
| 309 fi | |
| 310 } | |
| 311 | |
| 312 # initialize base variables and initial command check | |
| 313 init() | |
| 314 { | |
| 315 # path initialization | |
| 316 # check if additional path is specified | |
| 317 if [ -n "${ADDITIONAL_PATH}" ] | |
| 318 then | |
| 319 # insert the additional path | |
| 320 PATH="${ADDITIONAL_PATH}:${PATH}" | |
| 321 export PATH | |
| 322 fi | |
| 323 | |
| 324 # misc variables | |
| 325 ISQUICK= | |
| 326 ISNEW= | |
| 327 | |
| 328 # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message | |
| 329 [ $# -lt 2 ] && Err_Help | |
| 330 case "$1" in | |
| 331 check|fetch|quickfetch) | |
| 332 echo "Starting..." | |
| 333 JOB="$1" | |
| 334 ;; | |
| 335 *) | |
| 336 Err_Help | |
| 337 ;; | |
| 338 esac | |
| 339 shift | |
| 340 SITE= | |
| 341 TAGS= | |
| 342 has_pass=0 | |
| 343 has_user=0 | |
| 344 x=1 | |
| 345 while getopts "s:nu:p:" opt | |
| 346 do | |
| 347 case "$opt" in | |
| 348 s) SITE="$OPTARG";; | |
| 349 n) NOCLEAN=1;; | |
| 350 p) | |
| 351 LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/') | |
| 352 has_pass=1 | |
| 353 ;; | |
| 354 u) | |
| 355 LOGIN_USER="$OPTARG" | |
| 356 has_user=1 | |
| 357 ;; | |
| 358 esac | |
| 359 x=$OPTIND | |
| 360 done | |
| 361 shift $(($x-1)) | |
| 362 if [ "$1" = -- ]; then shift; fi | |
| 363 TAGS="$@" | |
| 364 [ -n "${SITE}" ] || SITE=${DEFAULT_SITE} | |
| 365 [ -n "${TAGS}" ] || Err_Fatal "No tag specified" | |
| 366 # Get base folder - default, current folder or fallback to ${HOME} | |
| 367 [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD} | |
| 368 [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME} | |
| 369 [ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}" | |
| 370 # see if both pass and user are set. If they are, set _use_login to 1. | |
| 371 [ ${has_pass} -eq 1 ] && [ ${has_user} -eq 1 ] && _use_login=1 | |
| 372 | |
| 373 echo "Tags: ${TAGS}" | |
| 374 # slash is not wanted for folder name | |
| 375 TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g') | |
| 376 SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g') | |
| 377 TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}" | |
| 378 } | |
| 379 | |
| 380 # global variables go here | |
| 381 init_globals() | |
| 382 { | |
| 383 _version="1.0-rc3" # version of this script | |
| 384 _use_login=0 # variable to check whether a login is used or not | |
| 385 } | |
| 386 | |
| 387 main() | |
| 388 { | |
| 389 # removing GNU-ism as much as possible | |
| 390 POSIXLY_CORRECT=1 | |
| 391 #initialize global variables | |
| 392 init_globals | |
| 393 #print welcome message | |
| 394 msg_welcome | |
| 395 # initialization | |
| 396 init "$@" | |
| 397 Check_Tools | |
| 398 Check_Folders | |
| 399 | |
| 400 | |
| 401 # let's do the job! | |
| 402 case "${JOB}" in | |
| 403 check) | |
| 404 Generate_Link | |
| 405 Check_Files | |
| 406 ;; | |
| 407 fetch) | |
| 408 Generate_Link | |
| 409 Check_Files | |
| 410 Fetch_Images | |
| 411 ;; | |
| 412 quickfetch) | |
| 413 ISNEW=1 | |
| 414 ISQUICK=1 | |
| 415 Generate_Link | |
| 416 Check_Files | |
| 417 Fetch_Images | |
| 418 ;; | |
| 419 esac | |
| 420 } | |
| 421 | |
| 422 # call the main routine! | |
| 423 main "$@" | |
| 424 | |

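For reference, here is a hedged usage sketch based on the script's own help text (`Err_Help`) and option parsing. The tag names, site, and credentials below are placeholders, and the script is assumed to be installed on `PATH` as `moefetch`; adjust to your setup.

```sh
#!/bin/sh
# Hypothetical invocations of moefetch; all tags, site, and credentials are placeholders.

# Full fetch: build the URL list from the site's /post/index.xml API, verify existing
# files against their md5 filenames, then download whatever is missing into
# ${BASE_DIR}/<SITE_DIR>/<TAGS>.
moefetch fetch -s moe.imouto.org "vector_trace"

# Quick fetch: skip the local md5 verification pass; the full list is handed to wget,
# whose continue mode skips files that already appear complete.
moefetch quickfetch "vector_trace"

# Check only: report how many files are available remotely and locally; -n skips the
# repository cleanup step, -u/-p send login and a sha1 password_hash with each request.
moefetch check -n -u myuser -p mypassword "vector_trace landscape"
```

Note that moe.imouto.org is already DEFAULT_SITE in this revision, so -s is shown only for illustration.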