Mercurial > ec-dotfiles
diff bin/moefetch @ 400:657f787fbae8
Merge remote-tracking branch 'origin/master'
author | Edho Arief <edho@myconan.net> |
---|---|
date | Tue, 04 Sep 2012 10:23:37 +0700 |
parents | 38c7615caf9e |
children |
line wrap: on
line diff
--- a/bin/moefetch Tue Sep 04 10:22:46 2012 +0700 +++ b/bin/moefetch Tue Sep 04 10:23:37 2012 +0700 @@ -42,7 +42,7 @@ # useless welcome message. Also version msg_welcome() { - echo "moefetch ${_version} + echo "moefetch ${_version} Copyright (c) 2009-2012 edogawaconan <edho@myconan.net> " } @@ -50,14 +50,14 @@ # Sanitize path. Totally safe. Usage: cmd "$(safe_path "${filename}")" safe_path() { - # It all depends on the first character. - start=$(printf "%s" "$*" | cut -c 1) - path= - case "${start}" in - .|/) path="$*";; # . and / is safe. No change. - *) path="./$*";; # Anything else must be prefixed with ./ - esac - printf "%s" "${path}" # Return. + # It all depends on the first character. + start=$(printf "%s" "$*" | cut -c 1) + path= + case "${start}" in + .|/) path="$*";; # . and / is safe. No change. + *) path="./$*";; # Anything else must be prefixed with ./ + esac + printf "%s" "${path}" # Return. } # Checks md5. OpenSSL should be available on anything usable. @@ -78,56 +78,57 @@ # fatal error handler Err_Fatal() { - echo " + echo " Fatal error: ${1}" - exit 1 + exit 1 } Err_Impossible() { - echo " + echo " Impossible error. Or you modified content of the working directories when the script is running. Please report to moefetch.googlecode.com if you see this message (complete with entire run log)" - exit 1 + exit 1 } # help message Err_Help() { - echo "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS + echo "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS COMMAND: - (quick)fetch: - Do a complete update. Add prefix quick to skip file checking - check: - Get list of new files, clean up local folder and print total new files + (quick)fetch: + Do a complete update. Add prefix quick to skip file checking + check: + Get list of new files, clean up local folder and print total new files OPTIONS: - -n: - Skip checking repository directory. - -p PASSWORD: - Specifies password for login. - -s SITE_URL: - Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}. - -u USERNAME: - Specifies username for login. - TAGS: - Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme." - exit 2 + -n: + Skip checking repository directory. + -p PASSWORD: + Specifies password for login. + -s SITE_URL: + Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}. + -u USERNAME: + Specifies username for login. + TAGS: + Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme." + exit 2 } # generate link by transforming xml Generate_Link() { - echo " + echo " Fetching XML file" - tempnum=1000 - iternum=1 - > "${TEMP_PREFIX}-list" - while [ "${tempnum}" -ge 1000 ]; do - url="${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}" - [ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}" - wget --no-check-certificate --quiet "${url}" -O "${TEMP_PREFIX}-xml" --referer="${SITE}/post" --user-agent="${useragent}" -e continue=off || Err_Fatal "Failed download catalog file" - printf "Processing XML file... " - # xslt evilry - xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(https*.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" + pagelimit=100 + tempnum="${pagelimit}" + iternum=1 + > "${TEMP_PREFIX}-list" + while [ "${tempnum}" -ge "${pagelimit}" ]; do + url="${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=${pagelimit}&page=${iternum}" + [ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}" + wget --no-check-certificate --quiet "${url}" -O "${TEMP_PREFIX}-xml" --referer="${SITE}/post" --user-agent="${useragent}" -e continue=off || Err_Fatal "Failed download catalog file" + printf "Processing XML file... " + # xslt evilry + xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(https*.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> <xsl:output method="xml" indent="yes"/> <xsl:template match="post"> @@ -135,292 +136,292 @@ </xsl:template> </xsl:stylesheet> EOF - tempnum=$(grep -c . "${TEMP_PREFIX}-templist") - iternum=$((iternum + 1)) - cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list" - echo "${tempnum} file(s) available" - done - numfiles=$(grep -c . "${TEMP_PREFIX}-list") - echo "${numfiles} file(s) available on server" - [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site." + tempnum=$(grep -c . "${TEMP_PREFIX}-templist") + iternum=$((iternum + 1)) + cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list" + echo "${tempnum} file(s) available" + done + numfiles=$(grep -c . "${TEMP_PREFIX}-list") + echo "${numfiles} file(s) available on server" + [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site." } progress_init() { - _last="-" - printf "${_last}" + _last="-" + printf "${_last}" } progress_anim() { - case "${_last}" in - /) _last="-";; - -) _last=\\;; - \\) _last=\|;; - \|) _last="/";; - esac - printf "\b${_last}" + case "${_last}" in + /) _last="-";; + -) _last=\\;; + \\) _last=\|;; + \|) _last="/";; + esac + printf "\b${_last}" } progress_done() { printf "\bdone\n"; } # getting rid of ls (as per suggestion) Count_Files() { - numfiles=0 - for dircontent in "${*}/"* "${*}/".*; do - if [ -e "${dircontent}" ] && [ x"${dircontent}" != x"${*}/." ] && [ x"${dircontent}" != x"${*}/.." ]; then - numfiles=$((numfiles + 1)) - fi - done - echo $((numfiles)) + numfiles=0 + for dircontent in "${*}/"* "${*}/".*; do + if [ -e "${dircontent}" ] && [ x"${dircontent}" != x"${*}/." ] && [ x"${dircontent}" != x"${*}/.." ]; then + numfiles=$((numfiles + 1)) + fi + done + echo $((numfiles)) } # check tools availability Check_Tools() { - # verify all programs required do indeed exist - commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl" - for cmd in ${commands} - do - [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}" - done + # verify all programs required do indeed exist + commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl" + for cmd in ${commands} + do + [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}" + done } # verify required folders exist and writeable Check_Folders(){ - [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory." - for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do - if [ ! -d "${BASE_DIR}/${directory}" ]; then - mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible - fi - if [ ! -O "${BASE_DIR}/${directory}" ]; then - echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it" - chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible - fi - done - [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1 - for i in error ok list newlist templist; do - touch "${TEMP_PREFIX}-${i}" || Fatal_Err "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen" - done - # + [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory." + for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do + if [ ! -d "${BASE_DIR}/${directory}" ]; then + mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible + fi + if [ ! -O "${BASE_DIR}/${directory}" ]; then + echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it" + chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible + fi + done + [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1 + for i in error ok list newlist templist; do + touch "${TEMP_PREFIX}-${i}" || Fatal_Err "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen" + done + # } # Do some cleanup Cleanup_Repository() { - # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} - printf "Cleaning up repository folder... " - progress_init - trash_dir="${BASE_DIR}/trash/${trash_dir}/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")" - trashes="These files have been moved to ${trash_dir}:" - has_trash= - if [ ! -d "${trash_dir}" ]; then - mkdir -p "${trash_dir}" || Err_Impossible - else - if [ ! -O "${trash_dir}" ]; then - chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible - fi - fi - for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* - do - if [ -e "${trash}" ]; then - is_trash= - if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then - is_trash=1 - has_trash=1 - mv -f -- "${trash}" "${trash_dir}" || Err_Impossible - trashes="${trashes} + # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} + printf "Cleaning up repository folder... " + progress_init + trash_dir="${BASE_DIR}/trash/${trash_dir}/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")" + trashes="These files have been moved to ${trash_dir}:" + has_trash= + if [ ! -d "${trash_dir}" ]; then + mkdir -p "${trash_dir}" || Err_Impossible + else + if [ ! -O "${trash_dir}" ]; then + chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible + fi + fi + for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* + do + if [ -e "${trash}" ]; then + is_trash= + if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then + is_trash=1 + has_trash=1 + mv -f -- "${trash}" "${trash_dir}" || Err_Impossible + trashes="${trashes} $(get_basename "${trash}")" - fi - fi - progress_anim - done - rmdir "${trash_dir}" 2>/dev/null - progress_done - [ -n "${has_trash}" ] && echo "${trashes}" + fi + fi + progress_anim + done + rmdir "${trash_dir}" 2>/dev/null + progress_done + [ -n "${has_trash}" ] && echo "${trashes}" } # check files correctness Check_Files() { - if [ ! -n "${ISNEW}" ]; then - [ -z "${NOCLEAN}" ] && Cleanup_Repository - printf "Checking for errors... " - progress_init - files_error="These files do not match its md5:" - files_notdanbooru="These files are not checked:" - has_err_filename= - has_err_md5= - > "${TEMP_PREFIX}-error" - > "${TEMP_PREFIX}-ok" - for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* - do - if [ -e "${file}" ]; then - if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then - files_notdanbooru="${files_notdanbooru} + if [ ! -n "${ISNEW}" ]; then + [ -z "${NOCLEAN}" ] && Cleanup_Repository + printf "Checking for errors... " + progress_init + files_error="These files do not match its md5:" + files_notdanbooru="These files are not checked:" + has_err_filename= + has_err_md5= + > "${TEMP_PREFIX}-error" + > "${TEMP_PREFIX}-ok" + for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"* + do + if [ -e "${file}" ]; then + if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then + files_notdanbooru="${files_notdanbooru} $(get_basename "${file}")" - has_err_filename=1 - else - if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then - echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok" - else - rm "${file}" || Err_Fatal "Error removing ${file}" - echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error" - files_error="${files_error} + has_err_filename=1 + else + if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then + echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok" + else + rm "${file}" || Err_Fatal "Error removing ${file}" + echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error" + files_error="${files_error} $(get_basename "${file}")" - has_err_md5=1 - fi - fi - fi - progress_anim - done - progress_done - if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then - echo "All files OK" - else - if [ -n "${has_err_md5}" ]; then - echo "${files_error}" - echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed" - fi - [ -n "${has_err_filename}" ] && echo "${files_notdanbooru}" - fi - echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally" + has_err_md5=1 + fi + fi + fi + progress_anim + done + progress_done + if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then + echo "All files OK" + else + if [ -n "${has_err_md5}" ]; then + echo "${files_error}" + echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed" + fi + [ -n "${has_err_filename}" ] && echo "${files_notdanbooru}" + fi + echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally" - printf "Generating list of new files... " - progress_init - cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist" - while read -r is_ok; do - grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist" - cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible - progress_anim - done < "${TEMP_PREFIX}-ok" - progress_done - echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded" - else - if [ -n "${ISQUICK}" ]; then - echo "Quick mode selected. Skipping check" - else - echo "Empty local repository" - fi - cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist" - fi + printf "Generating list of new files... " + progress_init + cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist" + while read -r is_ok; do + grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist" + cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible + progress_anim + done < "${TEMP_PREFIX}-ok" + progress_done + echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded" + else + if [ -n "${ISQUICK}" ]; then + echo "Quick mode selected. Skipping check" + else + echo "Empty local repository" + fi + cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist" + fi } # start downloading the images Fetch_Images() { - if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then - echo "No new file" - else - printf "Downloading files... " - cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" - wget --no-check-certificate -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" --referer="${SITE}/post" --user-agent="${useragent}" - fi + if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then + echo "No new file" + else + printf "Downloading files... " + cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" + wget --no-check-certificate -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" --referer="${SITE}/post" --user-agent="${useragent}" + fi } # initialize base variables and initial command check init() { - # path initialization - # check if additional path is specified - if [ -n "${ADDITIONAL_PATH}" ] - then - # insert the additional path - PATH="${ADDITIONAL_PATH}:${PATH}" - export PATH - fi - - # misc variables - ISQUICK= - ISNEW= - - # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message - [ $# -lt 2 ] && Err_Help - case "$1" in - check|fetch|quickfetch) - echo "Starting..." - JOB="$1" - ;; - *) - Err_Help - ;; - esac - shift - SITE= - TAGS= - has_pass=0 - has_user=0 + # path initialization + # check if additional path is specified + if [ -n "${ADDITIONAL_PATH}" ] + then + # insert the additional path + PATH="${ADDITIONAL_PATH}:${PATH}" + export PATH + fi + + # misc variables + ISQUICK= + ISNEW= + + # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message + [ $# -lt 2 ] && Err_Help + case "$1" in + check|fetch|quickfetch) + echo "Starting..." + JOB="$1" + ;; + *) + Err_Help + ;; + esac + shift + SITE= + TAGS= + has_pass=0 + has_user=0 x=1 - while getopts "s:nu:p:" opt - do - case "$opt" in - s) SITE="$OPTARG";; - n) NOCLEAN=1;; - p) - LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/') - has_pass=1 - ;; - u) - LOGIN_USER="$OPTARG" - has_user=1 - ;; - esac + while getopts "s:nu:p:" opt + do + case "$opt" in + s) SITE="$OPTARG";; + n) NOCLEAN=1;; + p) + LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/') + has_pass=1 + ;; + u) + LOGIN_USER="$OPTARG" + has_user=1 + ;; + esac x=$OPTIND - done + done shift $(($x-1)) if [ "$1" = -- ]; then shift; fi - TAGS="$@" - [ -n "${SITE}" ] || SITE=${DEFAULT_SITE} - [ -n "${TAGS}" ] || Err_Fatal "No tag specified" - # Get base folder - default, current folder or fallback to ${HOME} - [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD} - [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME} - [ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}" - # see if both pass and use are set. If they're set, switch _use_login variable content to 1. - [ ${has_pass} -eq 1 -a ${has_user} -eq 1 ] && _use_login=1 + TAGS="$@" + [ -n "${SITE}" ] || SITE=${DEFAULT_SITE} + [ -n "${TAGS}" ] || Err_Fatal "No tag specified" + # Get base folder - default, current folder or fallback to ${HOME} + [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD} + [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME} + [ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}" + # see if both pass and use are set. If they're set, switch _use_login variable content to 1. + [ ${has_pass} -eq 1 -a ${has_user} -eq 1 ] && _use_login=1 - echo "Tags: ${TAGS}" - # slash is not wanted for folder name - TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g') - SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g') - TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}" + echo "Tags: ${TAGS}" + # slash is not wanted for folder name + TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g') + SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g') + TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}" } # global variables goes here init_globals() { - _version="1.0-rc3" # version of this script - _use_login=0 # variable to check whether a login is used or not + _version="1.0-rc3" # version of this script + _use_login=0 # variable to check whether a login is used or not } main() { - # removing GNU-ism as much as possible - POSIXLY_CORRECT=1 - #initialize global variables - init_globals - #print welcome message - msg_welcome - # initialization - init "$@" - Check_Tools - Check_Folders + # removing GNU-ism as much as possible + POSIXLY_CORRECT=1 + #initialize global variables + init_globals + #print welcome message + msg_welcome + # initialization + init "$@" + Check_Tools + Check_Folders - # let's do the job! - case "${JOB}" in - check) - Generate_Link - Check_Files - ;; - fetch) - Generate_Link - Check_Files - Fetch_Images - ;; - quickfetch) - ISNEW=1 - ISQUICK=1 - Generate_Link - Check_Files - Fetch_Images - ;; - esac + # let's do the job! + case "${JOB}" in + check) + Generate_Link + Check_Files + ;; + fetch) + Generate_Link + Check_Files + Fetch_Images + ;; + quickfetch) + ISNEW=1 + ISQUICK=1 + Generate_Link + Check_Files + Fetch_Images + ;; + esac } # call the main routine!