Mercurial > ec-dotfiles
view moefetch.sh @ 221:e891b563b797
wrong rule caused mass headache
author | edhoprima@gmail.com <edhoprima@gmail.com> |
---|---|
date | Thu, 02 Jul 2009 20:33:48 +0000 |
parents | fe4d74801b28 |
children | e3fb9507cf7f |
line wrap: on
line source
#!/bin/sh

# Copyright (c) 2009, edogawaconan <me@myconan.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Lots of bugs here. Use with care
# USE WITH CARE
#
# what it does: fetch every picture that has the specified TAGS.
# requirement: wget, libxslt, md5sum (or md5)

# program additional paths for: cut, sed, wc, MD5(sum), wget, xsltproc, grep
ADDITIONAL_PATH=

# custom md5 path with arguments, expected output: <32digit md5><space(s)><filename>
# Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
MD5=

# default server address. Danbooru only! I do not take responsibility of stupidity.
DEFAULT_SITE="moe.imouto.org"

# base directory. make sure it's writeable. I do not take responsibility if you
# don't own the folder and files as no check is done for this one.
# Structure is ${BASE_DIR}/<TAGS>
# Absolute path only.
# Leave empty to use whatever folder you're running this at
BASE_DIR=

# not user modifiable from here

### TODO:
### - sanity validator(?)
### - unified repository to save bandwidth
### - bug stomping
### - sanity checking
### WILL BE FOR 0.3

# useless welcome message. Also version
# Sets MOEFETCHVERSION and prints the banner to stdout.
Msg_Welcome() {
  MOEFETCHVERSION="0.3-beta1"
  echo "moefetch ${MOEFETCHVERSION}
Copyright (c) 2009 edogawaconan <me@myconan.net>
"
}

# fatal error handler
# $1: message to print. Never returns: exits the script with status 1.
Err_Fatal() {
  echo "
Fatal error: ${1}"
  exit 1
}

# Handler for failures of operations on the working directories (mkdir, mv,
# chmod, cp, ...). Never returns: exits the script with status 1.
Err_Impossible() {
  echo "
Impossible error. Or you modified content of the working directories when the script is running.
Please report to moefetch.googlecode.com if you see this message (complete with entire run log)"
  exit 1
}

# help message
# Prints usage (reads DEFAULT_SITE) and exits the script with status 2.
Err_Help() {
  echo "moefetch.sh COMMAND [-s SITE_URL] [-nc] TAGS

COMMAND:
  (quick)fetch: do a complete update. Add prefix quick to skip file checking
  check: get list of new files, clean up local folder and print total new files

-s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}
-nc, --noclean: Do not move unknown local files to the trash folder before checking
TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme"
  exit 2
}

# Print the number of lines in file $1 as a bare integer.
# The arithmetic expansion strips the padding some wc implementations emit.
Count_Lines() {
  echo "$(($(wc -l < "${1}")))"
}

# generate link by transforming xml
# Reads SITE, TAGS and TEMP_PREFIX. Downloads the post index page by page
# (1000 posts per page) and writes one file URL per line to
# ${TEMP_PREFIX}-list. Exits through Err_Fatal when a page cannot be fetched
# or when the final list is empty.
Generate_Link() {
  echo "
Fetching XML file"
  tempnum=1000
  _i=1
  : > "${TEMP_PREFIX}-list"
  # a page holding fewer than 1000 entries is the last one
  while [ "${tempnum}" -ge 1000 ]; do
    # A failed download must abort the run: continuing with a partial list
    # would make Cleanup_Repository treat valid local files as trash.
    wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=1000&page=${_i}" -O "${TEMP_PREFIX}-xml" -e continue=off \
      || Err_Fatal "Failed fetching file list from ${SITE}"
    printf "Processing XML file... "
    # xslt evilry
    xsltproc - "${TEMP_PREFIX}-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
    tempnum=$(Count_Lines "${TEMP_PREFIX}-templist")
    _i=$((_i + 1))
    cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
    echo "${tempnum} file(s) available"
  done
  numfiles=$(Count_Lines "${TEMP_PREFIX}-list")
  echo "${numfiles} file(s) available on server"
  [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
}

# Prints what is left of the given name after removing every
# "<32 lowercase hex digits>.<anything>" match. Empty output therefore means
# the name consists only of such an md5-style file name.
Is_NotMD5() {
  printf '%s\n' "${*}" | sed -e 's/\([0-9a-f]\{32\}\..*\)//g'
}

# Start the progress spinner: prints its first frame and remembers it in _last.
Progress_Init() {
  _last="-"
  # the frame is passed as data, never as the printf format string
  printf '%s' "${_last}"
}

# Advance the spinner one frame (- \ | /), overwriting the previous frame.
Progress_Anim() {
  case "${_last}" in
    /) _last="-" ;;
    -) _last=\\ ;;
    \\) _last=\| ;;
    \|) _last="/" ;;
  esac
  printf '\b%s' "${_last}"
}

# Replace the spinner with "done" and end the line.
Progress_Done() {
  printf '\bdone\n'
}

# getting rid of ls (as per suggestion)
# Prints the number of entries (hidden ones included) in the directory given
# as argument, not counting "." and "..".
Count_Files() {
  _i=0
  for _f in "${*}/"* "${*}/".*; do
    # Skip "." and ".." by name instead of subtracting 2 afterwards: not
    # every shell returns them for the ".*" pattern.
    case "${_f##*/}" in
      . | ..) continue ;;
    esac
    # An unmatched pattern is left as a literal string naming nothing;
    # -L keeps dangling symlinks counted.
    if test -e "${_f}" || test -L "${_f}"; then
      _i=$((_i + 1))
    fi
  done
  echo "${_i}"
}

# check tools availability
# Chooses a default for MD5 when it is empty, then verifies every external
# program the script runs can be found in PATH. Exits through Err_Fatal
# otherwise.
Check_Tools() {
  # verify all programs required do indeed exist
  #MD5
  if test -z "${MD5}"; then
    case "$(uname)" in
      *BSD | Darwin) MD5="md5 -r" ;;
      Linux | SunOS) MD5="md5sum" ;;
      *) Err_Fatal "No known md5 tool for this platform. Please specify manually" ;;
    esac
  fi
  # MD5 may carry arguments; only its first word is the program name
  md5_command=$(echo ${MD5} | cut -d' ' -f1)
  # basic tools
  commands="cut sed wc wget xsltproc xargs rm mkdir chmod mv cp cat touch basename rmdir comm grep date ${md5_command}"
  for cmd in ${commands}; do
    [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}"
  done
}

# verify required folders exist and writeable
# Reads BASE_DIR, SITE_DIR, TARGET_DIR and TEMP_PREFIX. Creates the working
# directories and the temporary list files, and sets ISNEW=1 when the target
# directory holds no entries.
Check_Folders() {
  test -O "${BASE_DIR}" || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
  for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${directory}" ]; then
      mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
    fi
    if [ ! -O "${BASE_DIR}/${directory}" ]; then
      echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
    fi
  done
  if test "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0; then
    ISNEW=1
  fi
  for i in error ok list newlist templist; do
    touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
  done
}

# Do some cleanup
# Moves every entry of the target directory that is a directory, is not named
# like an md5 file, or is not mentioned in ${TEMP_PREFIX}-list into a
# timestamped folder below ${BASE_DIR}/trash, then reports what was moved.
Cleanup_Repository() {
  # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
  printf "Cleaning up repository folder... "
  Progress_Init
  trash_dir="${BASE_DIR}/trash/${SITE_DIR}-${TARGET_DIR}-$(date -u "+%Y%m%d-%H.%M")"
  trashes="These files have been moved to ${trash_dir}:"
  has_trash=
  if test ! -d "${trash_dir}"; then
    mkdir -p "${trash_dir}" || Err_Impossible
  elif test ! -O "${trash_dir}"; then
    # -O tests ownership of the file (-o would test a shell option instead)
    chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible
  fi
  for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
    # an unmatched pattern stays literal and names nothing: nothing to move
    if test ! -e "${trash}" && test ! -L "${trash}"; then
      continue
    fi
    trash_name=$(basename "${trash}")
    # -F: the file name is matched literally, not as a regular expression
    if test -d "${trash}" \
      || test -n "$(Is_NotMD5 "${trash_name}")" \
      || ! grep -q -F -- "${trash_name}" "${TEMP_PREFIX}-list"; then
      has_trash=1
      mv -f "${trash}" "${trash_dir}" || Err_Impossible
      trashes="${trashes}
  ${trash_name}"
    fi
    Progress_Anim
  done
  # removes the trash folder only when nothing was moved into it;
  # the failure on a non-empty folder is expected and silenced
  rmdir "${trash_dir}" 2> /dev/null
  Progress_Done
  if test -n "${has_trash}"; then
    echo "${trashes}"
  fi
}

# check files correctness
# Unless ISNEW is set: optionally cleans the repository (skipped when NOCLEAN
# is set), removes local files whose md5 does not match their name, and writes
# the URLs still to be downloaded to ${TEMP_PREFIX}-newlist.
# When ISNEW is set the complete list becomes the new list.
Check_Files() {
  if test ! -n "${ISNEW}"; then
    test -z "${NOCLEAN}" && Cleanup_Repository
    printf "Checking for errors... "
    Progress_Init
    files_error="These files do not match its md5:"
    files_notdanbooru="These files are not checked:"
    has_err_filename=
    has_err_md5=
    : > "${TEMP_PREFIX}-error"
    : > "${TEMP_PREFIX}-ok"
    for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
      # the literal pattern means the glob matched nothing
      if test "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*"; then
        file_name=$(basename "${file}")
        if test -n "$(Is_NotMD5 "${file_name}")" || test -d "${file}"; then
          files_notdanbooru="${files_notdanbooru}
  ${file_name}"
          has_err_filename=1
        # ${MD5} is left unquoted on purpose: it may hold a command plus arguments
        elif test "$(${MD5} "${file}" | cut -d ' ' -f 1)" = "${file_name%%.*}"; then
          printf '%s\n' "${file_name}" >> "${TEMP_PREFIX}-ok"
        else
          rm "${file}" || Err_Fatal "Error removing ${file}"
          printf '%s\n' "${file_name}" >> "${TEMP_PREFIX}-error"
          files_error="${files_error}
  ${file_name}"
          has_err_md5=1
        fi
      fi
      Progress_Anim
    done
    Progress_Done
    if test ! -n "${has_err_md5}" && test ! -n "${has_err_filename}"; then
      echo "All files OK"
    else
      if test -n "${has_err_md5}"; then
        echo "${files_error}"
        echo "$(Count_Lines "${TEMP_PREFIX}-error") file(s) removed"
      fi
      test -n "${has_err_filename}" && echo "${files_notdanbooru}"
    fi
    echo "$(Count_Lines "${TEMP_PREFIX}-ok") file(s) available locally"

    printf "Generating list of new files... "
    Progress_Init
    if test -s "${TEMP_PREFIX}-ok"; then
      # Drop every URL naming a verified local file in one pass.
      # grep exits 1 when no line is left, which is a valid result here.
      grep -v -F -f "${TEMP_PREFIX}-ok" "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
      test "$?" -le 1 || Err_Impossible
    else
      # no verified local file: everything on the list has to be downloaded
      cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-newlist" || Err_Impossible
    fi
    Progress_Done
    echo "$(Count_Lines "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
  else
    if test -n "${ISQUICK}"; then
      echo "Quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
  fi
}

# start downloading the images
# Hands ${TEMP_PREFIX}-newlist to a background wget running inside the target
# directory; wget logs to ${TEMP_PREFIX}.log.
Fetch_Images() {
  if test "$(Count_Lines "${TEMP_PREFIX}-newlist")" -eq 0; then
    echo "No new file"
  else
    printf "Starting wget... "
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -bi "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
  fi
}

# initialize base variables and initial command check
# Arguments: COMMAND [-s SITE_URL] [-nc] TAGS...
# Sets JOB, SITE, TAGS, NOCLEAN, BASE_DIR, TARGET_DIR, SITE_DIR and
# TEMP_PREFIX; resets ISQUICK and ISNEW. Exits through Err_Help or Err_Fatal
# on invalid arguments.
Init() {
  # path initialization
  test -n "${ADDITIONAL_PATH}" && PATH="${ADDITIONAL_PATH}:${PATH}"
  export PATH

  # misc variables
  ISQUICK=
  ISNEW=
  # reset so a value inherited from the environment cannot disable cleanup
  NOCLEAN=

  [ $# -lt 2 ] && Err_Help
  case "$1" in
    check | fetch | quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift
  SITE=
  TAGS=
  while [ "${1}" ]; do
    case "$1" in
      -s | --site)
        # without a value the extra shift below would fail
        [ $# -ge 2 ] || Err_Fatal "No site specified for ${1}"
        shift
        SITE="$1"
        ;;
      -nc | --noclean)
        NOCLEAN=1
        ;;
      *)
        # later tags are put in front of earlier ones
        if [ "${TAGS}" ]; then
          TAGS="$1 ${TAGS}"
        else
          TAGS="$1"
        fi
        ;;
    esac
    shift
  done

  test -n "${SITE}" || SITE=${DEFAULT_SITE}
  test -n "${TAGS}" || Err_Fatal "No tag specified"
  # Get base folder - default, current folder or fallback to ${HOME}
  test -n "${BASE_DIR}" || BASE_DIR=${PWD}
  test -n "${BASE_DIR}" || BASE_DIR=${HOME}
  # force an absolute path
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac

  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
  TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}

# initialization
Msg_Welcome
Init "$@"
Check_Tools
Check_Folders

# let's do the job!
case "${JOB}" in
  check)
    Generate_Link
    Check_Files
    ;;
  fetch)
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
  quickfetch)
    # pretend the repository is new so Check_Files skips verification
    ISNEW=1
    ISQUICK=1
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
esac