Mercurial > ec-dotfiles
view moefetch.sh @ 168:2781576c06a5
cleanup
author | edhoprima@gmail.com <edhoprima@gmail.com> |
---|---|
date | Fri, 05 Jun 2009 19:31:19 +0000 |
parents | 78ac6fd03e3a |
children | b9a49b36e4ab |
line wrap: on
line source
#!/bin/sh
# Copyright (c) 2009, edogawaconan <me@myconan.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Lots of bugs here. Use with care
# USE WITH CARE
#
# what it does: fetch every picture that has the specified TAGS.
# requirement: wget, libxslt, md5sum (or md5)
#
# usage: moefetch status|fetch|quickfetch [-s|--site <SITE>] <TAGS>

# program additional paths for: cut, sed, wc, MD5(sum), wget, xsltproc, grep
ADDITIONAL_PATH=

# custom md5 path with arguments, expected output: <32digit md5><space(s)><filename>
# Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
MD5=

# default server address. Danbooru only! I do not take responsibility of stupidity.
DEFAULT_SITE="moe.imouto.org"

# base directory. make sure it's writeable. I do not take responsibility if you
# don't own the folder and files as no check is done for this one.
# Structure is ${BASE_DIR}/<TAGS>
# Absolute path only.
# Leave empty to use whatever folder you're running this at
BASE_DIR=""

# not user modifiable from here

# useless welcome message. Also version
Msg_Welcome() {
  MOEFETCHVERSION="0.1-beta"
  cat <<EOF
moefetch ${MOEFETCHVERSION}
Copyright (c) 2009 edogawaconan <me@myconan.net>
EOF
}

# fatal error handler: print ${1} on stderr and abort with status 1
Err_Fatal() {
  echo "Fatal error: ${1}" >&2
  exit 1
}

# help message: print usage and exit with status 0
Err_Help() {
  cat <<EOF
Usage: moefetch (quick)fetch|status <TAGS>
EOF
  exit 0
}

# print the number of lines of file ${1} as a bare integer.
# Some wc implementations pad the count with blanks; the arithmetic
# expansion strips that padding.
Count_Lines() {
  echo $(( $(wc -l < "${1}") ))
}

# succeed when file name ${1} starts with 32 lowercase hex digits (an md5)
Is_Md5_Name() {
  printf '%s\n' "${1}" | grep '^[0-9a-f]\{32\}' > /dev/null 2>&1
}

# generate link by transforming xml
# reads:  SITE, TAGS, SITE_DIR, TARGET_DIR, BASE_DIR
# writes: ${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-xml  (raw server answer)
#         ${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-list (one image URL per line)
Generate_Link() {
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  echo
  echo "Fetching xml file"
  wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" \
    -O "${SITE_DIR}-${TARGET_DIR}-xml" -e continue=off
  echo "Processing XML file..."
  # xslt evilry: print every post's file_url, then let sed rewrite each URL to
  # <prefix>/<md5>.<ext> and let grep drop everything that is not a URL
  xsltproc - "${SITE_DIR}-${TARGET_DIR}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep '^http' > "${SITE_DIR}-${TARGET_DIR}-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
  echo "$(Count_Lines "${SITE_DIR}-${TARGET_DIR}-list") file(s) available on server"
  #output file: ${TARGET_DIR}-list
}

# check tools availability
# sets MD5 (when left empty by the user) and aborts when a tool is missing
Check_Tools() {
  # verify all programs required do indeed exist

  #MD5
  if [ ! "${MD5}" ]; then
    case "$(uname)" in
      *BSD|Darwin) MD5="md5 -r" ;;
      Linux|SunOS) MD5="md5sum" ;;
      *) Err_Fatal "No known md5 tool for this platform. Please specify manually" ;;
    esac
  fi
  # MD5 may carry arguments; only its first word is the program name
  MD5_COMMAND=$(echo "${MD5}" | cut -d' ' -f1)

  # basic tools
  COMMANDS="cut sed wc wget xsltproc rm mv mkdir chmod comm grep ${MD5_COMMAND}"
  for COMMAND in ${COMMANDS}; do
    COMMAND_CHECK=$(command -v "${COMMAND}")
    [ "${COMMAND_CHECK}" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
  done

  # grep checking: "grep -f <file>" is needed later on
  # originally created for workaround on solaris
  FAIL=""
  echo "blah" > superrandomtestfile
  echo "blah" > superrandomtestfile.2
  grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1
  rm -f superrandomtestfile superrandomtestfile.2
  if [ "${FAIL}" ]; then
    Err_Fatal "Your grep is not compatible. Please install or set path of correct grep"
  fi
}

# verify required folders exist and writeable
# creates temp, trash, deleted and ${SITE_DIR}/${TARGET_DIR} below ${BASE_DIR},
# sets ISNEW when the target folder is empty and leaves the shell in
# ${BASE_DIR}/temp with the work files created
Check_Folders() {
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}."
  # the target folder is quoted so that tags with blanks stay one folder
  for FOLDER in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then
      # -p: the ${SITE_DIR} parent does not exist on first use of a site
      mkdir -p "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed"
    fi
    if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then
      echo "You don't own the ${BASE_DIR}/${FOLDER}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing ownership. This shouldn't happen"
    fi
  done
  if [ -z "$(ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" ]; then
    ISNEW=1
  fi
  # let's move to workdir
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  for i in error ok list newlist; do
    touch "${SITE_DIR}-${TARGET_DIR}-${i}" || Err_Fatal "Error creating ${TARGET_DIR}-${i}. This shouldn't happen"
  done
}

# check files correctness
# For a non-empty local folder: move files whose name is not an md5 to trash,
# verify every remaining file against the md5 in its name, build the list of
# files still to download (-newlist) and remove the corrupted files.
# For an empty folder (or quick mode): everything on the server is new.
Check_Files() {
  if [ ! "${ISNEW}" ]; then
    echo "Checking for errors..."
    # THE FILES
    # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    for TRASH in *; do
      [ -e "${TRASH}" ] || continue   # unmatched glob stays literal
      Is_Md5_Name "${TRASH}" && continue
      mv -f -- "${TRASH}" "${BASE_DIR}/trash"
      echo "Moved ${TRASH} to ${BASE_DIR}/trash"
    done
    printf "" > "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
    for FILE in *; do
      [ -f "${FILE}" ] || continue
      # ${MD5} is intentionally unquoted: it may hold a command plus arguments
      FILE_SUM=$(${MD5} "${FILE}" | cut -d ' ' -f1)
      # the expected checksum is the file name up to its first dot
      if [ "${FILE_SUM}" != "${FILE%%.*}" ]; then
        echo
        echo "${FILE}" >> "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
        echo "Error: ${FILE}"
      fi
      printf "."
    done
    echo

    # current dir: ${BASE_DIR}/temp
    cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
    TOTAL_ERROR=$(Count_Lines "${SITE_DIR}-${TARGET_DIR}-error")
    echo "${TOTAL_ERROR} file(s) error"

    echo "Generating list of new files..."
    # THE FILES
    # -ok: local files that are not in the error list
    ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${SITE_DIR}-${TARGET_DIR}-error" - > "${SITE_DIR}-${TARGET_DIR}-ok"
    # -newlist: server URLs that match none of the good local file names
    grep -vf "${SITE_DIR}-${TARGET_DIR}-ok" "${SITE_DIR}-${TARGET_DIR}-list" > "${SITE_DIR}-${TARGET_DIR}-newlist"
    echo "$(Count_Lines "${SITE_DIR}-${TARGET_DIR}-newlist") file(s) to be downloaded"

    # back to target dir
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    echo "Removing error files"
    if [ "${TOTAL_ERROR}" -eq 0 ]; then
      echo "No error file. 0 file removed"
    else
      # one name per line; read keeps blanks and backslashes intact
      while IFS= read -r FILE; do
        rm -- "${FILE}"
      done < "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
      echo "${TOTAL_ERROR} file(s) removed"
    fi
    echo "$(( $(ls | wc -l) )) file(s) available locally"
  else
    if [ "${ISQUICK}" ]; then
      echo "quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
    cat "${SITE_DIR}-${TARGET_DIR}-list" > "${SITE_DIR}-${TARGET_DIR}-newlist"
  fi
}

# start downloading the images
# hands the -newlist file to a background wget that writes into the target
# folder and logs to ${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log
Fetch_Images() {
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  if [ "$(Count_Lines "${SITE_DIR}-${TARGET_DIR}-newlist")" -eq 0 ]; then
    echo "No new file"
  else
    echo "Starting wget"
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -bi "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-newlist" -o "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log"
  fi
}

# initialize base variables and initial command check
# arguments: status|fetch|quickfetch [-s|--site <SITE>] <TAGS>
# sets: BASE_DIR, PATH, ISQUICK, ISNEW, JOB, SITE, TAGS, TARGET_DIR, SITE_DIR
Init() {
  # Get base folder - current folder or fallback to ${HOME}
  [ "${BASE_DIR}" ] || BASE_DIR="${PWD}"
  [ "${BASE_DIR}" ] || BASE_DIR="${HOME}"
  # force an absolute path
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac

  # path initialization
  if [ "${ADDITIONAL_PATH}" ]; then
    PATH="${ADDITIONAL_PATH}:${PATH}"
  fi
  export PATH

  # misc variables
  ISQUICK=
  ISNEW=

  if [ $# -lt 2 ]; then
    Err_Help
  fi
  case "${1}" in
    status|fetch|quickfetch)
      echo "Starting..."
      JOB="${1}"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift

  SITE=
  case "${1}" in
    -s|--site)
      # the option needs its value plus at least one tag behind it
      [ $# -ge 3 ] || Err_Help
      SITE="${2}"
      shift 2
      ;;
    *)
      SITE="${DEFAULT_SITE}"
      ;;
  esac

  # all remaining words form the tag query
  TAGS="$*"
  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
}

Msg_Welcome
Init "$@"
Check_Tools
Check_Folders

# let's do the job!
case "${JOB}" in
  status)
    Generate_Link
    Check_Files
    ;;
  fetch)
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
  quickfetch)
    # pretend the local folder is empty so that no checksum pass is done
    ISNEW=1
    ISQUICK=1
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
esac