Mercurial > ec-dotfiles
annotate moefetch.sh @ 159:75fe19903b74
Major cleanup
| author | edhoprima@gmail.com <edhoprima@gmail.com> |
|---|---|
| date | Fri, 05 Jun 2009 15:20:36 +0000 |
| parents | cba73f6a96bb |
| children | 68227a30d0b3 |
| rev | line source |
|---|---|
| 148 | 1 #!/bin/sh |
| 2 | |
| 3 # Copyright (c) 2009, edogawaconan <me@myconan.net> | |
| 4 # | |
| 5 # Permission to use, copy, modify, and/or distribute this software for any | |
| 6 # purpose with or without fee is hereby granted, provided that the above | |
| 7 # copyright notice and this permission notice appear in all copies. | |
| 8 # | |
| 9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| 10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| 159 | 16 # |
| 17 # Lots of bugs here. Use with care | |
| 148 | 18 # USE WITH CARE |
| 159 | 19 # |
| 20 # what it does: fetch every picture that has the specified TAGS. | |
| 148 | 21 # requirement: wget, libxslt, md5sum (or md5) |
| 22 | |
# --- user configuration -----------------------------------------------------

# Additional directories to search for the required programs:
# cut, sed, wc, MD5(sum), wget, xsltproc, grep.
# Prepended to PATH as "${ADDITIONAL_PATH}:${PATH}", so use PATH syntax.
# Leave empty to use PATH as is.
ADDITIONAL_PATH=

# Custom md5 command with arguments.
# Expected output: <32digit md5><space(s)><filename>
# Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
MD5=

# Default server address, used when no -s/--site option is given.
# Danbooru only! I do not take responsibility of stupidity.
DEFAULT_SITE="moe.imouto.org"

# Base directory. Make sure it's writeable. I do not take responsibility if
# you don't own the folder and files as no check is done for this one.
# Structure is ${BASE_DIR}/<TAGS>
# Absolute path only.
# Leave empty to use whatever folder you're running this at
BASE_DIR=""
| 148 | 38 |
| 39 # not user modifiable from here | |
| 40 | |
# useless welcome message. Also version
#
# Prints the program name, version and copyright line to stdout, followed
# by one empty line.
# Globals written: MOEFETCHVERSION
Msg_Welcome() {
  MOEFETCHVERSION="0.1-beta"
  cat <<EOF
moefetch ${MOEFETCHVERSION}
Copyright (c) 2009 edogawaconan <me@myconan.net>

EOF
}
| 50 | |
# fatal error handler
#
# Prints "Fatal error: <message>" and terminates the script with status 1.
# The message goes to stderr so it is not mixed into captured or redirected
# normal output.
# Arguments: $1 - message describing what went wrong
Err_Fatal() {
  printf '%s\n' "Fatal error: ${1}" >&2
  exit 1
}
| 56 | |
# help message
#
# Prints the usage line and terminates the script. This is only reached
# when the command line is unusable (too few arguments or an unknown job),
# so the text goes to stderr and the exit status is non-zero.
Err_Help() {
  cat >&2 <<EOF
Usage: moefetch (quick)fetch|status [-s|--site <SITE>] <TAGS>
EOF
  exit 1
}
| 64 | |
# generate link by transforming xml
#
# Downloads the post index for ${TAGS} from ${SITE} and reduces it to a
# plain list of image URLs, one per line.
# Globals read: BASE_DIR, SITE, SITE_DIR, TAGS, TARGET_DIR
# Files written (inside ${BASE_DIR}/temp):
#   ${SITE_DIR}-${TARGET_DIR}-xml   index as downloaded by wget
#   ${SITE_DIR}-${TARGET_DIR}-list  extracted URLs
# Side effect: the current directory is left at ${BASE_DIR}/temp.
# Calls Err_Fatal if the work directory cannot be entered or wget fails.
Generate_Link() {
  cd "${BASE_DIR}/temp" || Err_Fatal "Unable to enter ${BASE_DIR}/temp"
  echo
  echo "Fetching xml file"
  wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${SITE_DIR}-${TARGET_DIR}-xml" -e continue=off \
    || Err_Fatal "Failed fetching xml file from ${SITE}"
  echo "Processing XML file..."
  # xslt evilry: the stylesheet (fed on stdin) prints the file_url attribute
  # of every <post>. sed then rewrites each URL to <prefix>/<md5>.<ext>,
  # dropping whatever sits between the 32-digit md5 and the jpg/png
  # extension, and grep keeps only the lines that start with a URL.
  xsltproc - "${SITE_DIR}-${TARGET_DIR}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "${SITE_DIR}-${TARGET_DIR}-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
  # arithmetic expansion strips the padding some wc implementations emit
  echo "$(( $(wc -l < "${SITE_DIR}-${TARGET_DIR}-list") )) file(s) available on server"
  #output file: ${SITE_DIR}-${TARGET_DIR}-list
}
| 84 | |
# check tools availability
#
# Picks a default md5 command when MD5 is empty, then verifies that every
# external program the script relies on can be found in PATH and that grep
# supports reading patterns from a file (-f).
# Globals read:    MD5, PATH, TMPDIR
# Globals written: MD5 (only when empty), MD5_COMMAND, COMMANDS, COMMAND,
#                  COMMAND_CHECK, GREP_TEST_DIR, FAIL
# Calls Err_Fatal on the first problem found.
Check_Tools() {
  # verify all programs required do indeed exist
  # MD5
  if [ ! "${MD5}" ]; then
    case "$(uname)" in
      *BSD) MD5="md5 -r" ;;
      Linux|SunOS) MD5="md5sum" ;;
      *) Err_Fatal "No known md5 tool for this platform. Please specify manually" ;;
    esac
  fi
  # MD5 may carry arguments; only its first word is the program to look up
  MD5_COMMAND=$(echo ${MD5} | cut -d' ' -f1)
  # basic tools
  COMMANDS="cut sed wc wget xsltproc xargs rm mv mkdir chmod touch comm grep ${MD5_COMMAND}"
  for COMMAND in ${COMMANDS}; do
    COMMAND_CHECK=$(command -v "${COMMAND}")
    [ "${COMMAND_CHECK}" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
  done

  # grep checking
  # originally created for workaround on solaris
  # The probe files live in a private scratch directory. mkdir fails if the
  # name already exists, so nothing belonging to someone else is overwritten
  # and the current directory does not need to be writeable.
  GREP_TEST_DIR="${TMPDIR:-/tmp}/moefetch-grep.$$"
  ( umask 077 && mkdir "${GREP_TEST_DIR}" ) || Err_Fatal "Unable to create ${GREP_TEST_DIR}"
  FAIL=""
  echo "blah" > "${GREP_TEST_DIR}/haystack"
  echo "blah" > "${GREP_TEST_DIR}/patterns"
  grep -f "${GREP_TEST_DIR}/patterns" "${GREP_TEST_DIR}/haystack" > /dev/null 2>&1 || FAIL=1
  rm -rf "${GREP_TEST_DIR:?}"
  if [ "${FAIL}" ]; then
    Err_Fatal "Your grep is not compatible. Please install or set path of correct grep"
  fi
}
| 115 | |
# verify required folders exist and writeable
#
# Creates the working folders under ${BASE_DIR} (temp, trash, deleted and
# the per-site image folder ${SITE_DIR}/${TARGET_DIR}) when missing, makes
# any of them that is owned by somebody else globally writeable, and
# creates the empty bookkeeping files in ${BASE_DIR}/temp.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR
# Globals written: ISNEW (set to 1 when the image folder holds no files),
#                  FOLDER, i
# Side effect: the current directory is left at ${BASE_DIR}/temp.
# Calls Err_Fatal when a folder or file cannot be prepared.
Check_Folders() {
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}."
  for FOLDER in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then
      # -p: the image folder is nested one level below the site folder
      mkdir -p "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed"
    fi
    if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then
      echo "You don't own the ${BASE_DIR}/${FOLDER}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing permission. This shouldn't happen"
    fi
  done
  # arithmetic expansion strips the padding some wc implementations emit
  if [ "$(( $(ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | wc -l) ))" -eq 0 ]; then
    ISNEW=1
  fi
  # let's move to workdir
  cd "${BASE_DIR}/temp" || Err_Fatal "Unable to enter ${BASE_DIR}/temp"
  for i in error ok list newlist; do
    touch "${SITE_DIR}-${TARGET_DIR}-${i}" || Err_Fatal "Error creating ${SITE_DIR}-${TARGET_DIR}-${i}. This shouldn't happen"
  done
}
| 137 | |
# check files correctness
#
# Works out which URLs from ${SITE_DIR}-${TARGET_DIR}-list still have to be
# downloaded and writes them to ${SITE_DIR}-${TARGET_DIR}-newlist.
#
# When ISNEW is not 1 the local image folder is verified first:
#   1. files whose name does not start with 32 hex digits are moved to
#      ${BASE_DIR}/trash;
#   2. every remaining file is hashed with ${MD5}; a file whose hash differs
#      from the part of its name before the first dot is recorded in
#      ${SITE_DIR}-${TARGET_DIR}-error;
#   3. files that passed go to ${SITE_DIR}-${TARGET_DIR}-ok and are filtered
#      out of the server list to produce the newlist;
#   4. the files recorded as errors are deleted.
# When ISNEW is 1 the whole server list is copied to the newlist.
#
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR, ISNEW, ISQUICK, MD5
# Globals written: TEMP_PREFIX, MD5_GLOB, TRASH, FILE, TOTAL_ERROR
# Side effect: the current directory is left at ${BASE_DIR}/temp.
# Calls Err_Fatal when a required directory cannot be entered.
Check_Files() {
  TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
  # string comparison: ISNEW is either empty or 1, and a numeric test on an
  # empty string is an error rather than "false"
  if [ "${ISNEW}" != "1" ]; then
    echo "Checking for errors..."
    # THE FILES

    # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
    # Everything below moves and deletes files relative to the current
    # directory, so stop right here if we cannot get there.
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Unable to enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"

    # glob for 32 hex digits, built from four groups of eight
    MD5_GLOB='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
    MD5_GLOB="${MD5_GLOB}${MD5_GLOB}${MD5_GLOB}${MD5_GLOB}"
    for TRASH in *; do
      # an empty folder leaves the pattern itself in TRASH
      [ -e "${TRASH}" ] || continue
      case "${TRASH}" in
        ${MD5_GLOB}*) continue ;;
      esac
      mv -f -- "${TRASH}" "${BASE_DIR}/trash" && echo "Moved ${TRASH} to ${BASE_DIR}/trash"
    done

    : > "${TEMP_PREFIX}-error"
    for FILE in *; do
      [ -f "${FILE}" ] || continue
      # ${MD5} is deliberately unquoted: it may hold a command plus arguments
      if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1)" != "${FILE%%.*}" ]; then
        echo
        echo "${FILE}" >> "${TEMP_PREFIX}-error"
        echo "Error: ${FILE}"
      fi
      printf "."
    done
    echo

    # arithmetic expansion strips the padding some wc implementations emit
    TOTAL_ERROR=$(( $(wc -l < "${TEMP_PREFIX}-error") ))
    echo "${TOTAL_ERROR} file(s) error"

    echo "Generating list of new files..."
    # THE FILES
    # comm -1 -3 keeps the lines only present in the directory listing,
    # i.e. the local files that were not recorded as errors
    ls | comm -1 -3 "${TEMP_PREFIX}-error" - > "${TEMP_PREFIX}-ok"
    grep -vf "${TEMP_PREFIX}-ok" "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
    echo "$(( $(wc -l < "${TEMP_PREFIX}-newlist") )) file(s) to be downloaded"

    echo "Removing error files"
    if [ "${TOTAL_ERROR}" -eq 0 ]; then
      echo "No error file. 0 file removed"
    else
      # one name per line, so names containing blanks stay intact
      while IFS= read -r FILE; do
        rm -f -- "${FILE}"
      done < "${TEMP_PREFIX}-error"
      echo "${TOTAL_ERROR} file(s) removed"
    fi
    echo "$(( $(ls | wc -l) )) file(s) available locally"
  else
    if [ "${ISQUICK}" ]; then
      echo "quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
  fi
  # back to workdir
  cd "${BASE_DIR}/temp" || Err_Fatal "Unable to enter ${BASE_DIR}/temp"
}
| 199 | |
# start downloading the images
#
# Hands ${SITE_DIR}-${TARGET_DIR}-newlist to a background wget that saves
# into ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} and logs to
# ${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log. Does nothing but print a
# note when the list has no lines.
# All paths are absolute, so the result does not depend on the directory
# the caller happens to be in.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR
# Globals written: NEWLIST
# Side effect: when a download is started the current directory is left at
# the image folder.
Fetch_Images() {
  NEWLIST="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-newlist"
  # arithmetic expansion strips the padding some wc implementations emit
  if [ "$(( $(wc -l < "${NEWLIST}") ))" -eq 0 ]; then
    echo "No new file"
  else
    echo "Starting wget"
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Unable to enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    # -b: go to background, -i: read URLs from file, -o: log file
    wget -e continue=on -b -i "${NEWLIST}" -o "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log"
  fi
}
| 210 | |
# initialize base variables and initial command check
#
# Resolves BASE_DIR and PATH, then parses the command line:
#   <job> [-s|--site <SITE>] <TAGS...>
# where <job> is one of status, fetch, quickfetch.
# Arguments: "$@" of the script
# Globals read:    BASE_DIR, ADDITIONAL_PATH, DEFAULT_SITE, PWD, HOME, PATH
# Globals written: BASE_DIR, PATH (exported), ISQUICK, ISNEW, JOB, SITE,
#                  TAGS, TARGET_DIR, SITE_DIR
# Calls Err_Help when the command line is incomplete or the job is unknown.
Init() {
  # Get base folder - current folder or fallback to ${HOME}
  [ "${BASE_DIR}" ] || BASE_DIR="${PWD}"
  [ "${BASE_DIR}" ] || BASE_DIR="${HOME}"
  # force a leading slash
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac
  # path initialization
  if [ "${ADDITIONAL_PATH}" ]; then
    PATH="${ADDITIONAL_PATH}:${PATH}"
  fi
  export PATH

  # misc variables
  ISQUICK=""
  ISNEW=""

  if [ $# -lt 2 ]; then
    Err_Help
  fi
  case "$1" in
    status|fetch|quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift
  SITE=
  case "$1" in
    -s|--site)
      # the option, its value and at least one tag must all be present
      [ $# -ge 3 ] || Err_Help
      SITE="$2"
      shift 2
      ;;
    *)
      # no site option: $1 is already the first tag, so nothing is shifted
      SITE="${DEFAULT_SITE}"
      ;;
  esac
  # all remaining words, joined by single spaces
  TAGS="$*"
  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(echo "${SITE}" | sed -e 's/\//_/g')
}
| 148 | 253 |
# --- main -------------------------------------------------------------------
# Print the banner, parse the command line, then make sure the tools and
# folders are in place before doing any work.
Msg_Welcome
Init "$@"
Check_Tools
Check_Folders

# let's do the job!
# Every job builds the server list and the list of files still missing;
# fetch and quickfetch additionally start the download.
case "${JOB}" in
  status)
    Generate_Link
    Check_Files
    ;;
  fetch)
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
  quickfetch)
    # ISNEW=1 makes Check_Files take its no-verification branch;
    # ISQUICK only selects the message printed there.
    ISNEW=1
    ISQUICK=1
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
esac
