comparison bin/moefetch @ 311:dd2ddddf00d5

Merge.
author Edho Arief <edho@myconan.net>
date Wed, 07 Mar 2012 14:17:51 +0700
parents 21b86001b0c5
children 110d50856dde
comparison
equal deleted inserted replaced
283:108e05eb9b5c 311:dd2ddddf00d5
1 #!/bin/sh
2
3 # Copyright (c) 2009-2012, edogawaconan <edho@myconan.net>
4 #
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
8 #
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 #
17 # Lots of bugs here. Use with care
18 # USE WITH CARE
19 #
20 # what it does: fetch every picture that has the specified TAGS.
21 # requirement: wget, libxslt, openssl
22
# ---- user configuration ---------------------------------------------------

# Extra directories searched for the external programs this script runs
# (cut, sed, wc, openssl, wget, xsltproc, grep). When non-empty, init
# prepends the value to PATH.
ADDITIONAL_PATH=

# Site queried when no -s option is given. Danbooru-style sites only.
DEFAULT_SITE="moe.imouto.org"

# Base directory of the download tree; make sure it is writable.
# Files are stored under ${BASE_DIR}/<site>/<TAGS>.
# Absolute path only. Leave empty to use the directory the script is run from.
BASE_DIR=

# ---- not user modifiable from here -----------------------------------------

# -e: stop on any unhandled command failure.
# -u: treat expansion of an unset variable as an error.
set -eu

# User-Agent header passed to every wget invocation.
useragent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0) Gecko/20100101 Firefox/10.0'
42
# Print the banner: program name with ${_version}, the copyright line,
# and one trailing blank line.
msg_welcome() {
  printf 'moefetch %s\n' "${_version}"
  printf '%s\n\n' 'Copyright (c) 2009-2012 edogawaconan <edho@myconan.net>'
}
49
# Make a file name safe to pass as a command argument.
# Usage: cmd "$(safe_path "${filename}")"
#
# Arguments: the path (all arguments are joined as "$*").
# Outputs:   the path unchanged when it already starts with "." or "/";
#            otherwise the path prefixed with "./", so a name that starts
#            with "-" cannot be mistaken for an option.
#
# The decision is made with a shell pattern on the whole string: no external
# process per call, no helper variables left behind in the global scope, and
# only the very first character of the argument is considered even when the
# name contains newlines.
safe_path() {
  case "$*" in
    .* | /*) printf '%s' "$*" ;;   # already anchored; no change
    *) printf './%s' "$*" ;;       # anything else gets a ./ prefix
  esac
}
62
# Print the MD5 digest (32 hex digits) of the file named by $1.
# The file is streamed through `openssl dgst -md5`; sed keeps only the
# 32-hex-digit run from the last output line.
get_md5() {
  cat "$(safe_path "${1}")" \
    | openssl dgst -md5 \
    | tail -n 1 \
    | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'
}
65
# Print the last path component of $1.
# The path goes through safe_path first so basename never sees a leading "-".
get_basename() {
  basename "$(safe_path "${1}")"
}
68
# Print the base name of $1 without its extension: the shortest trailing
# ".suffix" is removed from the path before get_basename is applied.
get_filename() {
  get_basename "${1%.*}"
}
71
# Escape the tag string for use in the query URL.
# All arguments are joined with "$*", a single space is appended, and
# "&" becomes %26 while "=" becomes %3D.
get_cleantags() {
  printf '%s ' "$*" | sed -e 's/&/%26/g' -e 's/=/%3D/g'
}
74
# Print whatever is left of the extension-less file name of $1 after every
# run of 32 lowercase hex digits has been removed.
# Empty output therefore means "the name looks like an md5 value".
is_not_md5() {
  get_filename "$1" | sed -e 's/[0-9a-f]\{32\}//g'
}
77
78
# Fatal error handler: print a blank line and "Fatal error: <$1>",
# then terminate the script with status 1.
Err_Fatal() {
  printf '\nFatal error: %s\n' "${1}"
  exit 1
}
85
# Handler for failures that should not happen during a normal run:
# print a blank line and the two-line report request, then exit with status 1.
Err_Impossible() {
  printf '\n%s\n%s\n' \
    'Impossible error. Or you modified content of the working directories when the script is running.' \
    'Please report to moefetch.googlecode.com if you see this message (complete with entire run log)'
  exit 1
}
92
# Print the usage text (with ${DEFAULT_SITE} filled in) and exit with status 2.
Err_Help() {
  # Unquoted delimiter on purpose: ${DEFAULT_SITE} must be expanded.
  cat <<EOF
moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS

COMMAND:
(quick)fetch:
Do a complete update. Add prefix quick to skip file checking
check:
Get list of new files, clean up local folder and print total new files

OPTIONS:
-n:
Skip checking repository directory.
-p PASSWORD:
Specifies password for login.
-s SITE_URL:
Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}.
-u USERNAME:
Specifies username for login.
TAGS:
Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme.
EOF
  exit 2
}
116
# Build the list of file URLs available on the server for ${TAGS}.
#
# Reads:   SITE, TAGS, TEMP_PREFIX, useragent, _use_login and, when
#          _use_login is 1, LOGIN_USER / LOGIN_PASS.
# Writes:  ${TEMP_PREFIX}-xml      last downloaded catalog page
#          ${TEMP_PREFIX}-templist URLs extracted from that page
#          ${TEMP_PREFIX}-list     URLs of all pages, one per line
#          numfiles                number of lines in ${TEMP_PREFIX}-list
# Exits through Err_Fatal when a page cannot be downloaded or when the final
# list is empty.
#
# grep exits with status 1 when nothing matches. Under `set -e` that used to
# end the script silently on an empty page, before the "no files can be
# found" message could be printed, so every grep whose "no match" outcome is
# legitimate is guarded explicitly here.
Generate_Link() {
  echo "
Fetching XML file"
  tempnum=1000
  iternum=1
  : > "${TEMP_PREFIX}-list"
  # Pages are requested with limit=1000; a page holding 1000 or more URLs
  # means another page has to be requested.
  while [ "${tempnum}" -ge 1000 ]; do
    url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}"
    if [ "${_use_login}" -eq 1 ]; then
      url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
    fi
    wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" --referer="http://${SITE}/post" --user-agent="${useragent}" -e continue=off || Err_Fatal "Failed download catalog file"
    printf "Processing XML file... "
    # The stylesheet prints the file_url attribute of every <post>. sed then
    # reduces each URL to <prefix>/<32 hex digits>.<extension>, and grep keeps
    # only lines that start with "http" (status 1 = no such line, tolerated).
    xsltproc - "${TEMP_PREFIX}-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | { grep '^http' || [ "$?" -eq 1 ]; } > "${TEMP_PREFIX}-templist"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
    # grep -c still prints "0" when it exits 1, so only the status is masked.
    tempnum=$(grep -c . "${TEMP_PREFIX}-templist" || true)
    iternum=$((iternum + 1))
    cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
    echo "${tempnum} file(s) available"
  done
  numfiles=$(grep -c . "${TEMP_PREFIX}-list" || true)
  echo "${numfiles} file(s) available on server"
  [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
}
147
148
# Start the spinner: remember "-" as the current frame in ${_last} and print
# it. The frame is passed as data, never as the printf format string.
progress_init() {
  _last="-"
  printf '%s' "${_last}"
}
153
# Advance the spinner by one frame (- \ | / and back to -) and redraw it:
# a backspace followed by the new frame.
# Reads and updates ${_last}; any other value of ${_last} is left unchanged.
progress_anim() {
  case "${_last}" in
    -)   _last='\' ;;
    '\') _last='|' ;;
    '|') _last='/' ;;
    /)   _last='-' ;;
  esac
  # The frame is an argument, not part of the format: a format string ending
  # in a lone backslash has unspecified behaviour.
  printf '\b%s' "${_last}"
}
163
# Finish the spinner: a backspace, the word "done", and a newline.
progress_done() {
  printf '\b%s\n' 'done'
}
165
# Print the number of entries (hidden ones included) in the directory given
# as argument, without calling ls. "." and ".." are never counted, and glob
# patterns that matched nothing are skipped by the existence test.
# Side effect: leaves the count in ${numfiles}.
Count_Files() {
  numfiles=0
  for dircontent in "${*}/"* "${*}/".*; do
    case "${dircontent}" in
      "${*}/." | "${*}/..") continue ;;
    esac
    [ -e "${dircontent}" ] || continue
    numfiles=$((numfiles + 1))
  done
  echo "${numfiles}"
}
176
# Verify that every external program this script runs can be found in PATH.
# Calls Err_Fatal, naming the program, for the first one that is missing.
#
# The list now also covers programs the script calls that were not checked
# before (basename, cat, chmod, cp, mv, rmdir, tail, touch). The entries that
# were already listed are kept.
Check_Tools() {
  commands="basename cat chmod chown comm cp cut date grep mkdir mv openssl rm rmdir sed tail touch wc wget xargs xsltproc"
  for cmd in ${commands}; do
    command -v "${cmd}" > /dev/null 2>&1 || Err_Fatal "${cmd} doesn't exist in ${PATH}"
  done
}
186
# Verify that the working directories exist and are usable, creating what is
# missing.
#
# Reads:  BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX.
# Writes: ISNEW=1 when Count_Files reports that the repository directory
#         ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} is empty.
# Creates temp, trash, deleted and the repository directory under BASE_DIR,
# and makes sure the five ${TEMP_PREFIX}-* work files exist.
# Exits through Err_Fatal / Err_Impossible on failure.
Check_Folders() {
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
  for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${directory}" ]; then
      mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
    fi
    if [ ! -O "${BASE_DIR}/${directory}" ]; then
      echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
      # NOTE(review): this makes the whole tree world-writable; kept as is.
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
    fi
  done
  if [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ]; then
    ISNEW=1
  fi
  for i in error ok list newlist templist; do
    # The handler is Err_Fatal; the previous spelling named a function that
    # does not exist, so the failure was reported as "command not found".
    touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
  done
}
205
# Move everything that does not belong in the repository directory into a
# fresh trash directory.
#
# An entry of ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} is trash when it is a
# directory, when is_not_md5 prints something for it, or when its base name
# does not occur in ${TEMP_PREFIX}-list.
#
# Reads:   BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX.
# Outputs: progress spinner, then the list of moved entries (if any).
# Returns: 0; exits through Err_Impossible when a directory cannot be
#          prepared or an entry cannot be moved.
#
# Fixes over the previous version:
#  - the trash path no longer expands ${trash_dir} before it is assigned
#    (an "unbound variable" abort under `set -u`);
#  - permissions are repaired on the trash directory itself rather than on
#    whatever ${directory} happened to hold;
#  - a non-empty trash directory (rmdir fails) and "nothing was moved" no
#    longer make the function return non-zero, which aborted the caller
#    under `set -e`;
#  - base names are matched as fixed strings, not as regular expressions.
Cleanup_Repository() {
  # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
  printf "Cleaning up repository folder... "
  progress_init
  # The timestamp is produced separately so that "%" characters in the site
  # or tag names are not interpreted by date.
  trash_dir="${BASE_DIR}/trash/${SITE_DIR}-${TARGET_DIR}-$(date -u "+%Y%m%d-%H.%M")"
  trashes="These files have been moved to ${trash_dir}:"
  has_trash=
  if [ ! -d "${trash_dir}" ]; then
    mkdir -p "${trash_dir}" || Err_Impossible
  elif [ ! -O "${trash_dir}" ]; then
    chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible
  fi
  for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
    if [ -e "${trash}" ]; then
      if [ -d "${trash}" ] \
        || [ -n "$(is_not_md5 "${trash}")" ] \
        || ! grep -q -F -- "$(get_basename "${trash}")" "${TEMP_PREFIX}-list"; then
        has_trash=1
        mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
        trashes="${trashes}
$(get_basename "${trash}")"
      fi
    fi
    progress_anim
  done
  # Best effort: drop the trash directory again when nothing was moved into
  # it. Failure simply means it is not empty.
  rmdir "${trash_dir}" 2> /dev/null || true
  progress_done
  if [ -n "${has_trash}" ]; then
    echo "${trashes}"
  fi
}
239
# Verify the local repository and work out which files still have to be
# downloaded.
#
# Reads:  ISNEW, ISQUICK, NOCLEAN (optional), BASE_DIR, SITE_DIR, TARGET_DIR,
#         TEMP_PREFIX and ${TEMP_PREFIX}-list.
# Writes: ${TEMP_PREFIX}-ok      base names whose md5 equals their file name
#         ${TEMP_PREFIX}-error   base names that failed the check (removed)
#         ${TEMP_PREFIX}-newlist URLs from -list that are not in -ok
#
# When ISNEW is non-empty no check is made and -newlist is a copy of -list.
# Otherwise Cleanup_Repository runs first (unless NOCLEAN is non-empty), then
# every entry of the repository directory is checked: directories and names
# that are not an md5 value are only reported; files whose md5 differs from
# their name are deleted.
#
# Fixes over the previous version:
#  - NOCLEAN may be unset without tripping `set -u`;
#  - -newlist is always rebuilt from -list, also when no local file is valid;
#  - an empty result (everything already downloaded) no longer ends the
#    script through grep's exit status 1 under `set -e`;
#  - the list is filtered with one fixed-string grep instead of one regex
#    grep plus copy per local file.
Check_Files() {
  if [ -z "${ISNEW}" ]; then
    if [ -z "${NOCLEAN:-}" ]; then
      Cleanup_Repository
    fi
    printf "Checking for errors... "
    progress_init
    files_error="These files do not match its md5:"
    files_notdanbooru="These files are not checked:"
    has_err_filename=
    has_err_md5=
    : > "${TEMP_PREFIX}-error"
    : > "${TEMP_PREFIX}-ok"
    for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
      if [ -e "${file}" ]; then
        if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
          files_notdanbooru="${files_notdanbooru}
$(get_basename "${file}")"
          has_err_filename=1
        elif [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
          get_basename "${file}" >> "${TEMP_PREFIX}-ok"
        else
          rm "${file}" || Err_Fatal "Error removing ${file}"
          get_basename "${file}" >> "${TEMP_PREFIX}-error"
          files_error="${files_error}
$(get_basename "${file}")"
          has_err_md5=1
        fi
      fi
      progress_anim
    done
    progress_done
    if [ -z "${has_err_md5}" ] && [ -z "${has_err_filename}" ]; then
      echo "All files OK"
    else
      if [ -n "${has_err_md5}" ]; then
        echo "${files_error}"
        echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed"
      fi
      if [ -n "${has_err_filename}" ]; then
        echo "${files_notdanbooru}"
      fi
    fi
    echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally"

    printf "Generating list of new files... "
    progress_init
    if [ -s "${TEMP_PREFIX}-ok" ]; then
      # Keep the URLs that contain none of the valid local base names.
      # Status 1 only means "nothing left to download".
      grep -v -F -f "${TEMP_PREFIX}-ok" "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist" \
        || [ "$?" -eq 1 ] || Err_Impossible
    else
      # No valid local file: everything on the server list is new.
      cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-newlist" || Err_Impossible
    fi
    progress_done
    echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
  else
    if [ -n "${ISQUICK}" ]; then
      echo "Quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
  fi
}
304
# Download every URL listed in ${TEMP_PREFIX}-newlist into the repository
# directory ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}.
#
# Prints "No new file" and does nothing else when the list has no entry.
# wget writes its log to ${TEMP_PREFIX}.log. A directory that cannot be
# entered or a failing wget is reported through Err_Fatal instead of ending
# the script without a message under `set -e`.
# Side effect: changes the current directory to the repository directory.
Fetch_Images() {
  if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then
    echo "No new file"
  else
    printf "Downloading files... "
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" --referer="http://${SITE}/post" --user-agent="${useragent}" \
      || Err_Fatal "Download failed, see ${TEMP_PREFIX}.log"
    # Terminate the progress line started above.
    echo "done"
  fi
}
315
# Parse the command line and initialize the variables the rest of the script
# works with.
#
# Usage: init COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS
#
# Reads:  ADDITIONAL_PATH, DEFAULT_SITE, BASE_DIR, PWD, HOME.
# Writes: PATH (when ADDITIONAL_PATH is set), JOB, SITE, TAGS, NOCLEAN,
#         ISQUICK, ISNEW, BASE_DIR, TARGET_DIR, SITE_DIR, TEMP_PREFIX,
#         LOGIN_USER / LOGIN_PASS (when given) and _use_login (set to 1 only
#         when both -u and -p were given).
# Exits through Err_Help on a malformed command line and through Err_Fatal
# when no tag is left after the options.
#
# Fixes over the previous version:
#  - NOCLEAN is always defined, so later reads do not trip `set -u`;
#  - "$1" is not expanded when no argument is left, so "No tag specified"
#    is reported instead of an unbound-variable abort;
#  - unknown options print the help text instead of being ignored;
#  - OPTIND is reset so parsing always starts at the first argument.
init() {
  # path initialization: prepend the additional path when one is configured
  if [ -n "${ADDITIONAL_PATH}" ]; then
    PATH="${ADDITIONAL_PATH}:${PATH}"
    export PATH
  fi

  # misc variables
  ISQUICK=
  ISNEW=
  NOCLEAN=

  # minimum number of arguments: 2 (command and tag)
  if [ $# -lt 2 ]; then
    Err_Help
  fi
  case "$1" in
    check | fetch | quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift

  SITE=
  TAGS=
  has_pass=0
  has_user=0
  OPTIND=1
  while getopts "s:nu:p:" opt; do
    case "${opt}" in
      s) SITE="${OPTARG}" ;;
      n) NOCLEAN=1 ;;
      p)
        # The site is sent the SHA-1 hex digest of the password.
        LOGIN_PASS=$(printf "%s" "${OPTARG}" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/')
        has_pass=1
        ;;
      u)
        LOGIN_USER="${OPTARG}"
        has_user=1
        ;;
      *) Err_Help ;;
    esac
  done
  shift $((OPTIND - 1))
  if [ "${1:-}" = "--" ]; then
    shift
  fi
  # All remaining arguments form the tag string, separated by spaces.
  TAGS="$*"
  [ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
  [ -n "${TAGS}" ] || Err_Fatal "No tag specified"

  # Get base folder - default, current folder or fallback to ${HOME}
  [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
  [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}
  # Force an absolute path.
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac

  # Login is used only when both user name and password were given.
  if [ "${has_pass}" -eq 1 ] && [ "${has_user}" -eq 1 ]; then
    _use_login=1
  fi

  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(printf '%s\n' "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(printf '%s\n' "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
  TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}
383
# Set the script-wide defaults.
#   _use_login  0 until both a user name and a password are supplied
#   _version    version string of this script, shown in the banner
init_globals() {
  _use_login=0
  _version="1.0-rc3"
}
390
# Program flow: set the globals, print the banner, parse the command line,
# check tools and folders, then run the steps that ${JOB} asks for.
#   check       build the server list and check the local files
#   fetch       the same, then download what is missing
#   quickfetch  like fetch, with ISNEW and ISQUICK set to 1 beforehand
main() {
  # removing GNU-ism as much as possible
  POSIXLY_CORRECT=1

  init_globals
  msg_welcome
  init "$@"
  Check_Tools
  Check_Folders

  if [ "${JOB}" = "quickfetch" ]; then
    ISNEW=1
    ISQUICK=1
  fi
  case "${JOB}" in
    check)
      Generate_Link
      Check_Files
      ;;
    fetch | quickfetch)
      Generate_Link
      Check_Files
      Fetch_Images
      ;;
  esac
}
425
426 # Script entry point: pass every command-line argument, unchanged, to main.
427 main "$@"
428