comparison bin/moefetch.sh @ 236:515ffebe9bba

Preparation for merge.
author Edho Arief <edho@myconan.net>
date Sun, 02 Oct 2011 04:32:15 +0700
parents moefetch.sh@649b7d4b056a
children d7e5a2e70cf3
comparison
equal deleted inserted replaced
235:649b7d4b056a 236:515ffebe9bba
1 #!/bin/sh
2
3 # Copyright (c) 2009, edogawaconan <me@myconan.net>
4 #
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
8 #
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 #
17 # Lots of bugs here. Use with care
18 # USE WITH CARE
19 #
20 # what it does: fetch every picture that has the specified TAGS.
21 # requirement: wget, libxslt, openssl
22
23 # program additional paths for: cut, sed, wc, openssl, wget, xsltproc, grep
24 ADDITIONAL_PATH=
25
26 # Default server address. Must be a Danbooru-powered site; anything else is used at your own risk.
27 DEFAULT_SITE="moe.imouto.org"
28
29 # Base directory. Make sure it is writable and owned by you: the script aborts if you do not own it.
30 # Structure is ${BASE_DIR}/<TAGS>
31 # Absolute path only.
32 # Leave empty to use whatever folder you're running this at
33 BASE_DIR=
34
35 # not user modifiable from here
36
# Print the start-up banner: program name with ${_version}, the copyright
# line, and one trailing blank line.
msg_welcome() {
  printf '%s\n' \
    "moefetch ${_version}" \
    "Copyright (c) 2009 edogawaconan <me@myconan.net>" \
    ""
}
43
# Make a path safe to pass to external commands as a file operand.
# A path that already starts with "." or "/" is returned unchanged; anything
# else gets a "./" prefix so a leading "-" can never be parsed as an option.
# Arguments: the path (all arguments are joined as "$*")
# Outputs:   the adjusted path on stdout, without a trailing newline
# Usage:     cmd "$(safe_path "${filename}")"
safe_path()
{
  # Match the whole string against a shell pattern: no subprocess is needed
  # and an embedded newline cannot confuse the first-character test.
  case "$*" in
    .*|/*) printf "%s" "$*";;   # already anchored; no change
    *) printf "%s" "./$*";;     # anything else must be prefixed with ./
  esac
}
56
# Print the MD5 digest (32 hex digits) of a file, computed with openssl.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout
# Returns:   1, printing nothing, when $1 is not a readable regular file
#            (otherwise openssl would hash an empty stdin and report the
#            digest of "nothing" as if it were the file's).
get_md5() {
  if [ ! -f "${1}" ] || [ ! -r "${1}" ]; then
    return 1
  fi
  # The file is fed on stdin; sed keeps only the 32 hex digits of the
  # last line that openssl prints.
  openssl dgst -md5 < "$(safe_path "${1}")" \
    | tail -n 1 \
    | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'
}
59
# Print the last component of a path.
# The path goes through safe_path first, so a name starting with "-" is not
# mistaken for an option by basename.
# Arguments: $1 - path
get_basename() {
  basename "$(safe_path "$1")"
}
62
# Print the file name of a path without its directory and without its last
# extension, e.g. "/some.dir/abc.jpg" -> "abc".
# The extension is stripped from the base name only, so a dot in a directory
# name can never truncate the path. A name with nothing in front of its last
# dot (such as ".hidden") is printed whole.
# Arguments: $1 - path
get_filename() {
  _gf_base=$(get_basename "${1}")
  _gf_stem=${_gf_base%.*}
  [ -n "${_gf_stem}" ] || _gf_stem=${_gf_base}
  printf '%s\n' "${_gf_stem}"
}
65
# Prepare tags for use in the query string: join all arguments with spaces,
# append one trailing space, and percent-encode "&" and "=".
get_cleantags() {
  printf '%s ' "$*" \
    | sed -e 's/&/%26/g' -e 's/=/%3D/g'
}
68
# Print something non-empty when the file stem of $1 (see get_filename) is
# NOT exactly 32 lowercase hex digits; print an empty line when it is.
# Callers test the result with [ -n "$(is_not_md5 ...)" ].
# The pattern is anchored at both ends, so longer all-hex names (for example
# 64 digits) are not mistaken for an md5.
is_not_md5() {
  get_filename "$1" | sed -e 's/^[0-9a-f]\{32\}$//'
}
71
72
# Report an unrecoverable error and terminate the script.
# Arguments: $1 - description of the failure
# Outputs:   a blank line, then "Fatal error: <description>", on stdout
# Exits with status 1.
Err_Fatal() {
  printf '\nFatal error: %s\n' "${1}"
  exit 1
}
79
# Report a failure that should not happen in normal operation (a command on
# the script's own working directories failed) and terminate.
# Outputs: a blank line and a two-line notice on stdout
# Exits with status 1.
Err_Impossible() {
  printf '%s\n' \
    "" \
    "Impossible error. Or you modified content of the working directories when the script is running." \
    "Please report to moefetch.googlecode.com if you see this message (complete with entire run log)"
  exit 1
}
86
# Print the usage text on stdout and terminate with status 2.
# Reads ${DEFAULT_SITE} to show the default site in the -s description.
Err_Help() {
  printf '%s\n' \
    "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS" \
    "" \
    "COMMAND:" \
    "(quick)fetch:" \
    "Do a complete update. Add prefix quick to skip file checking" \
    "check:" \
    "Get list of new files, clean up local folder and print total new files" \
    "" \
    "OPTIONS:" \
    "-n:" \
    "Skip checking repository directory." \
    "-p PASSWORD:" \
    "Specifies password for login." \
    "-s SITE_URL:" \
    "Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}." \
    "-u USERNAME:" \
    "Specifies username for login." \
    "TAGS:" \
    "Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme."
  exit 2
}
110
# Build the list of downloadable file URLs for ${TAGS} on ${SITE}.
# Globals read:    SITE, TAGS, TEMP_PREFIX, _use_login, LOGIN_USER, LOGIN_PASS
# Globals written: tempnum, iternum, url, numfiles
# Files written:   ${TEMP_PREFIX}-xml (last page fetched),
#                  ${TEMP_PREFIX}-templist (URLs of the last page),
#                  ${TEMP_PREFIX}-list (URLs of all pages)
# The post index is requested page by page with limit=1000; another page is
# requested for as long as the previous one produced at least 1000 URLs.
# Exits through Err_Fatal when a download fails or no URL was found at all.
Generate_Link() {
  echo "
Fetching XML file"
  tempnum=1000
  iternum=1
  : > "${TEMP_PREFIX}-list"
  while [ "${tempnum}" -ge 1000 ]; do
    url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}"
    # Quoted with a default so an empty or unset flag simply means "no login"
    # instead of making the test itself fail.
    if [ "${_use_login:-0}" -eq 1 ]; then
      url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
    fi
    wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" -e continue=off || Err_Fatal "Failed download catalog file"
    printf "Processing XML file... "
    # The stylesheet prints the file_url attribute of every <post>; sed then
    # rewrites each URL to <prefix>/<32 hex digits>.<extension> and grep
    # drops every line that is not a URL.
    xsltproc - "${TEMP_PREFIX}-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep '^http' > "${TEMP_PREFIX}-templist"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
    tempnum=$(grep -c . "${TEMP_PREFIX}-templist")
    iternum=$((iternum + 1))
    cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
    echo "${tempnum} file(s) available"
  done
  numfiles=$(grep -c . "${TEMP_PREFIX}-list")
  echo "${numfiles} file(s) available on server"
  [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
}
141
142
# Start the spinner: remember "-" as the current frame in ${_last} and
# print it.
progress_init() {
  _last="-"
  printf '%s' "${_last}"
}
147
# Advance the spinner by one frame (- \ | / and back to -) and redraw it:
# a backspace followed by the new frame is printed.
# Globals: _last (read and written) - the frame currently on screen
progress_anim() {
  case "${_last}" in
    /) _last="-";;
    -) _last=\\;;
    \\) _last=\|;;
    \|) _last="/";;
    *) _last="-";;   # not initialised yet: start from the first frame
  esac
  # The frame is passed as data, never as the format string: the "\" frame
  # would otherwise end the format with a lone backslash.
  printf '\b%s' "${_last}"
}
157
# Finish the spinner: overwrite the last frame with "done" and end the line.
progress_done() {
  printf '\b%s\n' "done"
}
159
# Print the number of entries (hidden ones included) in a directory,
# without parsing ls output.
# Arguments: the directory path (all arguments are joined as "$*")
# Outputs:   the count on stdout; 0 for an empty directory
# "." and ".." are skipped explicitly rather than subtracted afterwards,
# because not every shell lets ".*" match them.
Count_Files() {
  _cf_dir="$*"
  _cf_total=0
  for _cf_entry in "${_cf_dir}/"* "${_cf_dir}/".*; do
    case "${_cf_entry}" in
      */.|*/..) continue;;
    esac
    # A pattern that matched nothing stays literal and names no file;
    # -L keeps dangling symlinks counted.
    if [ -e "${_cf_entry}" ] || [ -L "${_cf_entry}" ]; then
      _cf_total=$((_cf_total + 1))
    fi
  done
  echo "${_cf_total}"
}
170
# Verify that every external program this script invokes can be found.
# Exits through Err_Fatal, naming the first missing program.
# The list mirrors the commands actually called by the functions in this
# file (chmod rather than chown, plus basename, cat, cp, mv, rmdir, tail
# and touch).
Check_Tools() {
  commands="basename cat chmod cp cut date grep mkdir mv openssl rm rmdir sed tail touch wget xsltproc"
  for cmd in ${commands}; do
    command -v "${cmd}" >/dev/null 2>&1 || Err_Fatal "${cmd} doesn't exist in ${PATH}"
  done
}
180
# Make sure the working directories and temporary files exist and are usable.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
# Globals written: ISNEW (set to 1 when the target directory has no entries),
#                  directory, i (loop variables)
# Exits through Err_Fatal when BASE_DIR is not owned by the caller or a
# temporary file cannot be created, and through Err_Impossible when mkdir or
# chmod fails.
Check_Folders(){
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
  for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${directory}" ]; then
      mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
    fi
    if [ ! -O "${BASE_DIR}/${directory}" ]; then
      echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
    fi
  done
  if [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ]; then
    ISNEW=1
  fi
  for i in error ok list newlist templist; do
    # Err_Fatal is the handler defined in this file; the previous name
    # (Fatal_Err) does not exist, so a failed touch went unnoticed.
    touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
  done
}
199
# Move everything that does not belong in the target directory to a fresh
# trash directory.
# An entry is trash when it is a directory, when its name is not an md5 (see
# is_not_md5), or when its base name does not occur in ${TEMP_PREFIX}-list.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
# Globals written: trash_dir, trashes, has_trash, trash
# Outputs: progress spinner, then the list of moved entries (if any)
# Exits through Err_Impossible when mkdir, chmod or mv fails.
Cleanup_Repository() {
  printf "Cleaning up repository folder... "
  progress_init
  # The path is built from scratch on every call, and the time stamp is
  # appended after formatting so tags containing "%" cannot alter it.
  trash_dir="${BASE_DIR}/trash/${SITE_DIR}-${TARGET_DIR}-$(date -u "+%Y%m%d-%H.%M")"
  trashes="These files have been moved to ${trash_dir}:"
  has_trash=
  if [ ! -d "${trash_dir}" ]; then
    mkdir -p "${trash_dir}" || Err_Impossible
  elif [ ! -O "${trash_dir}" ]; then
    # Existing trash directory owned by someone else: open up that directory.
    chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible
  fi
  for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
    # A glob without matches stays literal; there is nothing to move then.
    if [ ! -e "${trash}" ] && [ ! -L "${trash}" ]; then
      continue
    fi
    # The base name is looked up as a fixed string, not as a regex.
    if [ -d "${trash}" ] \
      || [ -n "$(is_not_md5 "${trash}")" ] \
      || ! grep -q -F -- "$(get_basename "${trash}")" "${TEMP_PREFIX}-list"; then
      has_trash=1
      mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
      trashes=$(printf '%s\n%s' "${trashes}" "$(get_basename "${trash}")")
    fi
    progress_anim
  done
  # Best effort: removes the trash directory only if nothing was moved into it.
  rmdir "${trash_dir}" 2>/dev/null
  progress_done
  if [ -n "${has_trash}" ]; then
    echo "${trashes}"
  fi
}
231
# Verify the local files and work out which URLs still have to be fetched.
# Globals read:    ISNEW, ISQUICK, NOCLEAN, BASE_DIR, SITE_DIR, TARGET_DIR,
#                  TEMP_PREFIX
# Files written:   ${TEMP_PREFIX}-ok      base names of files whose md5 matches
#                  ${TEMP_PREFIX}-error   base names of removed files
#                  ${TEMP_PREFIX}-newlist URLs to download
# When ISNEW is set (empty repository or quick mode) nothing is checked and
# every listed URL is scheduled. Otherwise the repository is cleaned up
# (unless NOCLEAN is set), each file named after an md5 is hashed, files whose
# digest differs from their name are deleted, and the URLs of the files that
# passed are removed from the download list.
Check_Files() {
  if [ -n "${ISNEW}" ]; then
    if [ -n "${ISQUICK}" ]; then
      echo "Quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
    return
  fi

  if [ -z "${NOCLEAN}" ]; then
    Cleanup_Repository
  fi
  printf "Checking for errors... "
  progress_init
  files_error="These files do not match its md5:"
  files_notdanbooru="These files are not checked:"
  has_err_filename=
  has_err_md5=
  : > "${TEMP_PREFIX}-error"
  : > "${TEMP_PREFIX}-ok"
  for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
    # Skip the literal pattern left behind by a glob without matches.
    if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then
      file_base=$(get_basename "${file}")
      if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
        files_notdanbooru=$(printf '%s\n%s' "${files_notdanbooru}" "${file_base}")
        has_err_filename=1
      elif [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
        printf '%s\n' "${file_base}" >> "${TEMP_PREFIX}-ok"
      else
        rm -- "${file}" || Err_Fatal "Error removing ${file}"
        printf '%s\n' "${file_base}" >> "${TEMP_PREFIX}-error"
        files_error=$(printf '%s\n%s' "${files_error}" "${file_base}")
        has_err_md5=1
      fi
    fi
    progress_anim
  done
  progress_done
  if [ -z "${has_err_md5}" ] && [ -z "${has_err_filename}" ]; then
    echo "All files OK"
  else
    if [ -n "${has_err_md5}" ]; then
      echo "${files_error}"
      echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed"
    fi
    if [ -n "${has_err_filename}" ]; then
      echo "${files_notdanbooru}"
    fi
  fi
  echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally"

  printf "Generating list of new files... "
  progress_init
  if [ -s "${TEMP_PREFIX}-ok" ]; then
    # One pass: drop every URL that contains the name of a verified file.
    grep -v -F -f "${TEMP_PREFIX}-ok" "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
    # Status 1 only means "nothing left to download"; anything above is an error.
    [ $? -le 1 ] || Err_Impossible
  else
    # No verified file at all: everything on the list is new. Writing the
    # file here keeps a list left over from an earlier run from being reused.
    cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-newlist" || Err_Impossible
  fi
  progress_done
  echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
}
296
# Download every URL listed in ${TEMP_PREFIX}-newlist into the target
# directory ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}.
# wget resumes partial files (continue=on) and writes its log to
# ${TEMP_PREFIX}.log. Prints "No new file" when the list is empty.
# Exits through Err_Fatal when the target directory cannot be entered, so
# files are never downloaded into whatever directory happens to be current.
Fetch_Images() {
  if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then
    echo "No new file"
  else
    printf "Downloading files... "
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
  fi
}
307
# Parse the command line and derive the working variables.
# Arguments: COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] [--] TAGS...
# Globals read:    ADDITIONAL_PATH, DEFAULT_SITE, BASE_DIR, PWD, HOME
# Globals written: PATH, ISQUICK, ISNEW, NOCLEAN, JOB, SITE, TAGS, LOGIN_USER,
#                  LOGIN_PASS, has_pass, has_user, _use_login, BASE_DIR,
#                  TARGET_DIR, SITE_DIR, TEMP_PREFIX
# Exits through Err_Help on a missing/unknown command or an invalid option,
# and through Err_Fatal when no tag is left after the options.
init()
{
  # Prepend the additional search path, if one is configured.
  if [ -n "${ADDITIONAL_PATH}" ]; then
    PATH="${ADDITIONAL_PATH}:${PATH}"
    export PATH
  fi

  # Every flag starts from a known state; nothing is inherited from the
  # environment.
  ISQUICK=
  ISNEW=
  NOCLEAN=

  # At least a command and one more argument are required.
  if [ $# -lt 2 ]; then
    Err_Help
  fi
  case "$1" in
    check|fetch|quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift

  SITE=
  TAGS=
  LOGIN_USER=
  LOGIN_PASS=
  has_pass=0
  has_user=0
  OPTIND=1
  while getopts "s:nu:p:" opt; do
    case "$opt" in
      s) SITE="$OPTARG";;
      n) NOCLEAN=1;;
      p)
        # Only the SHA-1 hex digest of the password is kept.
        LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/')
        has_pass=1
        ;;
      u)
        LOGIN_USER="$OPTARG"
        has_user=1
        ;;
      *)
        # Unknown option or missing option argument.
        Err_Help
        ;;
    esac
  done
  # getopts has already consumed a "--" separator, if there was one.
  shift $((OPTIND - 1))
  TAGS="$*"
  [ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
  [ -n "${TAGS}" ] || Err_Fatal "No tag specified"

  # Base folder: configured value, else current folder, else ${HOME};
  # always forced to start with a slash.
  [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
  [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}";;
  esac

  # A login is used only when both user name and password were given.
  if [ "${has_pass}" -eq 1 ] && [ "${has_user}" -eq 1 ]; then
    _use_login=1
  fi

  echo "Tags: ${TAGS}"
  # Slashes are not wanted in folder names.
  TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
  TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}
375
# Set the script-wide values that must exist before anything else runs.
init_globals()
{
  _version='1.0-rc2'   # version string of this script, shown in the banner
  _use_login=0         # 0: no login is used; init sets it to 1 when both -u and -p are given
}
382
# Entry point: set up globals, print the banner, parse the arguments, check
# tools and folders, then run the job selected on the command line.
#   check      - build the URL list and check the local files
#   fetch      - as check, then download what is missing
#   quickfetch - as fetch, but ISNEW/ISQUICK are set first so Check_Files
#                skips the verification
main()
{
  # removing GNU-ism as much as possible
  POSIXLY_CORRECT=1
  init_globals
  msg_welcome
  init "$@"
  Check_Tools
  Check_Folders

  # Quick mode only differs by the two flags it raises beforehand.
  case "${JOB}" in
    quickfetch)
      ISNEW=1
      ISQUICK=1
      ;;
  esac

  case "${JOB}" in
    check)
      Generate_Link
      Check_Files
      ;;
    fetch|quickfetch)
      Generate_Link
      Check_Files
      Fetch_Images
      ;;
  esac
}
417
418 # call the main routine!
419 main "$@"
420