Mercurial > ec-dotfiles
comparison bin/moefetch @ 311:dd2ddddf00d5
Merge.
author | Edho Arief <edho@myconan.net> |
---|---|
date | Wed, 07 Mar 2012 14:17:51 +0700 |
parents | 21b86001b0c5 |
children | 110d50856dde |
comparison
equal
deleted
inserted
replaced
283:108e05eb9b5c | 311:dd2ddddf00d5 |
---|---|
1 #!/bin/sh | |
2 | |
3 # Copyright (c) 2009-2012, edogawaconan <edho@myconan.net> | |
4 # | |
5 # Permission to use, copy, modify, and/or distribute this software for any | |
6 # purpose with or without fee is hereby granted, provided that the above | |
7 # copyright notice and this permission notice appear in all copies. | |
8 # | |
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
16 # | |
17 # Lots of bugs here. Use with care | |
18 # USE WITH CARE | |
19 # | |
20 # what it does: fetch every picture that has the specified TAGS. | |
21 # requirement: wget, libxslt, openssl | |
22 | |
# Extra PATH entries (colon-separated) for: cut, sed, wc, openssl, wget,
# xsltproc, grep -- prepended to PATH in init() when non-empty.
ADDITIONAL_PATH=

# Default server address. Danbooru-compatible sites only! I do not take
# responsibility of stupidity.
DEFAULT_SITE="moe.imouto.org"

# Base directory; make sure it's writeable. I do not take responsibility if
# you don't own the folder and files as no check is done for this one.
# Structure is ${BASE_DIR}/<TAGS>
# Absolute path only.
# Leave empty to use whatever folder you're running this at (see init()).
BASE_DIR=

# not user modifiable from here

# stop on any error
set -e
# treat expansion of uninitialized variables as an error
set -u
# browser-like User-Agent sent with every request
useragent="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0) Gecko/20100101 Firefox/10.0"
42 | |
# Print the version banner (reads global _version).
msg_welcome() {
	printf "moefetch %s\nCopyright (c) 2009-2012 edogawaconan <edho@myconan.net>\n\n" "${_version}"
}
49 | |
# Sanitize a path so it can never be mistaken for a command option:
# paths already starting with "." or "/" pass through unchanged,
# everything else is prefixed with "./". Usage: cmd "$(safe_path "$x")"
safe_path()
{
	case "$*" in
		.*|/*) printf "%s" "$*" ;;   # already anchored: safe as-is
		*)     printf "%s" "./$*" ;; # anchor anything else
	esac
}
62 | |
# Print the MD5 digest (32 hex chars only) of the file named by $1.
# OpenSSL should be available on anything usable.
# Fixed: useless use of cat -- feed the file to openssl via redirection.
get_md5() { openssl dgst -md5 < "$(safe_path "${1}")" | tail -n 1 | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'; }
65 | |
# Safely get the basename of $1 (path is sanitized first).
get_basename()
{
	basename "$(safe_path "${1}")"
}
68 | |
# Safely get the filename of $1: basename with its last extension stripped.
get_filename()
{
	get_basename "${1%.*}"
}
71 | |
# URL-encode the characters Danbooru tag queries need escaped
# ("&" -> %26, "=" -> %3D). Note: output carries a trailing space.
get_cleantags() {
	printf "%s " "$*" | sed -e 's/&/%26/g' -e 's/=/%3D/g'
}
74 | |
# Print whatever remains of $1's filename after deleting 32-hex-digit runs;
# empty output therefore means the name is a bare md5.
is_not_md5() {
	get_filename "$1" | sed -e 's/[0-9a-f]\{32\}//g'
}
77 | |
78 | |
# Fatal error handler: print the message and terminate with status 1.
Err_Fatal() {
	printf "\nFatal error: %s\n" "${1}"
	exit 1
}
85 | |
# "Should never happen" handler: print a report request and exit 1.
Err_Impossible() {
	printf "\n%s\n%s\n" \
		"Impossible error. Or you modified content of the working directories when the script is running." \
		"Please report to moefetch.googlecode.com if you see this message (complete with entire run log)"
	exit 1
}
92 | |
# Usage text; exits 2, the conventional status for CLI misuse.
# ${DEFAULT_SITE} is expanded into the -s description.
Err_Help() {
	cat <<EOF
moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS

COMMAND:
(quick)fetch:
Do a complete update. Add prefix quick to skip file checking
check:
Get list of new files, clean up local folder and print total new files

OPTIONS:
-n:
Skip checking repository directory.
-p PASSWORD:
Specifies password for login.
-s SITE_URL:
Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}.
-u USERNAME:
Specifies username for login.
TAGS:
Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme.
EOF
	exit 2
}
116 | |
# Build the cumulative URL list (${TEMP_PREFIX}-list) by fetching the site's
# XML index page by page (limit=1000) and extracting each post's @file_url
# with xsltproc, normalized to http.../<md5>.<ext>.
# Globals read: SITE, TAGS, TEMP_PREFIX, _use_login, LOGIN_USER, LOGIN_PASS,
# useragent. Calls Err_Fatal on download failure or empty result.
Generate_Link() {
	echo "
Fetching XML file"
	tempnum=1000
	iternum=1
	# truncate the cumulative list
	> "${TEMP_PREFIX}-list"
	# a full page carries 1000 entries; fewer means the last page was reached
	while [ "${tempnum}" -ge 1000 ]; do
		url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}"
		# credentials only appended when both -u and -p were given (see init)
		[ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
		wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" --referer="http://${SITE}/post" --user-agent="${useragent}" -e continue=off || Err_Fatal "Failed download catalog file"
		printf "Processing XML file... "
		# xslt evilry: emit @file_url per <post>, squash to one URL per line,
		# keep only http lines.
		# NOTE(review): if the page yields no http lines, `grep ^http` exits 1
		# and `set -e` aborts here before the friendlier Err_Fatal below -- confirm.
		xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
		# NOTE(review): grep -c exits non-zero on a zero count, which would
		# also trip `set -e` on an empty page -- confirm intended.
		tempnum=$(grep -c . "${TEMP_PREFIX}-templist")
		iternum=$((iternum + 1))
		cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
		echo "${tempnum} file(s) available"
	done
	numfiles=$(grep -c . "${TEMP_PREFIX}-list")
	echo "${numfiles} file(s) available on server"
	[ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
}
147 | |
148 | |
# Start the text spinner: remember and print the first frame.
progress_init() {
	_last="-"
	printf "%s" "${_last}"
}
153 | |
# Advance the spinner one frame (- -> \ -> | -> / -> -) and redraw it
# in place with a backspace.
progress_anim() {
	case "${_last}" in
		"/")  _last="-" ;;
		"-")  _last="\\" ;;
		"\\") _last="|" ;;
		"|")  _last="/" ;;
	esac
	printf "\b%s" "${_last}"
}
163 | |
# Overwrite the last spinner frame with "done" and end the line.
progress_done() { printf "\b%s\n" "done"; }
165 | |
# Count directory entries without parsing ls: globs both normal and
# dotfile entries, skipping "." and "..". Echoes the count.
Count_Files() {
	total=0
	for entry in "${*}/"* "${*}/".*; do
		case "${entry}" in
			"${*}/."|"${*}/..") continue ;;
		esac
		# unmatched globs stay literal; -e filters those out
		[ -e "${entry}" ] && total=$((total + 1))
	done
	echo "${total}"
}
176 | |
# Abort early (via Err_Fatal) if any required external program is not
# resolvable through PATH.
Check_Tools() {
	for required in cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl; do
		[ "$(command -v "${required}")" ] || Err_Fatal "${required} doesn't exist in ${PATH}"
	done
}
186 | |
# Verify required folders exist and are writeable, creating them as needed.
# Sets ISNEW=1 when the target repository folder is empty, and pre-creates
# the per-run temp files under ${TEMP_PREFIX}.
# Globals read: BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX; written: ISNEW.
Check_Folders(){
	[ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
	for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
		if [ ! -d "${BASE_DIR}/${directory}" ]; then
			mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
		fi
		if [ ! -O "${BASE_DIR}/${directory}" ]; then
			echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
			chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
		fi
	done
	[ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1
	for i in error ok list newlist templist; do
		# Fixed: was "Fatal_Err", an undefined name; the handler is Err_Fatal.
		touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
	done
}
205 | |
# Move junk out of ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} into a timestamped
# trash folder: subdirectories, files whose name is not a bare md5, and
# files no longer listed in ${TEMP_PREFIX}-list.
Cleanup_Repository() {
	# current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
	printf "Cleaning up repository folder... "
	progress_init
	# Fixed: the original interpolated ${trash_dir} into its own first
	# assignment; the variable is unset at that point, so `set -u` aborted.
	trash_dir="${BASE_DIR}/trash/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")"
	trashes="These files have been moved to ${trash_dir}:"
	has_trash=
	if [ ! -d "${trash_dir}" ]; then
		mkdir -p "${trash_dir}" || Err_Impossible
	else
		if [ ! -O "${trash_dir}" ]; then
			# Fixed: chmod previously targeted ${BASE_DIR}/${directory}, a
			# stale variable left over from Check_Folders, not the trash dir.
			chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible
		fi
	fi
	for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
	do
		if [ -e "${trash}" ]; then
			is_trash=
			if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then
				is_trash=1
				has_trash=1
				mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
				trashes="${trashes}
$(get_basename "${trash}")"
			fi
		fi
		progress_anim
	done
	# Fixed: rmdir fails when the trash dir is non-empty, which tripped
	# `set -e`; the failure is expected, so ignore it.
	rmdir "${trash_dir}" 2>/dev/null || :
	progress_done
	[ -n "${has_trash}" ] && echo "${trashes}"
	# Fixed: ensure a zero return when nothing was trashed, otherwise the
	# failing test above becomes the function status and `set -e` kills
	# the caller's `[ -z "${NOCLEAN}" ] && Cleanup_Repository` list.
	return 0
}
239 | |
# Check the local repository against the server list: optionally clean junk
# (Cleanup_Repository), verify the md5 of every local file (removing corrupt
# ones), then write ${TEMP_PREFIX}-newlist with the URLs still to download.
# In quick/new mode the whole server list becomes the new list.
# Globals read: ISNEW, ISQUICK, NOCLEAN, BASE_DIR, SITE_DIR, TARGET_DIR,
# TEMP_PREFIX.
Check_Files() {
	if [ ! -n "${ISNEW}" ]; then
		[ -z "${NOCLEAN}" ] && Cleanup_Repository
		printf "Checking for errors... "
		progress_init
		files_error="These files do not match its md5:"
		files_notdanbooru="These files are not checked:"
		has_err_filename=
		has_err_md5=
		> "${TEMP_PREFIX}-error"
		> "${TEMP_PREFIX}-ok"
		for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
		do
			if [ -e "${file}" ]; then
				# anything not named as a bare md5 (or any directory) is skipped
				if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
					files_notdanbooru="${files_notdanbooru}
$(get_basename "${file}")"
					has_err_filename=1
				else
					if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
						echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok"
					else
						# corrupt download: delete so it is fetched again
						rm "${file}" || Err_Fatal "Error removing ${file}"
						echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error"
						files_error="${files_error}
$(get_basename "${file}")"
						has_err_md5=1
					fi
				fi
			fi
			progress_anim
		done
		progress_done
		if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then
			echo "All files OK"
		else
			if [ -n "${has_err_md5}" ]; then
				echo "${files_error}"
				echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed"
			fi
			[ -n "${has_err_filename}" ] && echo "${files_notdanbooru}"
		fi
		echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally"

		printf "Generating list of new files... "
		progress_init
		cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist"
		while read -r is_ok; do
			# Fixed: grep -v exits 1 when it filters out every line; under
			# `set -e` that killed the script once the list became empty.
			grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist" || :
			cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible
			progress_anim
		done < "${TEMP_PREFIX}-ok"
		progress_done
		echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
	else
		if [ -n "${ISQUICK}" ]; then
			echo "Quick mode selected. Skipping check"
		else
			echo "Empty local repository"
		fi
		cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
	fi
}
304 | |
# Download every URL in ${TEMP_PREFIX}-newlist into the repository folder.
# Globals read: TEMP_PREFIX, BASE_DIR, SITE_DIR, TARGET_DIR, SITE, useragent.
# NOTE(review): wget's exit status is not checked; per-file failures are only
# visible in ${TEMP_PREFIX}.log, and a non-zero wget exit will abort the
# script under `set -e` -- confirm this is intentional.
Fetch_Images() {
	if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then
		echo "No new file"
	else
		printf "Downloading files... "
		cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
		# -e continue=on resumes partial downloads; log goes to the temp area
		wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log" --referer="http://${SITE}/post" --user-agent="${useragent}"
	fi
}
315 | |
# Parse the command line, validate configuration, and derive the working
# paths (TARGET_DIR, SITE_DIR, TEMP_PREFIX).
# Globals written: JOB, SITE, TAGS, NOCLEAN, ISQUICK, ISNEW, LOGIN_USER,
# LOGIN_PASS, _use_login, BASE_DIR, TARGET_DIR, SITE_DIR, TEMP_PREFIX, PATH.
init()
{
	# path initialization
	# check if additional path is specified
	if [ -n "${ADDITIONAL_PATH}" ]
	then
		# insert the additional path
		PATH="${ADDITIONAL_PATH}:${PATH}"
		export PATH
	fi

	# misc variables. Fixed: NOCLEAN must be initialised here, otherwise
	# `set -u` aborts in Check_Files whenever -n was not given.
	ISQUICK=
	ISNEW=
	NOCLEAN=

	# minimum number of arguments: 2 (command and tag). If less than two, exit and print help message
	[ $# -lt 2 ] && Err_Help
	case "$1" in
		check|fetch|quickfetch)
			echo "Starting..."
			JOB="$1"
			;;
		*)
			Err_Help
			;;
	esac
	shift
	SITE=
	TAGS=
	has_pass=0
	has_user=0
	while getopts "s:nu:p:" opt
	do
		case "$opt" in
			s) SITE="$OPTARG";;
			n) NOCLEAN=1;;
			p)
				# store only the SHA1 of the password, as Danbooru expects
				LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/')
				has_pass=1
				;;
			u)
				LOGIN_USER="$OPTARG"
				has_user=1
				;;
		esac
	done
	# Fixed: shift by OPTIND directly instead of copying it through $x.
	shift $((OPTIND - 1))
	# Fixed: guard $# before touching $1 -- with no operands left, `set -u`
	# aborted on the unset positional parameter.
	[ $# -gt 0 ] && [ "$1" = -- ] && shift
	# Fixed: "$*" (join with spaces) is the intended semantics; "$@" in an
	# assignment is ill-defined.
	TAGS="$*"
	[ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
	[ -n "${TAGS}" ] || Err_Fatal "No tag specified"
	# Get base folder - default, current folder or fallback to ${HOME}
	[ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
	[ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}
	[ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}"
	# Enable login only when both username and password were supplied.
	# Fixed: replaced the deprecated `[ ... -a ... ]` with two tests.
	[ ${has_pass} -eq 1 ] && [ ${has_user} -eq 1 ] && _use_login=1

	echo "Tags: ${TAGS}"
	# slash is not wanted for folder name
	TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
	SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
	TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}
383 | |
# Script-wide state lives here.
init_globals()
{
	_version="1.0-rc3"   # release identifier shown in the banner
	_use_login=0         # flipped to 1 by init() when both -u and -p are given
}
390 | |
# Entry point: initialise, validate environment, then dispatch on JOB
# (set by init from the first CLI argument).
main()
{
	# removing GNU-ism as much as possible
	POSIXLY_CORRECT=1
	#initialize global variables
	init_globals
	#print welcome message
	msg_welcome
	# initialization (argument parsing; sets JOB and path globals)
	init "$@"
	Check_Tools
	Check_Folders


	# let's do the job!
	case "${JOB}" in
		check)
			Generate_Link
			Check_Files
			;;
		fetch)
			Generate_Link
			Check_Files
			Fetch_Images
			;;
		quickfetch)
			# quick mode: treat the repository as empty and skip md5 checking
			ISNEW=1
			ISQUICK=1
			Generate_Link
			Check_Files
			Fetch_Images
			;;
	esac
}
425 | |
# call the main routine! (all CLI arguments forwarded verbatim)
main "$@"
428 |