Mercurial > ec-dotfiles
annotate moefetch.sh @ 181:d3b7927bdb2b
restructuring and add check if the xml is processed properly
author | edhoprima@gmail.com <edhoprima@gmail.com> |
---|---|
date | Sun, 28 Jun 2009 05:12:41 +0000 |
parents | 8e6555aa8631 |
children | d92dfe857047 |
rev | line source |
---|---|
#!/bin/sh

# Copyright (c) 2009, edogawaconan <me@myconan.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Lots of bugs here. Use with care
# USE WITH CARE
#
# what it does: fetch every picture that has the specified TAGS.
# requirement: wget, libxslt, md5sum (or md5)

# Extra directories (colon separated) prepended to PATH when looking for:
# cut, sed, wc, MD5(sum), wget, xsltproc, grep
ADDITIONAL_PATH=

# Custom md5 command with arguments.
# Expected output: <32digit md5><space(s)><filename>
# Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
MD5=

# Default server address. Danbooru only! I do not take responsibility of stupidity.
DEFAULT_SITE="moe.imouto.org"

# Base directory. Make sure it's writable. I do not take responsibility if you
# don't own the folder and files as no check is done for this one.
# Structure is ${BASE_DIR}/<SITE>/<TAGS>
# Absolute path only.
# Leave empty to use whatever folder you're running this at
BASE_DIR=""

# not user modifiable from here
40 | |
# Welcome banner: prints program name, version and copyright to stdout.
# Globals: MOEFETCHVERSION (written) - version string shown in the banner.
Msg_Welcome() {
  MOEFETCHVERSION="0.1-beta2"
  # here-document body and terminator must stay unindented
  cat <<EOF
moefetch ${MOEFETCHVERSION}
Copyright (c) 2009 edogawaconan <me@myconan.net>

EOF
}
50 | |
# Fatal error handler.
# Arguments: $1 - message describing what went wrong
# Outputs:   "Fatal error: <message>" on stderr (diagnostics do not belong
#            on stdout)
# Never returns: terminates the script with exit status 1.
Err_Fatal() {
  printf 'Fatal error: %s\n' "${1}" >&2
  exit 1
}
56 | |
# Help message: prints usage to stdout.
# Globals: DEFAULT_SITE (read) - shown as the default for -s
# Never returns: terminates the script with exit status 0.
Err_Help() {
  # here-document body and terminator must stay unindented
  cat <<EOF
moefetch.sh COMMAND [-s SITE_URL] TAGS

COMMAND:
    (quick)fetch: do a complete update. Add prefix quick to skip file checking
    check: get list of new files, clean up local folder and print total new files

-s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}

TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme

EOF
  exit 0
}
73 | |
# Generate the list of image links by transforming the site's post XML.
#
# Globals read:    BASE_DIR, SITE, TAGS, SITE_DIR, TARGET_DIR
# Globals written: NUMFILES - number of links found on the server
# Files (inside ${BASE_DIR}/temp, which becomes the current directory):
#   ${SITE_DIR}-${TARGET_DIR}-xml   raw XML downloaded from the site
#   ${SITE_DIR}-${TARGET_DIR}-list  one download URL per line
# Exits through Err_Fatal when the work folder cannot be entered, the download
# fails, or no link could be extracted.
Generate_Link() {
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  echo
  echo "Fetching xml file"
  wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" \
    -O "${SITE_DIR}-${TARGET_DIR}-xml" -e continue=off \
    || Err_Fatal "Failed fetching xml file from ${SITE}"
  echo "Processing XML file..."
  # xslt evilry: the stylesheet (read from stdin) prints every post's file_url,
  # sed collapses each URL down to <prefix>/<md5>.<ext> and grep drops every
  # line that is not a link (XML declaration, blank lines).
  xsltproc - "${SITE_DIR}-${TARGET_DIR}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "${SITE_DIR}-${TARGET_DIR}-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
  # arithmetic expansion strips the padding some wc implementations emit
  NUMFILES=$(( $(wc -l < "${SITE_DIR}-${TARGET_DIR}-list") ))
  [ "${NUMFILES}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site"
  echo "${NUMFILES} file(s) available on server"
  #output file: ${SITE_DIR}-${TARGET_DIR}-list
}
95 | |
# Check tools availability.
#
# Verifies that every external program the script relies on can be found and
# that grep accepts a pattern file (-f).
# Globals read:    PATH
# Globals written: MD5 (chosen from `uname` when empty), MD5_COMMAND (program
#                  name part of MD5), COMMANDS, COMMAND, FAIL (scratch)
# Side effects:    briefly creates and removes two test files in the current
#                  directory for the grep check.
# Exits through Err_Fatal when a tool is missing or unsuitable; returns 0
# otherwise.
Check_Tools() {
  # verify all programs required do indeed exist
  #MD5
  if [ -z "${MD5}" ]; then
    case "$(uname)" in
      *BSD) MD5="md5 -r" ;;
      Linux|SunOS) MD5="md5sum" ;;
      *) Err_Fatal "No known md5 tool for this platform. Please specify manually" ;;
    esac
  fi
  # MD5 may carry arguments; only its first word names a program.
  # ${MD5} is deliberately unquoted so surrounding blanks are collapsed.
  MD5_COMMAND=$(echo ${MD5} | cut -d' ' -f1)

  # basic tools
  COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chmod chown comm grep ${MD5_COMMAND}"
  for COMMAND in ${COMMANDS}; do
    command -v "${COMMAND}" > /dev/null 2>&1 \
      || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
  done

  # grep checking
  # originally created for workaround on solaris, whose default grep
  # doesn't support -f
  FAIL=""
  echo "blah" > superrandomtestfile
  echo "blah" > superrandomtestfile.2
  grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1
  rm -f superrandomtestfile superrandomtestfile.2
  if [ -n "${FAIL}" ]; then
    Err_Fatal "Your grep is not compatible. Please install or set path of correct grep"
  fi
}
126 | |
# Verify required folders exist and are writable, then enter the work folder.
#
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR
# Globals written: ISNEW (set to 1 when the download folder holds no files),
#                  FOLDER, i (scratch)
# Side effects:    creates temp, trash, deleted and <site>/<tags> below
#                  BASE_DIR, changes the current directory to
#                  ${BASE_DIR}/temp and touches the four list files there.
# Exits through Err_Fatal on any failure.
Check_Folders() {
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}."
  for FOLDER in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then
      # -p: the <site>/<tags> entry is nested and its parent may not exist yet
      mkdir -p "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed"
    fi
    if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then
      echo "You don't own the ${BASE_DIR}/${FOLDER}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing permissions. This shouldn't happen"
    fi
  done
  # an empty download folder means there is nothing to verify later on
  if [ -z "$(ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" ]; then
    ISNEW=1
  fi
  # let's move to workdir
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  for i in error ok list newlist; do
    touch "${SITE_DIR}-${TARGET_DIR}-${i}" || Err_Fatal "Error creating ${TARGET_DIR}-${i}. This shouldn't happen"
  done
}
147 | |
# Check files correctness and work out what still has to be downloaded.
#
# When ISNEW is empty:
#   1. every sub-folder, and every file whose name does not start with a
#      32-digit lowercase hex md5, is moved from the download folder to
#      ${BASE_DIR}/trash/<site>-<tags>-<UTC timestamp>;
#   2. each remaining file is hashed with ${MD5}; files whose hash differs
#      from the part of their name before the first dot are logged to the
#      -error list and deleted;
#   3. the -ok list (files present locally) and the -newlist (server links
#      not matched by any -ok entry) are written.
# When ISNEW is set (empty folder or quick mode) the whole -list is copied to
# -newlist without checking anything.
#
# Globals read:    ISNEW, ISQUICK, BASE_DIR, SITE_DIR, TARGET_DIR, MD5
# Globals written: TRASH_DIR, TOTAL_ERROR, TRASH, FILE (scratch)
# Leaves the current directory at ${BASE_DIR}/temp.
# Exits through Err_Fatal when a folder cannot be entered/created or a move
# to the trash fails.
Check_Files() {
  if [ -z "${ISNEW}" ]; then
    echo "Checking for errors..."
    # THE FILES

    # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    TRASH_DIR="${SITE_DIR}-${TARGET_DIR}-$(date -u +%Y%m%d-%H.%M)"
    mkdir -p "${BASE_DIR}/trash/${TRASH_DIR}" \
      || Err_Fatal "Cannot create ${BASE_DIR}/trash/${TRASH_DIR}"
    for TRASH in *; do
      # an unmatched glob yields the literal pattern; nothing to do then
      [ -e "${TRASH}" ] || [ -L "${TRASH}" ] || continue
      if [ ! -d "${TRASH}" ]; then
        # regular entries are kept only when their name starts with an md5
        if printf '%s\n' "${TRASH}" | grep '^[0-9a-f]\{32\}' > /dev/null; then
          continue
        fi
      fi
      # always move the entry itself, never a name derived from it
      mv -f -- "${TRASH}" "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Error deleting files"
      echo "Moved ${TRASH} to ${BASE_DIR}/trash/${TRASH_DIR}"
    done
    # do not leave an empty trash folder behind
    if [ -z "$(ls "${BASE_DIR}/trash/${TRASH_DIR}")" ]; then
      rmdir "${BASE_DIR}/trash/${TRASH_DIR}"
    fi

    printf "" > "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
    for FILE in *; do
      [ -e "${FILE}" ] || continue
      # ${MD5} is deliberately unquoted: it may carry arguments ("md5 -r")
      if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1)" != "${FILE%%.*}" ]; then
        echo
        echo "${FILE}" >> "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
        echo "Error: ${FILE}"
      fi
      printf "."
    done
    echo
    # arithmetic expansion strips the padding some wc implementations emit
    TOTAL_ERROR=$(( $(wc -l < "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error") ))
    echo "${TOTAL_ERROR} file(s) error"
    echo "Removing error files"
    if [ "${TOTAL_ERROR}" -eq 0 ]; then
      echo "No error file. 0 file removed"
    else
      # read line by line so names with blanks or quotes survive
      while IFS= read -r FILE; do
        rm -f -- "${FILE}"
      done < "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
      echo "${TOTAL_ERROR} file(s) removed"
    fi
    echo "$(( $(ls | wc -l) )) file(s) available locally"

    # current dir: ${BASE_DIR}/temp
    cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"

    echo "Generating list of new files..."
    # THE FILES
    # -ok: everything in the download folder that is not in the -error list
    ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      | comm -1 -3 "${SITE_DIR}-${TARGET_DIR}-error" - > "${SITE_DIR}-${TARGET_DIR}-ok"
    # -newlist: server links that match none of the -ok file names
    grep -vf "${SITE_DIR}-${TARGET_DIR}-ok" "${SITE_DIR}-${TARGET_DIR}-list" > "${SITE_DIR}-${TARGET_DIR}-newlist"
    echo "$(( $(wc -l < "${SITE_DIR}-${TARGET_DIR}-newlist") )) file(s) to be downloaded"
  else
    if [ -n "${ISQUICK}" ]; then
      echo "quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
    cat "${SITE_DIR}-${TARGET_DIR}-list" > "${SITE_DIR}-${TARGET_DIR}-newlist"
  fi
}
217 | |
# Start downloading the images listed in the -newlist file.
#
# Globals read: BASE_DIR, SITE_DIR, TARGET_DIR
# Does nothing but print "No new file" when the list is empty. Otherwise
# changes into the download folder and launches wget in the background (-b),
# resuming partial files and logging to
# ${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log.
# Exits through Err_Fatal when a folder cannot be entered.
Fetch_Images() {
  cd "${BASE_DIR}/temp" || Err_Fatal "Cannot enter ${BASE_DIR}/temp"
  # arithmetic expansion strips the padding some wc implementations emit
  if [ "$(( $(wc -l < "${SITE_DIR}-${TARGET_DIR}-newlist") ))" -eq 0 ]; then
    echo "No new file"
  else
    echo "Starting wget"
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" \
      || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -bi "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-newlist" -o "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}.log"
  fi
}
229 | |
# Initialize base variables and parse the command line.
#
# Arguments: COMMAND [-s|--site SITE_URL] TAGS...
#   COMMAND is one of check, fetch, quickfetch.
# Globals read:    ADDITIONAL_PATH, DEFAULT_SITE
# Globals written: PATH (exported), ISQUICK, ISNEW (both reset to empty), JOB,
#                  SITE, BASE_DIR (defaults to ${PWD}, then ${HOME}), TAGS,
#                  TARGET_DIR, SITE_DIR
# Calls Err_Help (which terminates the script) on an unknown command, too few
# arguments, or -s without both a value and at least one tag.
Init() {
  # path initialization
  if [ -n "${ADDITIONAL_PATH}" ]; then
    PATH="${ADDITIONAL_PATH}:${PATH}"
  fi
  export PATH

  # misc variables
  ISQUICK=
  ISNEW=

  [ $# -ge 2 ] || Err_Help
  case "$1" in
    check|fetch|quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift

  SITE=
  case "$1" in
    -s|--site)
      # the option needs its value plus at least one tag after it
      [ $# -ge 3 ] || Err_Help
      SITE="$2"
      shift 2
      ;;
    *)
      SITE="${DEFAULT_SITE}"
      ;;
  esac

  # Get base folder - default, current folder or fallback to ${HOME}
  [ -n "${BASE_DIR}" ] || BASE_DIR="${PWD}"
  [ -n "${BASE_DIR}" ] || BASE_DIR="${HOME}"
  # force an absolute path
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac

  # all remaining arguments are tags, joined by single spaces
  TAGS="$*"
  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(printf '%s\n' "${TAGS}" | sed -e 's/\//_/g')
  SITE_DIR=$(printf '%s\n' "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
}
148 | 273 |
# initialization: banner, argument parsing, then environment checks
Msg_Welcome
Init "$@"
Check_Tools
Check_Folders

# let's do the job! JOB was validated by Init, so only these three values
# can reach this point.
case "${JOB}" in
  check)
    Generate_Link
    Check_Files
    ;;
  fetch)
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
  quickfetch)
    # pretend the folder is new so Check_Files skips verification
    ISNEW=1
    ISQUICK=1
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
esac