Mercurial > ec-dotfiles
comparison moefetch.sh @ 159:75fe19903b74
Major cleanup
author | edhoprima@gmail.com <edhoprima@gmail.com> |
---|---|
date | Fri, 05 Jun 2009 15:20:36 +0000 |
parents | cba73f6a96bb |
children | 68227a30d0b3 |
comparison
equal
deleted
inserted
replaced
158:cba73f6a96bb | 159:75fe19903b74 |
---|---|
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
16 | 16 # |
17 # Version 0.1-alpha1. Lots of bugs here. Use with care | 17 # Lots of bugs here. Use with care |
18 # USE WITH CARE | 18 # USE WITH CARE |
19 | 19 # |
20 # what it does: fetch every picture that has the specified tags. | 20 # what it does: fetch every picture that has the specified TAGS. |
21 | |
22 # requirement: wget, libxslt, md5sum (or md5) | 21 # requirement: wget, libxslt, md5sum (or md5) |
23 | 22 |
24 # configs | 23 # program additional paths for: cut, sed, wc, MD5(sum), wget, xsltproc, grep |
25 # program additional paths for: cut, sed, wc, md5(sum), wget, xsltproc, grep | 24 ADDITIONAL_PATH= |
26 extrapath= | 25 |
27 | 26 # custom md5 path with arguments, expected output: <32digit md5><space(s)><filename> |
28 # md5 calculation, expected output: <32digit md5><space(s)><filename> | 27 # Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD) |
29 # gnu: "md5sum", bsd: "md5 -r" | 28 MD5= |
30 md5="md5 -r" | 29 |
31 | 30 # default server address. Danbooru only! I do not take responsibility of stupidity. |
32 # server address. Danbooru only! I do not take responsibility of stupidity. | 31 DEFAULT_SITE="moe.imouto.org" |
33 site="moe.imouto.org" | |
34 | 32 |
35 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one. | 33 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one. |
36 # Structure is $basedir/<tags> | 34 # Structure is ${BASE_DIR}/<TAGS> |
37 basedir="/home/ifail/test" | 35 # Absolute path only. |
36 # Leave empty to use whatever folder you're running this at | |
37 BASE_DIR="" | |
38 | 38 |
39 # not user modifiable from here | 39 # not user modifiable from here |
40 | 40 |
41 GENERATE() | 41 # useless welcome message. Also version |
42 { | 42 Msg_Welcome() { |
43 MOEFETCHVERSION="0.1-beta" | |
44 cat <<EOF | |
45 moefetch ${MOEFETCHVERSION} | |
46 Copyright (c) 2009 edogawaconan <me@myconan.net> | |
47 | |
48 EOF | |
49 } | |
50 | |
51 # fatal error handler | |
52 Err_Fatal() { | |
53 echo "Fatal error: ${1}" | |
54 exit 1 | |
55 } | |
56 | |
57 # help message | |
58 Err_Help() { | |
59 cat <<EOF | |
60 Usage: moefetch (quick)fetch|status <TAGS> | |
61 EOF | |
62 exit 0 | |
63 } | |
64 | |
65 # generate link by transforming xml | |
66 Generate_Link() { | |
67 cd "${BASE_DIR}/temp" | |
43 echo | 68 echo |
44 echo "Fetching xml file" | 69 echo "Fetching xml file" |
45 wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off | 70 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${SITE_DIR}-${TARGET_DIR}-xml" -e continue=off |
46 echo "Processing XML file..." | 71 echo "Processing XML file..." |
47 # xslt evilry | 72 # xslt evilry |
48 xsltproc - "$outdir-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list" | 73 xsltproc - "${TARGET_DIR}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "${TARGET_DIR}-list" |
49 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> | 74 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> |
50 <xsl:output method="xml" indent="yes"/> | 75 <xsl:output method="xml" indent="yes"/> |
51 <xsl:template match="post"> | 76 <xsl:template match="post"> |
52 <xsl:value-of select="@file_url" /> | 77 <xsl:value-of select="@file_url" /> |
53 </xsl:template> | 78 </xsl:template> |
54 </xsl:stylesheet> | 79 </xsl:stylesheet> |
55 EOF | 80 EOF |
56 echo "`echo \`wc -l < \"$outdir-list\" \`` file(s) available on server" | 81 echo "`echo \`wc -l < \"${SITE_DIR}-${TARGET_DIR}-list\" \`` file(s) available on server" |
57 #output file: $outdir-list | 82 #output file: ${TARGET_DIR}-list |
58 } | 83 } |
59 | 84 |
60 CHECK() | 85 # check tools availability |
61 { | 86 Check_Tools() { |
62 if [ "$ISNEW" -ne 1 ];then | 87 # verify all programs required do indeed exist |
88 #MD5 | |
89 if [ ! "${MD5}" ]; then | |
90 case `uname` in | |
91 *BSD) MD5="md5 -r";; | |
92 Linux|SunOS) MD5="md5sum";; | |
93 *) Fatal_Err "No known md5 tool for this platform. Please specify manually" | |
94 esac | |
95 fi | |
96 MD5_COMMAND=`echo ${MD5} | cut -d' ' -f1` | |
97 # basic tools | |
98 COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chown comm grep ${MD5_COMMAND}" | |
99 for COMMAND in ${COMMANDS} | |
100 do | |
101 COMMAND_CHECK=`command -v "${COMMAND}"` | |
102 [ "${COMMAND_CHECK}" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}" | |
103 done | |
104 | |
105 # grep checking | |
106 # originally created for workaround on solaris | |
107 #if [ `uname` = "SunOS" ]; then | |
108 FAIL="" | |
109 echo "blah" > superrandomtestfile | |
110 echo "blah" > superrandomtestfile.2 | |
111 grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1 | |
112 rm -f superrandomtestfile superrandomtestfile.2 | |
113 [ "${FAIL}" ] && Err_Fatal "Your grep is not compatible. Please install or set path of correct grep" | |
114 } | |
115 | |
116 # verify required folders exist and writeable | |
117 Check_Folders(){ | |
118 [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}." | |
119 for FOLDER in temp trash deleted ${TARGET_DIR} | |
120 do | |
121 if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then | |
122 mkdir "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed" | |
123 fi | |
124 if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then | |
125 echo "You don't own the ${BASE_DIR}/{$FOLDER}, applying globally writeable permission on it" | |
126 chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing ownership. This shouldn't happen" | |
127 fi | |
128 done | |
129 [ `echo \`ls "${BASE_DIR}/${TARGET_DIR}" | wc -l\`` -eq 0 ] && ISNEW=1 | |
130 # let's move to workdir | |
131 cd "${BASE_DIR}/temp" | |
132 for i in error ok list newlist; do | |
133 touch "${TARGET_DIR}-${i}" || Fatal_Err "Error creating ${TARGET_DIR}-${i}. This shouldn't happen" | |
134 done | |
135 # | |
136 } | |
137 | |
138 # check files correctness | |
139 Check_Files() { | |
140 if [ "$ISNEW" -ne 1 ]; then | |
63 echo "Checking for errors..." | 141 echo "Checking for errors..." |
64 # THE FILES | 142 # THE FILES |
65 printf "" > "$outdir-error" | 143 |
66 cd "../$outdir" | 144 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR} |
67 for file in `ls` | 145 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" |
146 for TRASH in `ls * | sed -e 's/\([0-9a-f]\{32\}.*\)//g' | grep -v ^$` | |
68 do | 147 do |
69 if [ `$md5 "$file" | cut -d ' ' -f1 -` != `echo "$file" | cut -d '.' -f1` ] | 148 mv -f "${TRASH}" "${BASE_DIR}/trash" |
149 echo "Moved ${TRASH} to ${BASE_DIR}/trash" | |
150 done | |
151 printf "" > "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error" | |
152 for FILE in * | |
153 do | |
154 if [ `${MD5} "${FILE}" | cut -d ' ' -f1 -` != `echo "${FILE}" | cut -d '.' -f1` ] | |
70 then | 155 then |
71 echo "$file" >> "../temp/$outdir-error" | 156 echo |
72 echo "Error: $file" | 157 echo "${FILE}" >> "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error" |
158 echo "Error: ${FILE}" | |
73 fi | 159 fi |
74 printf "." | 160 printf "." |
75 done | 161 done |
76 echo | 162 echo |
77 cd ../temp | 163 |
78 totalerr=`echo \`wc -l < "$outdir-error"\`` | 164 # current dir: ${BASE_DIR}/temp |
79 echo "$totalerr file(s) error" | 165 cd ${BASE_DIR}/temp |
166 TOTAL_ERROR=`echo \`wc -l < "${SITE_DIR}-${TARGET_DIR}-error"\`` | |
167 echo "${TOTAL_ERROR} file(s) error" | |
80 | 168 |
81 echo "Generating list of new files..." | 169 echo "Generating list of new files..." |
82 # THE FILES | 170 # THE FILES |
83 #ls "../$outdir" | grep -vf "$outdir-error" > "$outdir-ok" | 171 #ls "../${TARGET_DIR}" | grep -vf "${TARGET_DIR}-error" > "${TARGET_DIR}-ok" |
84 # | 172 # |
85 ls "../$outdir" | comm -1 -3 "$outdir-error" - > "$outdir-ok" | 173 ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${SITE_DIR}-${TARGET_DIR}-error" - > "${SITE_DIR}-${TARGET_DIR}-ok" |
86 cat "$outdir-list" | grep -vf "$outdir-ok" > "$outdir-newlist" | 174 cat "${SITE_DIR}-${TARGET_DIR}-list" | grep -vf "${SITE_DIR}-${TARGET_DIR}-ok" > "${SITE_DIR}-${TARGET_DIR}-newlist" |
87 echo "`echo \`wc -l < \"$outdir-newlist\"\`` file(s) to be downloaded" | 175 echo "`echo \`wc -l < \"${SITE_DIR}-${TARGET_DIR}-newlist\"\`` file(s) to be downloaded" |
88 cd "../$outdir" | 176 |
177 # back to target dir | |
178 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | |
89 | 179 |
90 echo "Removing error files" | 180 echo "Removing error files" |
91 if [ $totalerr -eq 0 ]; then | 181 if [ "${TOTAL_ERROR}" -eq 0 ]; then |
92 echo "No error file. 0 file removed" | 182 echo "No error file. 0 file removed" |
93 else | 183 else |
94 cat "../temp/$outdir-error" | xargs rm | 184 cat "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error" | xargs rm |
95 echo "$totalerr file(s) removed" | 185 echo "${TOTAL_ERROR} file(s) removed" |
96 fi | 186 fi |
97 echo "`echo \`ls | wc -l\`` file(s) available locally" | 187 echo "`echo \`ls | wc -l\`` file(s) available locally" |
98 cd ../temp | 188 cd ../temp |
99 else | 189 else |
100 if [ $ISQUICK -eq 1 ]; then | 190 if [ "${ISQUICK}" ]; then |
101 echo "quick mode selected. Skipping check" | 191 echo "quick mode selected. Skipping check" |
102 else | 192 else |
103 echo "Empty local repository" | 193 echo "Empty local repository" |
104 fi | 194 fi |
105 cat "$outdir-list" > "$outdir-newlist" | 195 cd "${BASE_DIR}/temp" |
196 cat "${SITE_DIR}-${TARGET_DIR}-list" > "${TARGET_DIR}-newlist" | |
106 fi | 197 fi |
107 } | 198 } |
108 | 199 |
109 FETCH() | 200 # start downloading the images |
110 { | 201 Fetch_Images() { |
111 if [ `echo \`wc -l < "$outdir-newlist"\`` -eq 0 ]; then | 202 if [ `echo \`wc -l < "${TARGET_DIR}-newlist"\`` -eq 0 ]; then |
112 echo "No new file" | 203 echo "No new file" |
113 else | 204 else |
114 echo "Starting wget" | 205 echo "Starting wget" |
115 cd "../$outdir" | 206 cd "../${TARGET_DIR}" |
116 wget -e continue=on -bi "../temp/$outdir-newlist" -o "../temp/$outdir.log" | 207 wget -e continue=on -bi "../temp/${TARGET_DIR}-newlist" -o "../temp/${TARGET_DIR}.log" |
117 fi | 208 fi |
118 } | 209 } |
119 | 210 |
120 | 211 # initialize base variables and initial command check |
121 # path initialization | 212 Init(){ |
122 export PATH=$extrapath:${PATH} | 213 # Get base folder - current folder or fallback to ${HOME} |
123 | 214 [ "${BASE_DIR}" ] || BASE_DIR="${PWD}" |
124 # verify all programs required do indeed exist | 215 [ "${BASE_DIR}" ] || BASE_DIR="{$HOME}" |
216 [ "`echo ${BASE_DIR} | cut -c1 | grep \/`" ] || BASE_DIR="/${BASE_DIR}" | |
217 # path initialization | |
218 [ "${ADDITIONAL_PATH}" ] && PATH=${ADDITIONAL_PATH}:${PATH} | |
219 export PATH | |
125 | 220 |
126 | 221 # misc variables |
127 # basic tools | 222 ISQUICK="" |
128 commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep" | 223 ISNEW="" |
129 cmderr=" " | 224 |
130 for cmd in $commands | 225 [ $# -lt 2 ] && Err_Help |
131 do | 226 case "$1" in |
132 command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd" | 227 status|fetch|quickfetch) |
133 done | 228 echo "Starting..." |
134 if [ x"$cmderr" != x" " ]; then | 229 JOB="$1" |
135 echo "$cmderr doesn't exist in $PATH" | 230 ;; |
136 exit 1 | 231 *) |
137 fi | 232 Err_Help |
138 #md5 | 233 ;; |
139 md5base=`echo $md5 | cut -d ' ' -f 1 -` | 234 esac |
140 if [ x`command -v "$md5base" >/dev/null || echo x` != "x" ]; then | 235 shift |
141 echo "$md5base doesn't exist in $PATH" | 236 SITE= |
142 exit 1 | 237 case "$1" in |
143 fi | 238 -s|--site) |
144 mdtest= | 239 shift |
145 if [ `echo test | $md5 | cut -d ' ' -f 1 -` != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then | 240 SITE="$1" |
146 echo "$md5 doesn't produce wanted output" | 241 ;; |
147 exit 1 | 242 *) |
148 fi | 243 SITE=DEFAULT_SITE |
149 | 244 ;; |
150 # grep checking | 245 esac |
151 # originally created for workaround on solaris | 246 shift |
152 #if [ `uname` = "SunOS" ]; then | 247 TAGS="$@" |
153 FAIL=0 | 248 echo "Tags: ${TAGS}" |
154 echo "blah" > superrandomtestfile | 249 # slash is not wanted for folder name |
155 echo "blah" > superrandomtestfile.2 | 250 TARGET_DIR=`echo "${TAGS}" | sed -e 's/\//_/g'` |
156 grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1 | 251 SITE_DIR=`echo "${SITE}" | sed -e 's/\//_/g'` |
157 rm superrandomtestfile superrandomtestfile.2 superrandomtestfile.3 > /dev/null 2>&1 | 252 } |
158 if [ $FAIL = 1 ]; then | 253 |
159 echo "Your grep is not compatible. Please install or set path of correct grep" | 254 Msg_Welcome |
160 exit 1 | 255 Init "$@" |
161 fi | 256 Check_Tools |
162 #fi | 257 Check_Folders |
163 | 258 |
164 | 259 |
165 # all green (part 1)! let's go (until we check the tag) | 260 # let's do the job! |
166 | 261 case "${JOB}" in |
167 # initialization | 262 status) |
168 # are we really doing it? | 263 Generate_Link |
169 HELP="Usage: moefetch (quick)fetch|status <tags>" | 264 Check_Files |
170 | |
171 if [ $# -lt 2 ]; then | |
172 echo "$HELP" | |
173 exit 1 | |
174 fi | |
175 | |
176 case "$1" in | |
177 status|fetch|quickfetch) | |
178 echo "Starting..." | |
179 ;; | 265 ;; |
180 *) | 266 fetch) |
181 echo "$HELP" | 267 Generate_Link |
182 exit 1 | 268 Check_Files |
269 Fetch_Images | |
270 ;; | |
271 quickfetch) | |
272 ISNEW=1 | |
273 ISQUICK=1 | |
274 Generate_Link | |
275 Check_Files | |
276 Fetch_Images | |
183 ;; | 277 ;; |
184 esac | 278 esac |
185 | |
186 # we did it indeed | |
187 # get started | |
188 | |
189 # do we own the files | |
190 tags=`echo "$@" | cut -d ' ' -f 2- -` | |
191 echo "Tags: $tags" | |
192 # slash do not want | |
193 outdir=`echo "$tags" | sed -e 's/\//_/g'` | |
194 ISNEW=0 | |
195 if [ -O "$basedir" ]; then | |
196 if [ ! -d "$basedir/$outdir" ]; then | |
197 ISNEW=1 | |
198 mkdir "$basedir/$outdir" | |
199 fi | |
200 if [ ! -O "$basedir/$outdir" ]; then | |
201 echo "You don't own the $basedir/$outdir, applying globally writeable permission on it" | |
202 chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir" | |
203 fi | |
204 if [ `echo \`ls "$basedir/$outdir" | wc -l\`` -eq 0 ]; then | |
205 ISNEW=1 | |
206 fi | |
207 if [ ! -d "$basedir/temp" ]; then | |
208 mkdir "$basedir/temp" | |
209 fi | |
210 if [ ! -O "$basedir/temp" ]; then | |
211 echo "You don't own the $basedir/temp, applying globally writeable permission on it" | |
212 chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp" | |
213 fi | |
214 else | |
215 echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping" | |
216 exit 1 | |
217 fi | |
218 # let's move to workdir | |
219 cd "$basedir/temp" | |
220 touch "$outdir-error" | |
221 touch "$outdir-ok" | |
222 touch "$outdir-list" | |
223 touch "$outdir-newlist" | |
224 # | |
225 | |
226 # let's do the job! | |
227 ISQUICK=0 | |
228 case "$1" in | |
229 status) | |
230 GENERATE | |
231 CHECK | |
232 ;; | |
233 fetch) | |
234 GENERATE | |
235 CHECK | |
236 FETCH | |
237 ;; | |
238 quickfetch) | |
239 GENERATE | |
240 ISNEW=1 | |
241 ISQUICK=1 | |
242 CHECK | |
243 FETCH | |
244 esac |