#!/bin/sh

# Copyright (c) 2009, edogawaconan <me@myconan.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# Version 0.1-alpha1. Lots of bugs here. Use with care
# USE WITH CARE

# what it does: fetch every picture that has the specified tags.

# requirement: wget, libxslt, md5sum (or md5)

# configs
# Additional directories to search for: cut, sed, wc, md5(sum), wget, xsltproc.
# Leave empty when everything is already reachable through PATH.
extrapath=

# md5 calculation, expected output: <32digit md5><space(s)><filename>
# gnu: "md5sum", bsd: "md5 -r"
md5="md5 -r"

# Server address. Danbooru only! I take no responsibility for misuse.
site="moe.imouto.org"

# Base directory. Make sure it is writable and owned by you.
# Structure is $basedir/<tags>; working files are kept in $basedir/temp.
basedir="/home/ifail/test"

# not user modifiable from here

# GENERATE: download the post index for $tags from $site and reduce it to a
# list of image URLs, one per line.
# Globals read:   site, tags, outdir
# Files written:  "$outdir-xml" and "$outdir-list" in the current directory
# Returns:        0 on success; 1 when the index cannot be downloaded, in
#                 which case "$outdir-list" is left empty.
GENERATE()
{
	echo
	echo "Fetching xml file"
	if ! wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off; then
		echo "Error: failed to fetch the post index from $site" >&2
		# Never leave a list from an earlier run behind for the next step.
		: > "$outdir-list"
		return 1
	fi
	echo "Processing XML file..."
	# xslt evilry: print the file_url attribute of every <post>, let sed cut
	# each URL down to <prefix>/<32 hex digits>.<ext>, and keep only lines that
	# start with http (this drops the XML declaration and blank lines).
	xsltproc - "$outdir-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
	# $(( )) strips the padding some wc implementations put before the count.
	echo "$(($(wc -l < "$outdir-list"))) file(s) available on server"
	#output file: $outdir-list
}

# CHECK: verify the local copy of $outdir and work out what must be downloaded.
# Must be called from $basedir/temp and always returns there on success.
#
# When the repository is not new (ISNEW != 1):
#   - every regular file in ../$outdir whose md5 differs from the part of its
#     name before the first "." is recorded in "$outdir-error";
#     the remaining files are recorded in "$outdir-ok";
#   - "$outdir-newlist" receives every URL of "$outdir-list" that does not
#     name a file listed in "$outdir-ok";
#   - the files listed in "$outdir-error" are deleted.
# Otherwise the whole "$outdir-list" is copied to "$outdir-newlist".
#
# Globals read:    ISNEW, ISQUICK, md5, outdir
# Globals written: file, sum, totalerr
# Returns:         non-zero when a directory change fails.
CHECK()
{
	if [ "$ISNEW" -ne 1 ]; then
		echo "Checking for errors..."
		# THE FILES
		# Truncate instead of `echo >`: a leading blank line would be counted
		# by wc -l as one broken file.
		: > "$outdir-error"
		: > "$outdir-ok"
		cd "../$outdir" || return 1
		for file in *
		do
			# Skips the literal "*" of an empty directory and anything that
			# is not a regular file.
			[ -f "$file" ] || continue
			sum=$($md5 "./$file" | cut -d ' ' -f 1)
			if [ "$sum" != "${file%%.*}" ]; then
				echo "$file" >> "../temp/$outdir-error"
				echo "Error: $file"
			else
				echo "$file" >> "../temp/$outdir-ok"
			fi
			printf "."
		done
		echo
		cd ../temp || return 1
		totalerr=$(($(wc -l < "$outdir-error")))
		echo "$totalerr file(s) error"

		echo "Generating list of new files..."
		# THE FILES
		# Keep every URL that does not contain the name of a verified local
		# file. An empty pattern file is handled separately because grep
		# implementations disagree on what it matches.
		if [ -s "$outdir-ok" ]; then
			grep -vFf "$outdir-ok" "$outdir-list" > "$outdir-newlist"
		else
			cat "$outdir-list" > "$outdir-newlist"
		fi
		echo "$(($(wc -l < "$outdir-newlist"))) file(s) to be downloaded"
		cd "../$outdir" || return 1

		echo "Removing error files"
		if [ "$totalerr" -eq 0 ]; then
			echo "No error file. 0 file removed"
		else
			# One name per line; read -r keeps spaces and backslashes intact.
			while IFS= read -r file
			do
				rm -f -- "$file"
			done < "../temp/$outdir-error"
			echo "$totalerr file(s) removed"
		fi
		echo "$(($(ls | wc -l))) file(s) available locally"
		# Go back to the working directory: FETCH expects to start there.
		cd ../temp || return 1
	else
		if [ "${ISQUICK:-0}" -eq 1 ]; then
			echo "quick mode selected. Skipping check"
		else
			echo "Empty local repository"
		fi
		cat "$outdir-list" > "$outdir-newlist"
	fi
}

# FETCH: download every URL listed in "$outdir-newlist" into ../$outdir.
# Must be called from $basedir/temp. wget is started in the background (-b)
# with its log written to ../temp/$outdir.log.
# Globals read: outdir
# Returns:      non-zero when ../$outdir cannot be entered, otherwise the
#               status of the last command run.
FETCH()
{
	# A missing or empty list means there is nothing to do.
	if [ ! -s "$outdir-newlist" ] || [ "$(($(wc -l < "$outdir-newlist")))" -eq 0 ]
	then
		echo "No new file"
	else
		echo "Starting wget"
		cd "../$outdir" || return 1
		# -i must be followed directly by the list file; written as "-bi -e ..."
		# wget would take "-e" as the input file name.
		wget -b -e continue=on -i "../temp/$outdir-newlist" -o "../temp/$outdir.log"
	fi
}


# path initialization
# Append only when extrapath is set: a trailing ":" would add an empty PATH
# entry, which the shell treats as the current directory.
if [ -n "$extrapath" ]; then
	PATH="${PATH}:${extrapath}"
fi
export PATH

# verify all programs required do indeed exist
# basic tools (chmod and grep are used further down)
commands="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep"
cmderr=" "
for cmd in $commands
do
	command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd"
done
# cmderr still equals its single-space initial value when nothing is missing.
if [ x"$cmderr" != x" " ]; then
	echo "$cmderr doesn't exist in $PATH"
	exit 1
fi
#md5
# The first word of $md5 is the program itself; the rest are its options.
md5base=$(echo "$md5" | cut -d ' ' -f 1)
if ! command -v "$md5base" >/dev/null; then
	echo "$md5base doesn't exist in $PATH"
	exit 1
fi
# Sanity check against the known md5 of "test" followed by a newline.
# The substitution is quoted so that empty output counts as a mismatch
# instead of producing a malformed test expression.
if [ "$(echo test | $md5 | cut -d ' ' -f 1)" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
	echo "$md5 doesn't produce wanted output"
	exit 1
fi


# all green (part 1)! let's go (until we check the tag)

# initialization
# are we really doing it?
HELP="Usage: moefetch (quick)fetch|status <tags>"

# A mode word and at least one tag are required.
[ $# -ge 2 ] || {
	echo "$HELP"
	exit 1
}

# Only the three known modes are accepted.
case "$1" in
	status | fetch | quickfetch) echo "Starting..." ;;
	*)
		echo "$HELP"
		exit 1
		;;
esac

# we did it indeed
# get started

# Everything after the mode word is the tag query.
tags=$(echo "$@" | cut -d ' ' -f 2-)
echo "Tags: $tags"
# slash do not want: the tags double as a directory name
outdir=$(echo "$tags" | sed -e 's/\//_/g')

# do we own the files
# ISNEW becomes 1 when the tag directory had to be created or holds no files.
ISNEW=0
if [ ! -O "$basedir" ]; then
	echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
	exit 1
fi

if [ ! -d "$basedir/$outdir" ]; then
	ISNEW=1
	if ! mkdir "$basedir/$outdir"; then
		echo "Fatal error: cannot create $basedir/$outdir. Stopping"
		exit 1
	fi
fi
if [ ! -O "$basedir/$outdir" ]; then
	echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
fi
if [ -z "$(ls "$basedir/$outdir")" ]; then
	ISNEW=1
fi

if [ ! -d "$basedir/temp" ]; then
	if ! mkdir "$basedir/temp"; then
		echo "Fatal error: cannot create $basedir/temp. Stopping"
		exit 1
	fi
fi
if [ ! -O "$basedir/temp" ]; then
	echo "You don't own the $basedir/temp, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
fi
# let's move to workdir
# Stop if the directory cannot be entered; otherwise the working files would
# be created wherever the script was started from.
if ! cd "$basedir/temp"; then
	echo "Fatal error: cannot enter $basedir/temp. Stopping"
	exit 1
fi
# Make sure every working file exists before it is read.
touch "$outdir-error" "$outdir-ok" "$outdir-list" "$outdir-newlist"
#

# let's do the job!
# All three modes build the server list and run CHECK. quickfetch first
# forces ISNEW/ISQUICK to 1, and status is the only mode that skips FETCH.
ISQUICK=0
case "$1" in
	status | fetch | quickfetch)
		GENERATE
		if [ "$1" = "quickfetch" ]; then
			ISNEW=1
			ISQUICK=1
		fi
		CHECK
		if [ "$1" != "status" ]; then
			FETCH
		fi
		;;
esac