comparison moefetch.sh @ 148:378ade047762

(none)
author edhoprima
date Sat, 25 Apr 2009 15:25:06 +0000
parents
children 7200b4339aea
comparison
equal deleted inserted replaced
147:88e8acf9b563 148:378ade047762
1 #!/bin/sh
2
3 # Copyright (c) 2009, edogawaconan <me@myconan.net>
4 #
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
8 #
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
17 # Version 0.1-alpha1. Lots of bugs here. Use with care
18 # USE WITH CARE
19
20 # what it does: fetch every picture that has the specified tags.
21
22 # requirement: wget, libxslt, md5sum (or md5)
23
# ---- user configuration --------------------------------------------------

# Extra directories to search for cut, sed, wc, md5(sum), wget and xsltproc.
# Leave empty when everything is already reachable through PATH.
extrapath=""

# Command used to compute checksums. Its output must look like
#   <32 hex digit md5><space(s)><filename>
# GNU systems: "md5sum"; BSD systems: "md5 -r".
md5='md5 -r'

# Host name of the server to fetch from. The script queries
# /post/index.xml on it, so it has to be a Danbooru-style site.
site='moe.imouto.org'

# Base directory; it must exist and belong to the user running the script.
# Pictures are stored in $basedir/<tags>, working files in $basedir/temp.
basedir='/home/ifail/test'

# ---- nothing below this line is meant to be edited by users ---------------
40
# GENERATE: download the post index for $tags and turn it into a URL list.
#
# Reads:  $site, $tags, $outdir
# Writes: "$outdir-xml"  - the raw XML index as sent by the server
#         "$outdir-list" - one normalized picture URL per line
# Exits the script with status 1 when the index cannot be downloaded, so
# that later steps never work from a missing or truncated list.
GENERATE()
{
  echo
  echo "Fetching xml file"
  if ! wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off
  then
    echo "Failed to fetch xml file from $site"
    exit 1
  fi
  echo "Processing XML file..."
  # The stylesheet prints the file_url attribute of every <post>, one per
  # line (the explicit newline keeps URLs apart even when the server sends
  # the whole index on a single line). sed then rewrites every URL to
  # <prefix>/<32 hex digit md5>.<extension>, and grep drops whatever is left
  # that is not a URL.
  xsltproc - "$outdir-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="text"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" /><xsl:text>&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>
EOF
  # the arithmetic expansion strips the padding some wc implementations add
  available=$(( $(wc -l < "$outdir-list") ))
  echo "$available file(s) available on server"
  #output file: $outdir-list
}
59
# CHECK: verify already-downloaded files and build the download list.
#
# Expects the current directory to be the work directory that holds the
# "$outdir-*" files (a sibling of the picture directory "../$outdir") and
# leaves the current directory unchanged.
# Reads:  $ISNEW (1 means there are no local files yet), $md5, $outdir,
#         "$outdir-list"
# Writes: "$outdir-ok"      - local files whose md5 matches their name
#         "$outdir-error"   - local files that failed that comparison
#         "$outdir-newlist" - URLs from "$outdir-list" still to be fetched
#         $totalerr         - number of failed files (only when files exist)
# Files that fail the comparison are deleted so they get downloaded again.
CHECK()
{
  # ISNEW is unset when the directory already held files; treat that as 0
  # instead of letting the numeric test fail on an empty string.
  if [ "${ISNEW:-0}" -ne 1 ]; then
    echo "Checking for errors..."
    # THE FILES
    : > "$outdir-error"
    : > "$outdir-ok"
    for file in "../$outdir"/*
    do
      # also skips the literal pattern left behind by an empty directory
      [ -f "$file" ] || continue
      name=${file##*/}
      sum=$($md5 "$file" | cut -d ' ' -f 1)
      # a picture is expected to be named <md5 of its content>.<extension>
      if [ "$sum" = "${name%%.*}" ]; then
        printf '%s\n' "$name" >> "$outdir-ok"
      else
        printf '%s\n' "$name" >> "$outdir-error"
        echo "Error: $name"
      fi
      printf "."
    done
    echo
    # the arithmetic expansion strips the padding some wc implementations add
    totalerr=$(( $(wc -l < "$outdir-error") ))
    echo "$totalerr file(s) error"

    echo "Generating list of new files..."
    # THE FILES
    # Keep every URL that does not end in the name of a verified local file.
    # With nothing verified the whole server list is new.
    if [ -s "$outdir-ok" ]; then
      grep -F -v -f "$outdir-ok" "$outdir-list" > "$outdir-newlist"
    else
      cat "$outdir-list" > "$outdir-newlist"
    fi
    newcount=$(( $(wc -l < "$outdir-newlist") ))
    echo "$newcount file(s) to be downloaded"

    echo "Removing error files"
    while IFS= read -r name
    do
      rm -f -- "../$outdir/$name"
    done < "$outdir-error"
    echo "$totalerr file(s) removed"
    localcount=$(( $(ls "../$outdir" | wc -l) ))
    echo "$localcount file(s) available locally"
  else
    echo "Empty local repository"
    cat "$outdir-list" > "$outdir-newlist"
  fi
}
103
# FETCH: hand every URL in "$outdir-newlist" to a background wget.
#
# Expects the current directory to be the "temp" work directory; when a
# download is started the current directory becomes "../$outdir", where the
# files are saved. wget's log goes to "../temp/$outdir.log".
# Reads:   $outdir, "$outdir-newlist"
# Returns: 0 when there is nothing to fetch, 1 when the picture directory
#          cannot be entered, otherwise the exit status of wget.
FETCH()
{
  # a missing or empty list both mean there is nothing to download
  if [ ! -s "$outdir-newlist" ]; then
    echo "No new file"
    return 0
  fi
  echo "Starting wget"
  # never let wget run from the wrong directory
  if ! cd "../$outdir"; then
    echo "Cannot enter ../$outdir, nothing downloaded"
    return 1
  fi
  wget -bi "../temp/$outdir-newlist" -o "../temp/$outdir.log"
}
115
116
# path initialization
# Only extend PATH when extrapath is set: an empty entry (a trailing ':')
# makes the shell search the current directory for commands.
if [ -n "$extrapath" ]; then
  PATH=${PATH}:$extrapath
fi
export PATH

# verify all programs required do indeed exist
# basic tools
commands="cut sed grep wc wget xsltproc xargs rm mkdir chmod comm ls touch"
cmderr=" "
for cmd in $commands
do
  command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd"
done
if [ x"$cmderr" != x" " ]; then
  echo "$cmderr doesn't exist in $PATH"
  exit 1
fi
#md5
# the first word of $md5 is the program, the rest are its options
md5base=${md5%% *}
if ! command -v "$md5base" >/dev/null; then
  echo "$md5base doesn't exist in $PATH"
  exit 1
fi
# Self-test: the md5 of "test" followed by a newline is known. The data is
# fed on standard input without a file argument, which both md5sum and BSD
# md5 are documented to accept. The result is quoted so that a tool that
# prints nothing fails the comparison instead of breaking the test.
md5test=$(echo test | $md5 | cut -d ' ' -f 1)
if [ "$md5test" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
  echo "$md5 doesn't produce wanted output"
  exit 1
fi
144
145
# all green (part 1)! let's go (until we check the tag)

# initialization
# are we really doing it?
# One usage text for both failure paths, listing exactly the actions the
# case statement below accepts.
usage="Usage: moefetch status|get|update <tags>"

# an action and at least one tag are required
if [ $# -lt 2 ]; then
  echo "$usage"
  exit 1
fi

case "$1" in
  status|get|update)
    echo "Starting..."
    ;;
  *)
    echo "$usage"
    exit 1
    ;;
esac
164
# we did it indeed
# get started

# Everything after the action word is the tag query. The action in $1 holds
# no space, so removing the text up to the first space leaves the tags.
# The positional parameters themselves are left alone because $1 is used
# again when the action is dispatched.
tags=$*
tags=${tags#* }
echo "Tags: $tags"
# slash do not want
outdir=$(printf '%s\n' "$tags" | sed -e 's/\//_/g')
# These names would make the picture directory point at $basedir itself, at
# its parent, or at the work directory; refuse them before touching anything.
case "$outdir" in
  ""|.|..|temp)
    echo "Fatal error: \"$tags\" cannot be used as a directory name. Stopping"
    exit 1
    ;;
esac

# ISNEW=1 tells CHECK there are no local files to verify; start from a
# defined value so the numeric test in CHECK always has a number to compare.
ISNEW=0
# do we own the files
if [ -O "$basedir" ]; then
  if [ ! -d "$basedir/$outdir" ]; then
    ISNEW=1
    mkdir "$basedir/$outdir" || exit 1
  fi
  if [ ! -O "$basedir/$outdir" ]; then
    echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
    chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
  fi
  # an existing but empty directory counts as new as well
  if [ -z "$(ls "$basedir/$outdir")" ]; then
    ISNEW=1
  fi
  if [ ! -d "$basedir/temp" ]; then
    mkdir "$basedir/temp" || exit 1
  fi
  if [ ! -O "$basedir/temp" ]; then
    echo "You don't own the $basedir/temp, applying globally writeable permission on it"
    chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
  fi
else
  echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
  exit 1
fi
# let's move to workdir
if ! cd "$basedir/temp"; then
  echo "Fatal error: cannot enter $basedir/temp. Stopping"
  exit 1
fi
# make sure every work file exists; "--" protects tags starting with "-"
touch -- "$outdir-error"
touch -- "$outdir-ok"
touch -- "$outdir-list"
touch -- "$outdir-newlist"
#
203
# let's do the job!
# status: refresh the server list and verify the local files
# get:    the same, followed by the download
# update: download only, using whatever "$outdir-newlist" currently holds
if [ "$1" = "status" ]; then
  GENERATE
  CHECK
elif [ "$1" = "get" ]; then
  GENERATE
  CHECK
  FETCH
elif [ "$1" = "update" ]; then
  FETCH
fi