7import re, base64, logging, pickle, httplib2, time, urlparse, urllib2, urllib, StringIO, gzip, zipfile
9from google.appengine.ext
import webapp, db
11from google.appengine.api
import taskqueue, urlfetch, memcache, images, users
12from google.appengine.ext.webapp.util
import login_required
13from google.appengine.ext.webapp
import template
15from django.utils
import simplejson
as json
16from django.utils.html
import strip_tags
18from oauth2client.appengine
import CredentialsProperty
19from oauth2client.client
import OAuth2WebServerFlow
25READER_API =
'http://www.google.com/reader/api/0'
32 credentials = CredentialsProperty()
33 sections = db.ListProperty(db.Key)
39 enc.writeInt(maxSections)
40 for section
in db.get(self.
sections[:maxSections]):
41 section.encode(enc, articleKeys)
46 title = db.TextProperty()
47 feeds = db.ListProperty(db.Key)
50 return self.
title.split(
'_')[0]
52 def encode(self, enc, articleKeys=None):
54 enc.writeString(self.key().
name())
56 enc.writeInt(len(self.
feeds))
57 for feed
in db.get(self.
feeds):
58 feed.ensureEncodedFeed()
59 enc.writeRaw(feed.encodedFeed3)
60 if articleKeys
is not None:
61 articleKeys.extend(feed.topArticles)
65 title = db.TextProperty()
66 iconUrl = db.TextProperty()
67 lastUpdated = db.IntegerProperty()
69 encodedFeed3 = db.TextProperty()
70 topArticles = db.ListProperty(db.Key)
76 self.
encode(enc, MAX_ARTICLES, articleSet)
77 logging.info(
'articleSet length is %s' % len(articleSet))
82 def encode(self, enc, maxArticles, articleSet):
83 enc.writeString(self.key().
name())
84 enc.writeString(self.
title)
87 logging.info(
'encoding feed: %s' % self.
title)
90 for article
in self.article_set.order(
'-date').fetch(limit=maxArticles):
91 encodedArts.append(article.encodeHeader())
92 articleSet.append(article.key())
94 enc.writeInt(len(encodedArts))
95 enc.writeRaw(
''.join(encodedArts))
99 feed = db.ReferenceProperty(Feed)
101 title = db.TextProperty()
102 author = db.TextProperty()
103 content = db.TextProperty()
104 snippet = db.TextProperty()
105 thumbnail = db.BlobProperty()
106 thumbnailSize = db.TextProperty()
107 srcurl = db.TextProperty()
108 date = db.IntegerProperty()
120 enc.writeString(self.key().
name())
122 enc.writeString(self.
srcurl)
123 enc.writeBool(self.
thumbnail is not None)
124 enc.writeString(self.
author)
125 enc.writeInt(self.
date)
131 content = db.BlobProperty()
132 compressed = db.BooleanProperty()
133 filename = db.StringProperty()
134 author = db.UserProperty(auto_current_user=
True)
135 date = db.DateTimeProperty(auto_now_add=
True)
141 upload_files = self.request.POST.multi.__dict__[
'_items']
142 version = self.request.get(
'version')
143 logging.info(
'files: %r' % upload_files)
144 for data
in upload_files:
145 if data[0] !=
'files':
continue
147 filename = file.filename
149 filename =
'%s-%s' % (version, filename)
150 logging.info(
'upload: %r' % filename)
152 htmlFile = HtmlFile.get_or_insert(filename)
153 htmlFile.filename = filename
157 if len(text) > 1024 * 1023:
158 data = StringIO.StringIO()
159 gz = gzip.GzipFile(str(filename),
'wb', fileobj=data)
162 htmlFile.content = data.getvalue()
163 htmlFile.compressed =
True
165 htmlFile.content = text
166 htmlFile.compressed =
False
177 user = users.get_current_user()
178 prefs = UserData.get_by_key_name(user.user_id())
180 self.redirect(
'/update/user')
183 params = {
'files': HtmlFile.all().order(
'-date').fetch(limit=30)}
184 self.response.out.write(template.render(
'top.html', params))
194 elif name ==
'login':
197 elif name ==
'upload':
200 user = users.get_current_user()
201 prefs = UserData.get_by_key_name(user.user_id())
205 html = HtmlFile.get_by_key_name(name)
210 self.response.headers[
'Content-Type'] =
'text/html'
215 gz = gzip.GzipFile(name,
217 fileobj=StringIO.StringIO(html.content))
218 self.response.out.write(gz.read())
221 self.response.out.write(html.content)
226 user = users.get_current_user()
229 flow = OAuth2WebServerFlow(
230 client_id=
'267793340506.apps.googleusercontent.com',
231 client_secret=
'5m8H-zyamfTYg5vnpYu1uGMU',
235 callback = self.request.relative_url(
'/oauth2callback')
236 authorize_url = flow.step1_get_authorize_url(callback)
238 memcache.set(user.user_id(), pickle.dumps(flow))
240 content = template.render(
'login.html', {
'authorize': authorize_url})
241 self.response.out.write(content)
244 user = users.get_current_user()
245 content = template.render(
'dev.html', {
'user': user})
246 self.response.out.write(content)
249 user = users.get_current_user()
250 content = template.render(
'upload.html', {
'user': user})
251 self.response.out.write(content)
257 upload_files = self.request.POST.multi.__dict__[
'_items']
258 version = self.request.get(
'version')
259 logging.info(
'files: %r' % upload_files)
260 for data
in upload_files:
261 if data[0] !=
'files':
continue
263 logging.info(
'upload feed: %r' % file.filename)
265 data = json.loads(file.value)
267 feedId = file.filename
268 feed = Feed.get_or_insert(feedId)
271 sectionTitle = data[
'section']
274 if feed.key()
in section.feeds:
275 logging.warn(
'Already contains feed %s, replacing' % feedId)
276 section.feeds.remove(feed.key())
279 section.feeds.insert(0, feed.key())
286 logging.error(
'Could not find section %s to add the feed to' %
296 if name.endswith(
'.jpg'):
298 key = urllib2.unquote(name[:-len(
'.jpg')])
299 article = Article.get_by_key_name(key)
300 self.response.headers[
'Content-Type'] =
'image/jpeg'
302 self.response.headers[
'Cache-Control'] =
'public,max-age=36000'
303 article.ensureThumbnail()
304 self.response.out.write(article.thumbnail)
305 elif name.endswith(
'.html'):
307 key = urllib2.unquote(name[:-len(
'.html')])
308 article = Article.get_by_key_name(key)
309 self.response.headers[
'Content-Type'] =
'text/html'
311 content =
'<h2>Missing article</h2>'
313 content = article.content
315 self.response.headers[
'Cache-Control'] =
'public,max-age=36000'
316 self.response.out.write(content)
317 elif name ==
'user.data':
319 elif name ==
'CannedData.dart':
321 elif name ==
'CannedData.zip':
327 user = users.get_current_user()
328 user_id = user.user_id()
330 key =
'data_' + user_id
332 data = memcache.get(key)
334 prefs = UserData.get_or_insert(user_id)
338 data = prefs.getEncodedData(articleKeys)
def makeDartSafe(data):
    """Return *data* rendered as a quoted string literal safe to paste into Dart source.

    repr() of a unicode object yields a quoted literal with a leading 'u'
    prefix; the [1:] slice drops that prefix.  '$' is then escaped because
    Dart treats a bare '$' inside string literals as interpolation.
    """
    literal = repr(unicode(data))[1:]
    return literal.replace('$', '\\$')
349 '// TODO(jimhug): Work out correct copyright for this file.',
353 user = users.get_current_user()
354 prefs = UserData.get_by_key_name(user.user_id())
356 data = prefs.getEncodedData(articleKeys)
357 lines.append(
' static const Map<String,String> data = const {')
358 for article
in db.get(articleKeys):
359 key = makeDartSafe(urllib.quote(article.key().
name()) +
'.html')
360 lines.append(
' %s:%s, ' % (key, makeDartSafe(article.content)))
362 lines.append(
' "user.data":%s' % makeDartSafe(data))
367 self.response.headers[
'Content-Type'] =
'application/dart'
368 self.response.out.write(
'\n'.join(lines))
374 data = StringIO.StringIO()
375 result = zipfile.ZipFile(data,
'w')
378 result.writestr(
'data/user.data',
380 logging.info(
' adding articles %s' % len(articleKeys))
382 for article
in db.get(articleKeys):
383 article.ensureThumbnail()
384 path =
'data/' + article.key().
name() +
'.html'
385 result.writestr(path.encode(
'utf-8'),
386 article.content.encode(
'utf-8'))
387 if article.thumbnail:
388 path =
'data/' + article.key().
name() +
'.jpg'
389 result.writestr(path.encode(
'utf-8'), article.thumbnail)
392 logging.info(
'writing CannedData.zip')
393 self.response.headers[
'Content-Type'] =
'multipart/x-zip'
394 disposition =
'attachment; filename=CannedData.zip'
395 self.response.headers[
'Content-Disposition'] = disposition
396 self.response.out.write(data.getvalue())
404 user = users.get_current_user()
405 prefs = UserData.get_or_insert(user.user_id())
408 db.Key.from_path(
'Section',
'user/17857667084667353155/label/Top'),
409 db.Key.from_path(
'Section',
410 'user/17857667084667353155/label/Design'),
411 db.Key.from_path(
'Section',
'user/17857667084667353155/label/Eco'),
412 db.Key.from_path(
'Section',
'user/17857667084667353155/label/Geek'),
413 db.Key.from_path(
'Section',
414 'user/17857667084667353155/label/Google'),
415 db.Key.from_path(
'Section',
416 'user/17857667084667353155/label/Seattle'),
417 db.Key.from_path(
'Section',
'user/17857667084667353155/label/Tech'),
418 db.Key.from_path(
'Section',
'user/17857667084667353155/label/Web')
430 user = users.get_current_user()
431 prefs = UserData.get_or_insert(user.user_id())
435 s1 = Section.get_or_insert(
'Test%d' % i)
436 s1.title =
'Section %d' % (i + 1)
440 label =
'%d_%d' % (i, j)
441 f1 = Feed.get_or_insert(
'Test%s' % label)
442 f1.title =
'Feed %s' % label
446 feeds.append(f1.key())
449 label =
'%d_%d_%d' % (i, j, k)
450 a1 = Article.get_or_insert(
'Test%s' % label)
453 a1.title =
'Article %s' % label
455 a1.content =
'Lorem ipsum something or other...'
456 a1.snippet =
'Lorem ipsum something or other...'
463 sections.append(s1.key())
465 prefs.sections = sections
475 user = users.get_current_user()
476 prefs = UserData.get_or_insert(user.user_id())
477 if prefs.credentials:
478 http = prefs.credentials.authorize(httplib2.Http())
480 response, content = http.request(
481 '%s/subscription/list?output=json' % READER_API)
485 self.redirect(
'/login')
488 data = json.loads(content)
490 queue_name = self.request.
get(
'queue_name',
'priority-queue')
492 for feedData
in data[
'subscriptions']:
493 feed = Feed.get_or_insert(feedData[
'id'])
495 category = feedData[
'categories'][0]
496 categoryId = category[
'id']
497 if not sections.has_key(categoryId):
498 sections[categoryId] = (category[
'label'], [])
501 sections[categoryId][1].
append(feed.key())
504 taskqueue.add(url=
'/update/feed',
505 queue_name=queue_name,
506 params={
'id': feed.key().
name()})
509 for name, (title, feeds)
in sections.items():
510 section = Section.get_or_insert(name)
511 section.feeds = feeds
512 section.title = title
515 if title ==
'Top': title =
'0Top'
516 sectionKeys.append((title, section.key()))
519 prefs.sections = [key
for t, key
in sorted(sectionKeys)]
524 '''Ensures that a given feed object is locally up to date.'''
530 queue_name = self.request.
get(
'queue_name',
'background')
531 for feed
in Feed.all():
532 taskqueue.add(url=
'/update/feed',
533 queue_name=queue_name,
534 params={
'id': feed.key().
name()})
548 feedId = self.request.
get(
'id')
549 feed = Feed.get_or_insert(feedId)
551 if feed.lastUpdated
is None:
552 self.
fetchn(feed, feedId, INITIAL_COUNT)
554 self.
fetchn(feed, feedId, UPDATE_COUNT)
556 self.response.headers[
'Content-Type'] =
"text/plain"
558 def fetchn(self, feed, feedId, n, continuation=None):
560 if continuation
is None:
561 apiUrl =
'%s/stream/contents/%s?n=%d' % (READER_API, feedId, n)
563 apiUrl =
'%s/stream/contents/%s?n=%d&c=%s' % (READER_API, feedId, n,
566 logging.info(
'fetching: %s' % apiUrl)
567 result = urlfetch.fetch(apiUrl)
569 if result.status_code == 200:
570 data = json.loads(result.content)
572 elif result.status_code == 401:
573 self.response.out.write(
'<pre>%s</pre>' % result.content)
575 self.response.out.write(result.status_code)
579 for section
in Section.all():
580 if section.fixedTitle() == title:
587 Reads a feed from the given JSON object and populates the given feed object
588 in the datastore with its data.
590 if continuation
is None:
591 if 'alternate' in data:
592 feed.iconUrl =
getFeedIcon(data[
'alternate'][0][
'href'])
593 feed.title = data[
'title']
594 feed.lastUpdated = data[
'updated']
596 articles = data[
'items']
597 logging.info(
'%d new articles for %s' % (len(articles), feed.title))
599 for articleData
in articles:
604 if len(articles) > 0
and data.has_key(
'continuation'):
605 logging.info(
'would have looked for more articles')
609 feed.ensureEncodedFeed(force=
True)
616 Reads an article from the given JSON object and populates the datastore with
619 if not 'title' in data:
623 articleId = data[
'id']
624 article = Article.get_or_insert(articleId)
626 if article.date == data[
'published']:
627 logging.info(
'found existing, aborting: %r, %r' %
628 (articleId, article.date))
631 if data.has_key(
'content'):
632 content = data[
'content'][
'content']
633 elif data.has_key(
'summary'):
634 content = data[
'summary'][
'content']
638 article.content = content
639 article.date = data[
'published']
640 article.title =
unescape(data[
'title'])
641 article.snippet =
unescape(strip_tags(content)[:SNIPPET_SIZE])
646 article.author = data.get(
'author',
'anonymous')
648 article.ensureThumbnail()
651 if data.has_key(
'alternate'):
652 for alt
in data[
'alternate']:
653 if alt.has_key(
'href'):
654 article.srcurl = alt[
'href']
659 "Inverse of Django's utils.html.escape function"
660 if not isinstance(html, basestring):
662 html = html.replace(
''',
"'").replace(
'"',
'"')
663 return html.replace(
'>',
'>').replace(
'<',
'<').replace(
'&',
'&')
667 url = urlparse.urlparse(url).netloc
668 return 'http://s2.googleusercontent.com/s2/favicons?domain=%s&alt=feed' % url
685 m = re.search(
r'src="(http://\S+\.(%s))(\?.*)?"' % extensions, text)
693 m = re.search(
r'src="http://www.youtube.com/(\S+)/(\S+)[/|"]', text)
697 return 'http://img.youtube.com/vi/%s/0.jpg' % m.group(2)
708 logging.info(
'error decoding: %s' % (url
or text))
713 logging.info(
'generating thumbnail: %s' % url)
714 thumbWidth, thumbHeight = THUMB_SIZE
716 result = urlfetch.fetch(url)
717 img = images.Image(result.content)
719 w, h = img.width, img.height
721 aspect = float(w) / h
722 thumbAspect = float(thumbWidth) / thumbHeight
724 if aspect > thumbAspect:
726 normalizedCrop = (w - h * thumbAspect) / (2.0 * w)
727 img.crop(normalizedCrop, 0., 1. - normalizedCrop, 1.)
728 elif aspect < thumbAspect:
730 normalizedCrop = (h - w / thumbAspect) / h
731 img.crop(0., 0., 1., 1. - normalizedCrop)
733 img.resize(thumbWidth, thumbHeight)
737 nimg = img.execute_transforms(output_encoding=images.JPEG)
738 logging.info(
' finished thumbnail: %s' % url)
747 user = users.get_current_user()
748 flow = pickle.loads(memcache.get(user.user_id()))
750 prefs = UserData.get_or_insert(user.user_id())
751 prefs.credentials = flow.step2_exchange(self.request.params)
753 self.redirect(
'/update/user')
759 application = webapp.WSGIApplication(
761 (
'/data/(.*)', DataHandler),
764 (
'/update/allFeeds', AllFeedsCollector),
765 (
'/update/feed', FeedCollector),
766 (
'/update/user', UserLoginHandler),
767 (
'/update/defaultFeeds', SetDefaultFeeds),
768 (
'/update/testFeeds', SetTestFeeds),
769 (
'/update/html', UpdateHtml),
770 (
'/update/upload', UploadFeed),
771 (
'/oauth2callback', OAuthHandler),
773 (
'/(.*)', MainHandler),
776 webapp.util.run_wsgi_app(application)
779if __name__ ==
'__main__':
static void encode(uint8_t output[16], const uint32_t input[4])
getUserData(self, articleKeys=None)
fetchn(self, feed, feedId, n, continuation=None)
ensureEncodedFeed(self, force=False)
encode(self, enc, maxArticles, articleSet)
encode(self, enc, articleKeys=None)
getEncodedData(self, articleKeys=None)
collectFeeds(self, prefs, content)
static void append(char **dst, size_t *count, const char *src, size_t n)
static float min(float r, float g, float b)
findImgTag(text, extensions)
findSectionByTitle(title)
collectArticle(feed, data)
collectFeed(feed, data, continuation=None)