7bf5fab1fa2fa07cc012c0570781aea23343ae05bcca862eaaeeb4e5a739a9107f8ae073ef6a37b3
 
 
1
# The contents of this file are subject to the Common Public Attribution
 
 
2
# License Version 1.0. (the "License"); you may not use this file except in
 
 
3
# compliance with the License. You may obtain a copy of the License at
 
 
4
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
 
 
5
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
 
 
6
# software over a computer network and provide for limited attribution for the
 
 
7
# Original Developer. In addition, Exhibit A has been modified to be consistent
 
 
8
# with Exhibit B.
 
 
9
# 
 
 
10
# Software distributed under the License is distributed on an "AS IS" basis,
 
 
11
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
 
 
12
# the specific language governing rights and limitations under the License.
 
 
13
# 
 
 
14
# The Original Code is Reddit.
 
 
15
# 
 
 
16
# The Original Developer is the Initial Developer.  The Initial Developer of the
 
 
17
# Original Code is CondeNet, Inc.
 
 
18
# 
 
 
19
# All portions of the code written by CondeNet are Copyright (c) 2006-2008
 
 
20
# CondeNet, Inc. All Rights Reserved.
 
 
21
################################################################################
 
 
22
 
 
 
23
from pylons import g, config
 
 
24
 
 
 
25
from r2.models.link import Link
 
 
26
from r2.lib.workqueue import WorkQueue
 
 
27
from r2.lib import s3cp
 
 
28
from r2.lib.utils import timeago, fetch_things2
 
 
29
from r2.lib.db.operators import desc
 
 
30
from r2.lib.scraper import make_scraper
 
 
31
 
 
 
32
import tempfile
 
 
33
from Queue import Queue
 
 
34
 
 
 
35
# Module-level settings, read from the pylons `g` object once, when this
# module is imported.
log = g.log

# prefix shared by the S3 resource name and the public thumbnail URL
s3_thumbnail_bucket = g.s3_thumb_bucket

# default value of the `period` argument of process_new_links
media_period = g.media_period

# presumably the number of scraper worker threads (process_new_links
# runs its work queue with 20 workers) -- confirm
threads = 20
 
 
39
 
 
 
40
def thumbnail_url(link):
    """Return the url of the thumbnail belonging to `link`.

    The url is built from the module-level s3_thumbnail_bucket value and
    the link's _fullname, with a '.png' extension.
    """
    # NOTE(review): only one slash follows 'http:'. Presumably the bucket
    # setting starts with '/' (upload_thumb uses the same value as the
    # beginning of the S3 resource path) -- confirm against the config.
    bucket, fullname = s3_thumbnail_bucket, link._fullname
    return 'http:/%s%s.png' % (bucket, fullname)
 
 
43
 
 
 
44
def upload_thumb(link, image):
    """Given a link and an image, uploads the image to s3 under a
    resource name based on the link's fullname.

    link  -- object whose _fullname is used to name the uploaded resource
    image -- object with a save(file) method; presumably a PIL image,
             which picks the PNG format from the temp file's '.png'
             suffix -- confirm

    The image is written to a temporary file that is handed to
    s3cp.send_file by name. The temporary file is always closed (and
    thereby removed) before returning, including when saving or
    uploading raises; the exception itself is propagated unchanged.
    """
    f = tempfile.NamedTemporaryFile(suffix = '.png')
    try:
        image.save(f)
        # send_file receives the file's *name*, not this file object, so
        # make sure nothing is left sitting in our write buffer
        f.flush()

        resource = s3_thumbnail_bucket + link._fullname + '.png'
        log.debug('uploading to s3: %s' % link._fullname)
        s3cp.send_file(f.name, resource, 'image/png', 'public-read', None, False)
        log.debug('thumbnail %s: %s' % (link._fullname, thumbnail_url(link)))
    finally:
        # closing a NamedTemporaryFile also deletes it from disk
        f.close()
 
 
54
 
 
 
55
def make_link_info_job(results, link, useragent):
    """Build a zero-argument callable to hand to a work queue.

    When called, the job scrapes link.url for a thumbnail and a media
    object, uploads the thumbnail if one was found, and stores
    (thumbnail, media_object) in the results dict under the link.

    useragent is accepted but is not used by this function.
    """
    def job():
        scraper = make_scraper(link.url)

        # thumbnail first, then media object
        info = (scraper.thumbnail(), scraper.media_object())

        # nothing to upload when the scraper found no thumbnail
        if info[0]:
            upload_thumb(link, info[0])

        results[link] = info
    return job
 
 
70
 
 
 
71
def update_link(link, thumbnail, media_object):
    """Store scraped media information on a link and commit it.

    has_thumbnail is set to True when `thumbnail` is truthy and
    media_object is set when `media_object` is truthy; a falsy value
    leaves the corresponding attribute untouched. link._commit() is
    called in every case.
    """
    changes = []
    if thumbnail:
        changes.append(('has_thumbnail', True))
    if media_object:
        changes.append(('media_object', media_object))

    for attr, value in changes:
        setattr(link, attr, value)

    link._commit()
 
 
81
 
 
 
82
def process_new_links(period = media_period, force = False):
    """Fetches links from the last period and sets their media
    properties. If force is True, it will fetch properties for links
    even if the properties already exist.

    period -- passed to timeago() to compute the cutoff date; only links
              newer than the cutoff are queried. Defaults to the
              module-level media_period.
    force  -- when False, links that already have has_thumbnail or
              media_object set are skipped.

    Scraping runs on a work queue with `threads` workers; the database
    writes happen afterwards in the calling thread.
    """
    links = Link._query(Link.c._date > timeago(period), sort = desc('_date'),
                        data = True)
    results = {}
    jobs = []
    for link in fetch_things2(links):
        # self posts are never scraped
        if link.is_self:
            continue

        if not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    #send links to a queue; the pool size comes from the module-level
    #`threads` setting rather than a second hard-coded copy of the number
    wq = WorkQueue(jobs, num_workers = threads)
    wq.start()
    wq.jobs.join()

    #when the queue is finished, do the db writes in this thread
    for link, (thumbnail, media_object) in results.items():
        update_link(link, thumbnail, media_object)
 
 
107
 
 
 
108
def set_media(link):
    """Scrape and store the media properties of one link.

    Builds the job with make_link_info_job and runs it directly in the
    calling thread, then passes the scraped thumbnail and media object
    to update_link.
    """
    scraped = {}
    job = make_link_info_job(scraped, link, g.useragent)
    job()

    thumbnail, media_object = scraped[link]
    update_link(link, thumbnail, media_object)