-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathflickr_scraper.py
More file actions
87 lines (68 loc) · 3.04 KB
/
flickr_scraper.py
File metadata and controls
87 lines (68 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
import StringIO
import urllib

import numpy
import scipy
from PIL import Image

import flickr
class FlickrScraper(object):
    """Fetch photo thumbnail URLs from the Flickr API and normalise the
    downloaded images to uint8 arrays of shape (1, 75, 75, 3).

    Depends on the ``flickr`` wrapper module and ``urllib.urlretrieve``
    (both imported at module level).
    """

    def __init__(self):
        # SECURITY NOTE(review): API credentials are hard-coded in source;
        # they should come from the environment or a config file instead.
        api_key = 'f49f4273470a5bb1504142d20aeee4d6'
        api_secret = '0156db5ea3a6a21b'
        flickr.API_KEY = api_key
        flickr.API_SECRET = api_secret
        # Required attribution per the Flickr API terms of service.
        print("This product uses the Flickr API but is not endorsed or certified by Flickr.")

    def get_url_proper(self, photo):
        """Return the authoritative square-thumbnail URL.

        Accurate but slow: one extra API call per photo.
        """
        return photo.getURL(size='Square', urlType='source')

    def get_url(self, photo):
        """Build the square-thumbnail URL directly from the photo's fields.

        Fast (no API call). NOTE(review): the farm number ``6`` is
        hard-coded; photos hosted on another farm may yield a dead URL --
        confirm against the photo's farm attribute if available.
        """
        return u'https://farm6.staticflickr.com/{:s}/{:s}_{:s}_s.jpg'.format(photo.server, photo.id, photo.secret)

    def scrapeTag(self, tags, per_page, page=1, sort='interestingness=desc'):
        """Search one page of photos by tag; return their thumbnail URLs."""
        photos = flickr.photos_search(tags=tags, per_page=per_page, page=page, sort=sort)
        return [self.get_url(photo) for photo in photos]

    def fetchFiles(self, urls):
        """Download each URL to a temporary file; return the local paths.

        (Local renamed from ``file``, which shadowed the builtin.)
        """
        paths = []
        for url in urls:
            local_path, _headers = urllib.urlretrieve(url)
            paths.append(local_path)
        return paths

    def imageToArray(self, im):
        """Convert an image (or any array-like) to uint8 shape (1, 75, 75, 3).

        Greyscale input is replicated across three channels; an alpha
        channel is dropped; anything that is not exactly 75x75x3 after
        that is rejected and replaced by an all-zero array.
        (Uses ``numpy`` directly: ``scipy.array``/``scipy.zeros`` were
        deprecated NumPy re-exports, removed in SciPy 1.9.)
        """
        arr = numpy.array(im)
        if len(arr.shape) == 2:
            # Greyscale: replicate the single channel into R, G and B so
            # downstream code never has to special-case channel count.
            arr = arr.reshape((arr.shape[0], arr.shape[1], 1))
            arr = numpy.concatenate((arr, arr, arr), axis=2)
        elif arr.shape[2] == 4:
            # Image with an alpha channel: keep RGB only.
            arr = arr[:, :, :3]
        if arr.size != 75 * 75 * 3:  # the exact size downstream code expects
            print("Array rejected with shape %s" % (arr.shape,))
            arr = numpy.zeros((75, 75, 3), dtype=numpy.uint8)
        return arr.reshape((1, 75, 75, 3))

    def fetchFileData(self, url, filename=None, max_retries=None):
        """Download ``url`` and return it as a (1, 75, 75, 3) uint8 array.

        Retries on failure. ``max_retries=None`` keeps the original
        retry-forever behaviour; a positive int bounds the attempts, in
        which case None is returned once the budget is exhausted.
        If ``filename`` is given the download is stored there instead of
        a temporary file.
        """
        attempts = 0
        arr = None
        while arr is None:
            try:
                if filename is None:
                    im = Image.open(urllib.urlretrieve(url)[0])
                else:
                    im = Image.open(urllib.urlretrieve(url, filename)[0])
                arr = self.imageToArray(im)
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit propagate instead of being swallowed.
                arr = None
                attempts += 1
                if max_retries is not None and attempts >= max_retries:
                    break
        return arr
class FlickrScraperDummy(FlickrScraper):
    """Offline stand-in for FlickrScraper that serves images from disk.

    Every network-facing method raises NotImplementedError; only
    ``fetchFileData`` works, reading the file ``self.path + url``.
    """

    def __init__(self, path):
        # Deliberately does NOT call FlickrScraper.__init__: no API keys
        # are set and no flickr connection is made.
        self.path = path
        print("Using stored images from location : %s" % self.path)

    def get_url_proper(self, photo):
        raise NotImplementedError("Dummy scraper: no flickr connection!")

    def scrapeTag(self, tags, per_page, page=1, sort='interestingness=desc'):
        raise NotImplementedError("Dummy scraper: no flickr connection!")

    def fetchFiles(self, urls):
        raise NotImplementedError("Dummy scraper: no flickr connection!")

    def fetchFileData(self, url, filename=None):
        # ``url`` is treated as a file name appended to ``self.path``
        # (plain string concatenation, so ``path`` must carry its own
        # trailing separator); ``filename`` is accepted for interface
        # compatibility but unused.
        im = Image.open(self.path + url)
        return self.imageToArray(im)