git.fiddlerwoaroof.com
Browse code

Adding a distinctive user agent

fiddlerwoaroof authored on 21/10/2015 23:47:23
Showing 1 changed file
... ...
@@ -13,11 +13,12 @@ def titlecase(line):
13 13
 
14 14
 class DefaultTitleGetter(object):
15 15
     url_cleaner = re.compile('[+\-_]')
16
+    user_agent = {'User-Agent': 'Marrow Title Getter: https://joinmarrow.com'}
16 17
 
17 18
     def get_title(self, url):
18 19
         s = requests.session()
19 20
         scheme, netloc, path, params, query, fragment = urlparse.urlparse(url, 'http')
20
-        data = s.get(url)
21
+        data = s.get(url, headers=self.user_agent)
21 22
         etree = lxml.html.fromstring(data.content.decode(data.encoding))
22 23
 
23 24
         canonicalLink = etree.xpath('//link[@rel="canonical"]/@href')