#!/usr/bin/env python
#
# UsernameGenYahoo
# Copyright (C) 2009 Jason Wood (JW Network Consulting)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

__version__ = '1.1'
__author__ = 'Jason Wood - aka Tadaka'
__doc__ = """
http://www.jwnetworkconsulting.com

Change Log
----------
9/1/2009 - Did a bug fix for when results were less than 50, but more
           than 50 were requested.
"""

import logging
import os
import re
import signal
import sys

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import Request, URLError, urlopen
except ImportError:  # Python 3 moved the same functions into urllib.*
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

logger = logging.getLogger(__name__)

# NOTE(review): the extraction patterns did not survive in the copy of this
# file under review -- they read as '' and '(.*?)', which cannot extract
# anything and made int(totalhits[0]) raise ValueError.  The three patterns
# below are reconstructed from the variable names and comments in search()
# ("totalhits", "next page of results", "title value") and ASSUME the
# response carries a totalhits="N" attribute plus <nextpage> and <title>
# elements, optionally CDATA-wrapped.  Confirm against a real response.
_TOTALHITS_RE = re.compile(r'totalhits="(\d+)"')
_NEXTPAGE_RE = re.compile(r'<nextpage>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</nextpage>')
_TITLE_RE = re.compile(r'<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</title>')

# Patterns used to trim a (lower-cased) result title down to a person's name.
_LINKEDIN_SUFFIX_RE = re.compile(r'(.*) - linkedin', re.M | re.I)
_DIRECTORY_SUFFIX_RE = re.compile(r'(.*): directory', re.M | re.I)


class Get(object):
    """Minimal HTTP GET helper that sends a browser-like User-Agent."""

    def __init__(self):
        # Header *value* only.  The header name is supplied by add_header()
        # in get(); the previous value embedded a second 'User-Agent: '.
        self.user_agent = ('Mozilla/5.0 '
                           '(Macintosh; U; Intel Mac OS X 10.5; en-US; '
                           'rv:1.9.0.11) '
                           'Gecko/2009060214 Firefox/3.0.11')

    def get(self, url):
        """
        get(url) -> Response

        Open the given url and return the response object.

        Raises URLError (or its HTTPError subclass) when the request fails.
        The failure is logged first.  Previously the handler fell through to
        ``return result`` with ``result`` unbound, so every failure surfaced
        as an unrelated UnboundLocalError/AttributeError.
        """
        request = Request(url)
        request.add_header('User-Agent', self.user_agent)
        logger.debug('Get.get - getting url %s', url)
        try:
            return urlopen(request)
        except URLError as e:
            # Only HTTPError has .code; a plain URLError carries .reason.
            detail = getattr(e, 'code', None) or getattr(e, 'reason', e)
            logger.error('Get.get - request for %s failed: %s', url, detail)
            raise


class Search(Get):
    """Query the Yahoo BOSS web-search endpoint and collect result titles."""

    BASE_URL = 'http://boss.yahooapis.com'
    SEARCH_PATH = '/ysearch/web/v1/'
    # The service returns at most this many results per request, so larger
    # requests have to be assembled from several pages.
    MAX_PER_PAGE = 50

    def search(self, q, id, num):
        """
        search(q, id, num) -> list of title strings

        q   -- search expression, already URL-escaped by the caller; it is
               placed in the URL path unchanged.
        id  -- application id, sent as the ``appid`` query parameter.  (The
               name shadows the builtin but is kept for existing callers.)
        num -- number of results wanted.

        When ``num`` is at most MAX_PER_PAGE, the titles of the single
        response page are returned.  For larger requests, next-page links
        are followed until ``num`` titles have been collected, the reported
        total is reached, or no further page is offered.  At most ``num``
        titles are returned in that case.

        Raises URLError if any request fails.
        """
        query = urlencode({'appid': id,
                           'format': 'xml',
                           'count': min(num, self.MAX_PER_PAGE)})
        content = self._read_page(
            self.BASE_URL + self.SEARCH_PATH + q + '?' + query)

        # The first page always contributes its titles; the old paging loop
        # skipped straight to the second page and lost them.
        tokens = _TITLE_RE.findall(content)
        if num <= self.MAX_PER_PAGE:
            return tokens

        wanted = num
        hits = _TOTALHITS_RE.search(content)
        if hits:
            total = int(hits.group(1))
            logger.info('total hits reported: %d', total)
            wanted = min(num, total)

        while len(tokens) < wanted:
            next_link = _NEXTPAGE_RE.search(content)
            if not next_link:
                break  # last page reached: nothing more to follow
            content = self._read_page(self.BASE_URL + next_link.group(1))
            page_titles = _TITLE_RE.findall(content)
            if not page_titles:
                break  # an empty page would otherwise loop forever
            tokens.extend(page_titles)

        return tokens[:num]

    def _read_page(self, url):
        """Fetch url via get() and return the body as text, closing the response."""
        response = self.get(url)
        try:
            raw = response.read()
        finally:
            response.close()
        # Python 3 hands back bytes; Python 2 already gives str, which is
        # left untouched so output there is unchanged.
        if not isinstance(raw, str):
            raw = raw.decode('utf-8', 'replace')
        return raw


class MungeUsernames(Search):
    """Create usernames out of the search results."""

    def mungeusers(self, searchterms):
        """
        mungeusers(searchterms) -> None

        Print, one per line, the username variations derived from one
        result title.  Titles that do not contain " - linkedin", or whose
        name part is not two or three words long, print nothing.
        """
        for uname in self._usernames(searchterms):
            print(uname)

    def _usernames(self, title):
        """Return the list of username variations for one result title."""
        # Work in lower case, then strip " - linkedin" and, when present,
        # ": directory" to leave just the name.
        matched = _LINKEDIN_SUFFIX_RE.match(title.lower())
        if not matched:
            return []
        name = matched.group(1)
        directory = _DIRECTORY_SUFFIX_RE.match(name)
        if directory:
            name = directory.group(1)

        # split() rather than split(" "): repeated spaces used to produce
        # empty parts, and indexing part[0] on them raised IndexError.
        parts = name.split()
        if len(parts) not in (2, 3):
            return []

        fname, lname = parts[0], parts[-1]
        variants = [
            fname[0] + lname,   # first initial + last name
            fname + lname[0],   # first name + last initial
            lname + fname[0],   # last name + first initial
        ]
        if len(parts) == 3:
            # first initial + middle initial + last name
            variants.append(fname[0] + parts[1][0] + lname)
        return variants


def main(argv=None):
    """Command-line entry point: search for a company and print usernames."""
    if argv is None:
        argv = sys.argv

    # Exit quietly on Ctrl-C instead of dumping a KeyboardInterrupt trace.
    signal.signal(signal.SIGINT, lambda signum, frame: sys.exit())

    usage = ('usage: %s company-name yahoo-api-id #-of-results'
             % os.path.basename(argv[0]))
    if len(argv) < 4:
        print(usage)
        sys.exit()

    try:
        num2get = int(argv[3])
    except ValueError:
        # A non-numeric count previously ended in a traceback.
        print(usage)
        sys.exit(1)

    logging.basicConfig()

    companyname = argv[1]
    apiid = argv[2]

    munger = MungeUsernames()
    search_term = 'site:linkedin.com%20' + companyname

    try:
        results = munger.search(search_term, apiid, num2get)
    except URLError as e:
        sys.stderr.write('search failed: %s\n' % e)
        sys.exit(1)

    for result in results:
        munger.mungeusers(result)


if __name__ == '__main__':
    main()