A Programmer at Olivet University: My solution to Google Python Class Babynames homework

Thursday, February 7, 2013

My solution to Google Python Class Babynames homework

def extract_names(filename):
"""
Given a file name for baby.html, returns a list starting with the year string
followed by the name-rank strings in alphabetical order.
['2006', 'Aaliyah 91', Aaron 57', 'Abagail 895', ' ...]
"""
# +++your code here+++
names = []

f = open(filename, 'rU')
text = f.read()

year_match = re.search(r'Popularity\sin\s(\d\d\d\d)', text)
if not year_match:
sys.stderr.write('Couldn\'t find the year!\n')
sys.exit(1)
year = year_match.group(1)
names.append(year)

tuples = re.findall(r'<td>(\d+)</td><td>(\w+)</td>\<td>(\w+)</td>', text)

names_to_rank = {}
for rank_tuple in tuples:
(rank, boyname, girlname) = rank_tuple
if boyname not in names_to_rank:
names_to_rank[boyname] = rank
if girlname not in names_to_rank:
names_to_rank[girlname] = rank

sorted_names = sorted(names_to_rank.keys())

for name in sorted_names:
names.append(name + " " + names_to_rank[name])

return names

def main():
# This command-line parsing code is provided.
# Make a list of command line arguments, omitting the [0] element
# which is the script itself.
args = sys.argv[1:]

if not args:
print 'usage: [--summaryfile] file [file ...]'
sys.exit(1)

# Notice the summary flag and remove it from args if it is present.
summary = False
if args[0] == '--summaryfile':
summary = True
del args[0]

# +++your code here+++
# For each filename, get the names, then either print the text output
# or write it to a summary file
for filename in args:
names = extract_names(filename)

text = '\n'.join(names)

if summary:
outf = open(filename + '.summary', 'w')
outf.write(text + '\n')
outf.close()
else:
print text

if __name__ == '__main__':
main()

Google Python Class

1 comment:

UnknownMarch 16, 2014 at 8:05 PM
Hi,

How did you make it work? I'm still getting an error! :(
Thanks
ReplyDelete
Replies

Add comment