# Click on regress.py to get source.
from scipy import array
from scipy.stats import linregress

from geonames import parse_country_info


def get_country_data(country):
    """Return the area and population text of one country element.

    ``country`` is any object exposing ``findtext(tag)``; the
    ``areaInSqKm`` and ``population`` children are looked up on it.

    Returns:
        ``(size, population)`` as the raw text values when both are
        present and non-empty, otherwise ``(None, None)``.
    """
    area, inhabitants = (
        country.findtext(tag) for tag in ('areaInSqKm', 'population')
    )
    # A missing child yields None and an empty one yields ''; both are
    # falsy, so either case discards the whole record.
    if not (area and inhabitants):
        return None, None
    return area, inhabitants

def parse_pop_size_data():
    """Load per-country area and population figures from ``geonames.xml``.

    Every ``country`` element under the document root is passed through
    ``get_country_data``; records lacking either value are dropped and the
    remaining text values are converted to floats.

    Returns:
        A ``(size, population)`` tuple of 1-D float arrays. The arrays are
        row-aligned (index ``i`` in both refers to the same country) and
        ordered by ascending size, ties broken by population.

    Raises:
        ValueError: if a retained value is not parseable as a float.
    """
    tree = parse_country_info('geonames.xml')
    root = tree.getroot()
    pairs = (get_country_data(country) for country in root.findall('country'))
    # Materialise a real, sorted list of rows before handing it to array():
    #  * on Python 3, filter()/map() return lazy iterators that array()
    #    cannot convert into a numeric table;
    #  * sorting the tuples orders whole records, whereas an in-place
    #    ndarray.sort() on the 2-D table sorts along the last axis and would
    #    swap size and population inside any row where size > population.
    rows = sorted(
        (float(size), float(population))
        for size, population in pairs
        if size and population
    )
    # reshape keeps the table two-dimensional even when no rows survive,
    # so the column slices below stay valid.
    data = array(rows, dtype=float).reshape(-1, 2)
    size = data[:, 0]
    population = data[:, 1]
    return (size, population)


if __name__ == '__main__':
    # Fit a least-squares line with country size as x and population as y,
    # then report slope (a), intercept (b), correlation coefficient and the
    # standard error of the fit. The p-value is returned but not printed.
    sizes, populations = parse_pop_size_data()
    slope, intercept, r_value, _p_value, std_err = linregress(sizes, populations)
    report = 'Regression: a=%.2f b=%.2f, r=%.2f, std error= %.3f' % (
        slope, intercept, r_value, std_err)
    print(report)