You can use itertools.zip_longest
to “tie” the per-column values together into rows (columns that are shorter than the others are padded with an empty string).
For example:
import requests
from itertools import zip_longest
from bs4 import BeautifulSoup, NavigableString, Tag
# Page to scrape; its listings are laid out as a sequence of HTML tables.
url = 'http://www.abyznewslinks.com/costa.htm'

# Bound the request so a stalled server cannot hang the script forever, and
# fail loudly on an HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The first four tables and the last one are skipped
# (presumably page chrome rather than listings -- verify against the page).
for table in soup.select('table')[4:-1]:
    # One list of cell values per <font> element, i.e. per visual column.
    columns = []
    for font in table.select('font'):
        cells = []
        for node in font.contents:
            if isinstance(node, NavigableString):
                # Keep bare text, ignoring whitespace-only nodes.
                text = node.strip()
                if text:
                    cells.append(text)
            elif isinstance(node, Tag) and node.name == 'a':
                # Links are kept as a [label, target] pair.
                cells.append([node.text, node['href']])
        columns.append(cells)

    # Transpose the columns into rows; columns that run out early are
    # padded with '' so every row has the same number of fields.
    for row in zip_longest(*columns, fillvalue=''):
        print(row)

    # Visual separator between tables.
    print('-' * 80)
Prints:
('Costa Rica Newspapers and News Media - National',)
--------------------------------------------------------------------------------
('Costa Rica - Broadcast News Media',)
--------------------------------------------------------------------------------
('National', ['Columbia', 'https://columbia.co.cr/'], 'BC', 'GI', 'SPA', 'Radio')
('National', ['Monumental', 'http://www.monumental.co.cr/'], 'BC', 'GI', 'SPA', 'Radio')
('National', ['Multimedios', 'https://www.multimedios.cr/'], 'BC', 'GI', 'SPA', 'TV')
('National', ['Repretel', 'http://www.repretel.com/'], 'BC', 'GI', 'SPA', 'TV')
('National', ['Sinart', 'http://www.costaricanoticias.cr/'], 'BC', 'GI', 'SPA', 'Radio TV')
('National', ['Teletica', 'https://www.teletica.com/'], 'BC', 'GI', 'SPA', 'TV')
--------------------------------------------------------------------------------
('Costa Rica - Internet News Media',)
--------------------------------------------------------------------------------
('National', ['A Diario CR', 'http://adiariocr.com/'], 'IN', 'GI', 'SPA')
('National', ['AM Costa Rica', 'http://www.amcostarica.com/'], 'IN', 'GI', 'ENG')
('National', ['AM Prensa', 'https://amprensa.com/'], 'IN', 'GI', 'SPA')
('National', ['BS Noticias', 'http://www.bsnoticias.cr/'], 'IN', 'GI', 'SPA')
('National', ['Costa Rica News', 'https://thecostaricanews.com/'], 'IN', 'GI', 'ENG')
('National', ['Costa Rica Star', 'https://news.co.cr/'], 'IN', 'GI', 'ENG')
('National', ['Costarican Times', 'https://www.costaricantimes.com/'], 'IN', 'GI', 'ENG')
('National', ['CR Hoy', 'https://www.crhoy.com/'], 'IN', 'GI', 'SPA')
('National', ['Delfino', 'https://delfino.cr/'], 'IN', 'GI', 'SPA')
('National', ['El Guardian', 'https://elguardian.cr/'], 'IN', 'GI', 'SPA')
('National', ['El Mundo', 'https://www.elmundo.cr/'], 'IN', 'GI', 'SPA')
('National', ['El Pais', 'http://www.elpais.cr/'], 'IN', 'GI', 'SPA')
('National', ['El Periodico CR', 'https://elperiodicocr.com/'], 'IN', 'GI', 'SPA')
('National', ['Informa Tico', 'http://informa-tico.com/'], 'IN', 'GI', 'SPA')
('National', ['La Prensa Libre', 'http://www.laprensalibre.cr/'], 'IN', 'GI', 'SPA')
('National', ['NCR Noticias Costa Rica', 'https://ncrnoticias.com/'], 'IN', 'GI', 'SPA')
('National', ['No Ticiero', 'http://no.ticiero.com/'], 'IN', 'GI', 'SPA')
('National', ['Noticias al Instante Costa Rica', 'https://www.noticiasalinstante.cr/'], 'IN', 'GI', 'SPA')
('National', ['Noticias Costa Rica', 'https://noticiascostarica.com/'], 'IN', 'GI', 'SPA')
('National', ['Q Costa Rica', 'http://qcostarica.com/'], 'IN', 'GI', 'ENG')
('National', ['Tico Deporte', 'https://www.ticodeporte.com/'], 'IN', 'SP', 'SPA')
('National', ['Today Costa Rica', 'http://todaycostarica.com/'], 'IN', 'GI', 'ENG')
--------------------------------------------------------------------------------
('Costa Rica - Magazine News Media',)
--------------------------------------------------------------------------------
('National', ['EKA', 'https://www.ekaenlinea.com/'], 'MG', 'BU', 'SPA')
--------------------------------------------------------------------------------
('Costa Rica - Newspaper News Media',)
--------------------------------------------------------------------------------
('National', ['Diario Extra', 'http://www.diarioextra.com/'], 'NP', 'GI', 'SPA')
('National', ['La Nacion', 'https://www.nacion.com/'], 'NP', 'GI', 'SPA')
('National', ['La Republica', 'https://www.larepublica.net/'], 'NP', 'GI', 'SPA')
('National', ['La Teja', 'https://www.lateja.cr/'], 'NP', 'GI', 'SPA')
--------------------------------------------------------------------------------
('Costa Rica Newspapers and News Media - Local',)
--------------------------------------------------------------------------------
('Alajuela',)
--------------------------------------------------------------------------------
('Alajuela', ['El Sol', 'https://elsoldeoccidente.com/'], 'NP', 'GI', 'SPA')
('Alajuela', ['La Segunda', 'http://www.periodicolasegundacr.com/'], 'NP', 'GI', 'SPA')
('Grecia', ['Mi Tierra', 'http://www.periodicomitierra.com/'], 'NP', 'GI', 'SPA')
('San Carlos', ['La Region', 'http://laregion.cr/'], 'NP', 'GI', 'SPA')
('San Carlos', ['San Carlos al Dia', 'https://www.sancarlosaldia.com/'], 'IN', 'GI', 'SPA')
('San Carlos', ['San Carlos Digital', 'https://sancarlosdigital.com/'], 'IN', 'GI', 'SPA')
--------------------------------------------------------------------------------
('Cartago',)
--------------------------------------------------------------------------------
('Cartago', ['Cartago Hoy', 'http://www.cartagohoy.com/'], 'IN', 'IG', 'SPA')
('Paraiso', ['Brujos Paraiso', 'http://www.brujosparaiso.com/'], 'IN', 'IG', 'SPA')
--------------------------------------------------------------------------------
('Guanacaste',)
--------------------------------------------------------------------------------
('Bagaces', ['Guanacaste na la Altura', 'https://www.guanacastealaaltura.com/'], 'NP', 'GI', 'SPA', 'TV')
('Filadelfia', ['El Independiente', 'https://diariodigitalelindependiente.com/'], 'IN', 'GI', 'SPA', 'Radio')
('Liberia', ['Canal 5 Guanacaste', 'http://www.canal5guanacaste.com/'], 'BC', 'GI', 'SPA', '')
('Liberia', ['Guana Noticias', 'https://guananoticias.com/'], 'IN', 'GI', 'SPA', '')
('Liberia', ['Mensaje', 'https://www.periodicomensaje.com/'], 'NP', 'GI', 'SPA', '')
('Liberia', ['Mundo Guanacaste', 'http://www.mundoguanacaste.com/'], 'IN', 'GI', 'SPA', '')
('Liberia', ['NTG Noticias', 'https://ntgnoticias.com/'], 'IN', 'GI', 'SPA', '')
('Liberia', ['Radio Pampa', 'http://www.radiolapampa.net/'], 'BC', 'GI', 'SPA', '')
('Nicoya', ['La Voz de Guanacaste', 'https://vozdeguanacaste.com/'], 'NP', 'GI', 'SPA', '')
('Nicoya', ['Voice of Guanacaste', 'https://vozdeguanacaste.com/en'], 'NP', 'GI', 'ENG', '')
('Tamarindo', ['Tamarindo News', 'http://tamarindonews.com/'], 'IN', 'GI', 'ENG', '')
--------------------------------------------------------------------------------
('Heredia',)
--------------------------------------------------------------------------------
('Flores', ['El Florense', 'http://elflorense.com/'], 'NP', 'GI', 'SPA')
('Heredia', ['Fortinoticias', 'http://fortinoticias.com/'], 'IN', 'GI', 'SPA')
--------------------------------------------------------------------------------
('Limon',)
--------------------------------------------------------------------------------
('Limon', ['El Independiente', 'https://www.elindependiente.co.cr/'], 'NP', 'GI', 'SPA')
('Limon', ['Limon Hoy', 'https://www.limonhoy.com/'], 'IN', 'GI', 'SPA')
--------------------------------------------------------------------------------
('Puntarenas',)
--------------------------------------------------------------------------------
('Paquera', ['Mi Prensa', 'http://www.miprensacr.com/'], 'IN', 'GI', 'SPA')
('Puntarenas', ['Puntarenas Se Oye', 'https://www.puntarenasseoye.com/'], 'IN', 'GI', 'SPA')
--------------------------------------------------------------------------------
('San Jose',)
--------------------------------------------------------------------------------
('Acosta', ['El Jornal', 'http://eljornalcr.com/'], 'NP', 'GI', 'SPA', 'TV')
('Goicochea', ['La Voz de Goicochea', 'https://www.lavozdegoicoechea.info/'], 'IN', 'GI', 'SPA', 'TV')
('Perez Zeledon', ['Canal 14', 'http://www.tvsur.co.cr/'], 'BC', 'GI', 'SPA', '')
('Perez Zeledon', ['Enlace', 'https://www.enlacecr.com/'], 'NP', 'GI', 'SPA', '')
('Perez Zeledon', ['PZ Actual', 'http://www.pzactual.com/'], 'IN', 'GI', 'SPA', '')
('Perez Zeledon', ['PZ Noticias', 'http://www.pznoticias.org/'], 'IN', 'GI', 'SPA', '')
('San Jose', ['Diario Extra', 'http://www.diarioextra.com/'], 'NP', 'GI', 'SPA', '')
('San Jose', ['El Financiero', 'https://www.elfinancierocr.com/'], 'NP', 'BU', 'SPA', '')
('San Jose', ['Extra TV', 'http://www.extratv42.com/'], 'BC', 'GI', 'SPA', '')
('San Jose', ['La Gaceta', 'http://www.gaceta.go.cr/gaceta/'], 'NP', 'GO', 'SPA', '')
('San Jose', ['La Nacion', 'https://www.nacion.com/'], 'NP', 'GI', 'SPA', '')
('San Jose', ['La Republica', 'https://www.larepublica.net/'], 'NP', 'GI', 'SPA', '')
('San Jose', ['La Teja', 'https://www.lateja.cr/'], 'NP', 'GI', 'SPA', '')
('San Jose', ['Tico Times', 'http://www.ticotimes.net/'], 'NP', 'GI', 'ENG', '')
('Tibas', ['Gente', 'http://periodicogente.co.cr/'], 'NP', 'GI', 'SPA', '')
--------------------------------------------------------------------------------