from langchain_community.document_transformers import Html2TextTransformer
urls = ["https://www.espn.com", "https://lilianweng.github.io/posts/2023-06-23-agent/"]
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)
print(docs_transformed[0].page_content[1000:2000])
print(docs_transformed[1].page_content[1000:2000])