# add by elnino
def search_by_author(self,
                     author: str,
                     page: int,
                     ) -> SearchResult:
    """Return one page of the galleries listed on an artist's page.

    Requests ``https://nhentai.net/artist/<author>`` with ``page`` as a query
    parameter and parses the returned HTML into thumbnails.

    Args:
        author: Artist name as used in the artist URL. Pass it un-encoded;
            it is percent-encoded here before being placed in the path.
        page: Page number sent as the ``page`` query parameter.

    Returns:
        A ``SearchResult`` whose ``query`` is ``'artist/' + author``.
        ``total_pages`` is read from the pagination section's "last" link
        and is 1 when that link is absent. ``total_results`` is 0 when no
        gallery is found; otherwise it is ``25 * total_pages`` when a
        pagination section exists, else the number of parsed thumbnails.
    """
    # Function-scope import so the method does not rely on the file's
    # top-level imports being changed.
    from urllib.parse import quote

    # Percent-encode the name so characters such as '/', '?' or '#' cannot
    # change which path is requested.
    author_slug = quote(author, safe='')
    request_response = self.__make_request(url=f'https://nhentai.net/artist/{author_slug}',
                                           params={'page': page})
    query = 'artist/' + author

    # below code is the same as the search() function
    soup = BeautifulSoup(request_response.text, 'html.parser')
    search_results_container = soup.find('div', {'class': 'container'})
    pagination_container = soup.find('section', {'class': 'pagination'})

    last_page_a_tag = (pagination_container.find('a', {'class': 'last'})  # type: ignore
                       if pagination_container else None)
    # The "last" link's href ends with "=<number>"; that number is the page count.
    total_pages = int(last_page_a_tag['href'].split('=')[-1]) if last_page_a_tag else 1  # type: ignore

    search_results = (search_results_container.find_all('div', {'class': 'gallery'})  # type: ignore
                      if search_results_container else [])
    if not search_results:
        # Covers both "no container" and "container without galleries".
        return SearchResult(query=query,
                            total_pages=total_pages,
                            page=page,
                            total_results=0,
                            results=[])

    thumbs = []
    for gallery in search_results:
        a_tag = gallery.find('a', {'class': 'cover'})
        if a_tag is None:
            continue

        # The id is the second-to-last '/'-separated piece of the href.
        # A missing or too-short href is skipped like any other malformed
        # entry instead of raising KeyError/IndexError.
        href_parts = (a_tag.get('href') or '').split('/')
        doujin_id = href_parts[-2] if len(href_parts) >= 2 else ''
        if doujin_id == '':
            continue

        cover_uri = None
        width = None
        height = None
        result_cover = a_tag.find('img', {'class': 'lazyload'})
        if result_cover is not None:
            # .get() lets the `or` fallbacks below also cover an <img> that
            # lacks one of these attributes.
            cover_uri = result_cover.get('data-src')
            width = result_cover.get('width')
            height = result_cover.get('height')

        result_caption = a_tag.find('div', {'class': 'caption'})
        caption = result_caption.text if result_caption is not None else None

        thumbs.append(Thumb(id=doujin_id,
                            cover=Image(uri=cover_uri or '',
                                        mime=MIME.J,
                                        width=width or 0,
                                        height=height or 0),
                            title=caption or ''))

    return SearchResult(query=query,
                        total_pages=total_pages,
                        page=page,
                        total_results=25 * total_pages if pagination_container else len(thumbs),
                        results=thumbs)
and tested with the following code:
# Manual check: print every result on page 1 of the artist page for 'yd'.
# user_agent and cf_clearance are empty strings here -- presumably
# placeholders to be replaced with real values; confirm before running.
config = CloudFlareConfig(user_agent='', cf_clearance='')
client = NHentai(config)
for thumb in client.search_by_author('yd', 1).results:
    print(thumb)
and it works.
Similarly, the tag page can also be searched in this way.
So, I think the process of parsing html can be abstracted to a common function, which can be called by search(), search_by_author() and search_by_tag().
I am not good at Python, and the above function is enough for my use, so I am filing this as an issue instead of a PR.
Searching by author, or viewing all doujinshi of an author, is a useful function to me.
We can observe that the artist page and the search page are quite alike (maybe the same) in nhentai.
So, I added a function to nhentai.py:
and tested with the following code:
and it works.
Similarly, the tag page can also be searched in this way.
So, I think the process of parsing html can be abstracted to a common function, which can be called by `search()`, `search_by_author()` and `search_by_tag()`.
I am not good at Python, and the above function is enough for my use, so I am filing this as an issue instead of a PR.