Closed mohammad-fahs closed 2 months ago
package com.ZahraaSaleh.web_scraper;
/**
 * Mutable holder for the details of a single item: its title, link, image URL,
 * summary details and price, all kept as plain strings.
 */
public class CosmalineSoftWave {

    String title;
    String link;
    String imageUrl;
    String summaryDetails;
    String price;

    /**
     * Creates an item with every field supplied up front.
     *
     * @param title          the item title
     * @param link           the link associated with the item
     * @param imageUrl       the URL of the item's image
     * @param summaryDetails the summary text for the item
     * @param price          the price, kept as text exactly as given
     */
    public CosmalineSoftWave(String title, String link, String imageUrl, String summaryDetails, String price) {
        this.title = title;
        this.link = link;
        this.imageUrl = imageUrl;
        this.summaryDetails = summaryDetails;
        this.price = price;
    }

    /** @return the item title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the new item title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the link associated with the item */
    public String getLink() {
        return this.link;
    }

    /** @param link the new link */
    public void setLink(String link) {
        this.link = link;
    }

    /** @return the URL of the item's image */
    public String getImageUrl() {
        return this.imageUrl;
    }

    /** @param imageUrl the new image URL */
    public void setImageUrl(String imageUrl) {
        this.imageUrl = imageUrl;
    }

    /** @return the summary text for the item */
    public String getSummaryDetails() {
        return this.summaryDetails;
    }

    /** @param summaryDetails the new summary text */
    public void setSummaryDetails(String summaryDetails) {
        this.summaryDetails = summaryDetails;
    }

    /** @return the price as text */
    public String getPrice() {
        return this.price;
    }

    /** @param price the new price text */
    public void setPrice(String price) {
        this.price = price;
    }

    /**
     * Renders all five fields as {@code CosmalineSoftWave{name='value', ...}};
     * a {@code null} field is rendered as the text {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CosmalineSoftWave{");
        text.append("title='").append(title).append('\'');
        text.append(", link='").append(link).append('\'');
        text.append(", imageUrl='").append(imageUrl).append('\'');
        text.append(", summaryDetails='").append(summaryDetails).append('\'');
        text.append(", price='").append(price).append('\'');
        text.append('}');
        return text.toString();
    }
}
package com.ZahraaSaleh.web_scraper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Command-line runner that reads a URL from standard input, downloads that page with
 * JSoup and prints the name, link, image URL and price of every {@code td.oe_product}
 * element it contains.
 */
@Component
public class ScrapingApplication implements CommandLineRunner {

    // CSS selectors for the listing markup this runner reads.
    private static final String PRODUCT_SELECTOR = "td.oe_product";
    private static final String TITLE_LINK_SELECTOR = "h6.o_wsale_products_item_title a";
    private static final String IMAGE_SELECTOR = "div.oe_product_image img";
    private static final String PRICE_SELECTOR = "div.product_price span.oe_currency_value";

    /**
     * Prompts for a URL, fetches it and prints every product found on the page.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the page cannot be fetched or standard input yields no token
     */
    @Override
    public void run(String... args) throws Exception {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner sc = new Scanner(System.in);
        System.out.println("please enter the url to scrape ");
        String url = sc.next();

        //Scraping
        Document document = Jsoup.connect(url).get();
        List<CosmalineSoftWave> cosmalineSoftWaves = extractProducts(document);

        for (CosmalineSoftWave product : cosmalineSoftWaves) {
            printProduct(product);
        }
    }

    /** Builds one CosmalineSoftWave per product cell in the document, in page order. */
    private List<CosmalineSoftWave> extractProducts(Document document) {
        List<CosmalineSoftWave> found = new ArrayList<>();
        for (Element productElement : document.select(PRODUCT_SELECTOR)) {
            // The same anchor carries both the product name and its URL, so select it once.
            Elements titleLink = productElement.select(TITLE_LINK_SELECTOR);
            String productName = titleLink.text();
            String productUrl = titleLink.attr("href");
            String imageUrl = productElement.select(IMAGE_SELECTOR).attr("src");
            String price = productElement.select(PRICE_SELECTOR).text();
            // No summary is selected from the listing, so that field is left empty.
            found.add(new CosmalineSoftWave(productName, productUrl, imageUrl, "", price));
        }
        return found;
    }

    /** Prints the extracted fields of one product followed by a separator line. */
    private void printProduct(CosmalineSoftWave product) {
        System.out.println("Product Name: " + product.getTitle());
        System.out.println("Product URL: " + product.getLink());
        System.out.println("Image URL: " + product.getImageUrl());
        System.out.println("Price: $" + product.getPrice());
        System.out.println("---------------------------");
    }
}
Task 2:
Objective:
In this task, you will initialize a new Spring Boot project, add the JSoup dependency, and write a simple Java program to scrape data from a website of your choice. The goal is to get hands-on experience with setting up a Spring Boot project, using an external library (JSoup), and applying web scraping techniques.
Instructions:
com.yourname
web-scraper
Web Scraper
com.yourname.webscraper
Spring Web
dependency (to allow adding more features later).
pom.xml file in the root directory of your project.
<dependencies> tag:
pom.xml file and allow IntelliJ to update the Maven project to download the JSoup library.
com.yourname.webscraper package that implements the CommandLineRunner interface.
run method, use JSoup to connect to the website you chose and scrape the data.
Submit Your Work:
CommandLineRunner.
Resources that can help: