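@param schooList Optional HTML node whose direct <a> children link to schools, in the form <a href="/schools/school-key">School Name</a>; links whose href does not start with "/schools/" are ignored.
@return An iterator of (team key, team name) pairs, where the key is the href with the leading "/schools/" removed and the name is the link text.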
*/
def extractNamesAndKeys(schooList: Option[Node]): Iterator[(String, String)] = {
  // Only hrefs under this prefix count as school links; what follows the prefix is the key.
  val prefix = "/schools/"

  schooList.iterator.flatMap { container =>
    // `\` selects the direct <a> children of the container node.
    (container \ "a").flatMap { anchor =>
      attrValue(anchor, "href")
        .filter(path => path.startsWith(prefix))
        .map(path => path.substring(prefix.length) -> anchor.text)
    }
  }
}
package junk
import java.io.{Reader, StringReader}

import org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl

import scala.util.control.Exception._
import scala.xml._
import scala.xml.parsing.NoBindingFactoryAdapter
/**
 * Helpers that parse HTML into scala.xml nodes (via TagSoup's SAX parser) and
 * extract school keys, names, logos and colours from the resulting tree.
 */
object Bitch {

  /** Prefix a link's `href` must start with to count as a school link; the rest of the href is the key. */
  private val SchoolsPrefix = "/schools/"

  /**
   * Parses `source` with TagSoup's SAX parser into a scala.xml tree.
   *
   * `source` is by-name so that it is constructed inside the exception guard.
   *
   * @param source the SAX input to parse
   * @return the root node, or `None` if an `Exception` was thrown while parsing
   */
  private def parseHtml(source: => InputSource): Option[Node] =
    catching(classOf[Exception]).opt {
      val adapter = new NoBindingFactoryAdapter()
      adapter.loadXML(source, new SAXFactoryImpl().newSAXParser())
    }

  /** Finds the first direct `span` child of `n` whose `class` attribute equals `cls`. */
  private def spanWithClass(n: Node, cls: String): Option[Node] =
    (n \ "span").find(span => attrMatch(span, "class", cls))

  /**
   * Loads and parses the HTML identified by `url`.
   *
   * @param url system identifier handed to the SAX `InputSource`
   * @return the root node, or `None` if loading or parsing threw an `Exception`
   */
  def loadHtml(url: String): Option[Node] =
    parseHtml(new InputSource(url))

  /**
   * Parses HTML read from `r`.
   *
   * @param r character stream supplying the HTML
   * @return the root node, or `None` if reading or parsing threw an `Exception`
   */
  def loadHtmlFromReader(r: Reader): Option[Node] =
    parseHtml(new InputSource(r))

  /**
   * Parses HTML held in a string.
   *
   * @param s the HTML text
   * @return the root node, or `None` if parsing threw an `Exception`
   */
  def loadHtmlFromString(s: String): Option[Node] =
    loadHtmlFromReader(new StringReader(s))

  /**
   * Reads a whole file into a string.
   *
   * @param fileName path of the file to read
   * @return the file's full contents
   */
  def loadFile(fileName: String): String = {
    val source = scala.io.Source.fromFile(fileName)
    // Close the underlying file handle even if reading fails.
    try source.mkString
    finally source.close()
  }

  /**
   * Extracts team keys and names from the `div` with id `school-list`.
   *
   * @param node node whose direct `div` children are searched
   * @return (team key, team name) pairs; empty when no such `div` is found
   */
  def teamNamesFromAlpha(node: Node): Iterator[(String, String)] = {
    // NOTE(review): `\` only looks at direct children; if the div is nested
    // deeper in the page, `\\` may be what is intended -- confirm.
    // `find` already yields a single Node, so no further `headOption` is needed.
    val schooList: Option[Node] =
      (node \ "div").find(n => attrMatch(n, "id", "school-list"))
    extractNamesAndKeys(schooList)
  }

  /**
   * Collects school links from the direct `a` children of the given node.
   *
   * @param schooList optional node containing links of the form
   *                  `<a href="/schools/school-key">School Name</a>`
   * @return (team key, team name) pairs, where the key is the href with the
   *         leading "/schools/" removed and the name is the link text
   */
  def extractNamesAndKeys(schooList: Option[Node]): Iterator[(String, String)] =
    for {
      d    <- schooList.iterator
      link <- d \ "a"
      href <- attrValue(link, "href")
      if href.startsWith(SchoolsPrefix)
    } yield href.substring(SchoolsPrefix.length) -> link.text

  /**
   * Reads the text of an attribute.
   *
   * @param n    node to inspect
   * @param attr attribute name
   * @return text of the attribute's first value node, or `None` if the attribute is absent or empty
   */
  def attrValue(n: Node, attr: String): Option[String] =
    n.attribute(attr).flatMap(_.headOption).map(_.text)

  /**
   * Tests whether an attribute has a given value.
   *
   * @param n     node to inspect
   * @param attr  attribute name
   * @param value expected text
   * @return `true` if any value node of the attribute has exactly this text; `false` if none does or the attribute is absent
   */
  def attrMatch(n: Node, attr: String, value: String): Boolean =
    n.attribute(attr).exists(values => values.exists(_.text == value))

  /**
   * Prints the school name, logo source and primary colour found in `n`,
   * one per line, each as an `Option`.
   *
   * @param n root node of a team page
   */
  def scrapeTeamPage(n: Node): Unit = {
    println(schoolName(n))
    println(schoolLogo(n))
    println(schoolPrimaryColor(n))
  }

  /**
   * @param n node whose direct `span` children are searched
   * @return text of the first `span` with class `school-name`, if any
   */
  def schoolName(n: Node): Option[String] =
    spanWithClass(n, "school-name").map(_.text)

  /**
   * @param n node whose direct `span` children are searched
   * @return `src` of the first `img` directly inside the first `span` with class `school-logo`, if any
   */
  def schoolLogo(n: Node): Option[String] =
    for {
      span <- spanWithClass(n, "school-logo")
      img  <- (span \ "img").headOption
      src  <- attrValue(img, "src")
    } yield src

  /**
   * @param n node whose direct `span` children are searched
   * @return the `style` attribute of the first `span` with class `school-logo`,
   *         with the first "border-color:" and every ";" removed and the result trimmed
   */
  def schoolPrimaryColor(n: Node): Option[String] =
    spanWithClass(n, "school-logo")
      .flatMap(span => attrValue(span, "style"))
      .map(_.replaceFirst("border-color:", "").replace(";", "").trim)

  /**
   * Prints the team key/name pairs found in "teamsg.htm", then the scraped
   * details of "clemsoneg.html".
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val teams: Iterator[(String, String)] =
      loadHtmlFromString(loadFile("teamsg.htm"))
        .map(n => teamNamesFromAlpha(n))
        .getOrElse(Iterator.empty) // same element type as the success case; prints nothing
    teams.foreach(println(_))

    // Side effect only, so `foreach` rather than `map`.
    loadHtmlFromString(loadFile("clemsoneg.html")).foreach(n => scrapeTeamPage(n))
  }
}