Closed bdcallen closed 5 years ago
@iangow I just did this
library(RCurl)
fileURL <- "https://www.sec.gov/Archives/edgar/data/821026/000120919118029790/doc4.xml"
xData <- getURL(fileURL)
data <- xmlParse(xData)
xml_data <- xmlToList(data)
xml_data
and got
$schemaVersion
[1] "X0306"
$documentType
[1] "4"
$periodOfReport
[1] "2018-05-11"
$notSubjectToSection16
[1] "0"
$issuer
$issuer$issuerCik
[1] "0000821026"
$issuer$issuerName
[1] "Andersons, Inc."
$issuer$issuerTradingSymbol
[1] "ANDE"
$reportingOwner
$reportingOwner$reportingOwnerId
$reportingOwner$reportingOwnerId$rptOwnerCik
[1] "0001200124"
$reportingOwner$reportingOwnerId$rptOwnerName
[1] "KING ROBERT J JR"
$reportingOwner$reportingOwnerAddress
$reportingOwner$reportingOwnerAddress$rptOwnerStreet1
[1] "1947 BRIARFIELD BLVD."
$reportingOwner$reportingOwnerAddress$rptOwnerStreet2
NULL
$reportingOwner$reportingOwnerAddress$rptOwnerCity
[1] "MAUMEE"
$reportingOwner$reportingOwnerAddress$rptOwnerState
[1] "OH"
$reportingOwner$reportingOwnerAddress$rptOwnerZipCode
[1] "43537"
$reportingOwner$reportingOwnerAddress$rptOwnerStateDescription
NULL
$reportingOwner$reportingOwnerRelationship
$reportingOwner$reportingOwnerRelationship$isDirector
[1] "1"
$reportingOwner$reportingOwnerRelationship$isOfficer
[1] "0"
$reportingOwner$reportingOwnerRelationship$isTenPercentOwner
[1] "0"
$reportingOwner$reportingOwnerRelationship$isOther
[1] "0"
$nonDerivativeTable
$nonDerivativeTable$nonDerivativeTransaction
$nonDerivativeTable$nonDerivativeTransaction$securityTitle
$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value
[1] "COMMON STOCK"
$nonDerivativeTable$nonDerivativeTransaction$transactionDate
$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value
[1] "2018-05-11"
$nonDerivativeTable$nonDerivativeTransaction$deemedExecutionDate
NULL
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType
[1] "4"
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode
[1] "J"
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved
[1] "0"
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$footnoteId
id
"F1"
$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness
$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness$value
NULL
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value
[1] "1.9"
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value
[1] "32.65"
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value
[1] "A"
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value
[1] "9314.9"
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value
[1] "D"
$nonDerivativeTable$nonDerivativeTransaction
$nonDerivativeTable$nonDerivativeTransaction$securityTitle
$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value
[1] "COMMON STOCK"
$nonDerivativeTable$nonDerivativeTransaction$transactionDate
$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value
[1] "2018-05-11"
$nonDerivativeTable$nonDerivativeTransaction$deemedExecutionDate
NULL
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType
[1] "4"
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode
[1] "A"
$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved
[1] "0"
$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness
$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness$value
NULL
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value
[1] "2286"
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value
[1] "0.00"
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode
$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value
[1] "A"
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction
$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value
[1] "11600.9"
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership
$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value
[1] "D"
$nonDerivativeTable$nonDerivativeHolding
$nonDerivativeTable$nonDerivativeHolding$securityTitle
$nonDerivativeTable$nonDerivativeHolding$securityTitle$value
[1] "COMMON STOCK"
$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts
$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts$sharesOwnedFollowingTransaction
$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts$sharesOwnedFollowingTransaction$value
[1] "18970"
$nonDerivativeTable$nonDerivativeHolding$ownershipNature
$nonDerivativeTable$nonDerivativeHolding$ownershipNature$directOrIndirectOwnership
$nonDerivativeTable$nonDerivativeHolding$ownershipNature$directOrIndirectOwnership$value
[1] "I"
$nonDerivativeTable$nonDerivativeHolding$ownershipNature$natureOfOwnership
$nonDerivativeTable$nonDerivativeHolding$ownershipNature$natureOfOwnership$value
[1] "Robert J. King Jr. Trust u/a dated Dec. 20, 2013"
$footnotes
$footnotes$footnote
$footnotes$footnote$text
[1] "Shares in lieu of cash dividend."
$footnotes$footnote$.attrs
id
"F1"
$remarks
NULL
$ownerSignature
$ownerSignature$signatureName
[1] "Robert J. King, by Melissa Trippel, Limited Power of Attorney"
$ownerSignature$signatureDate
[1] "2018-05-14"
It seems there are a lot of nodes that have subnodes, and that the depth of the subnodes is not always the same between different nodes. I'm wondering if all these entries should be put in the one dataframe, or in several. By the way, I tried xmlToDataFrame(data)
, and it didn't work, which I suspect is due to the different depths of subnodes.
Note that your code requires library(XML)
. I believe xml2
is a newer alternative.
library(xml2)
fileURL <- "https://www.sec.gov/Archives/edgar/data/821026/000120919118029790/doc4.xml"
xData <- read_xml(fileURL)
xml_data <- as_list(xData)
xml_data
#> $ownershipDocument
#> $ownershipDocument$schemaVersion
#> $ownershipDocument$schemaVersion[[1]]
#> [1] "X0306"
#>
#>
#> $ownershipDocument$documentType
#> $ownershipDocument$documentType[[1]]
#> [1] "4"
#>
#>
#> $ownershipDocument$periodOfReport
#> $ownershipDocument$periodOfReport[[1]]
#> [1] "2018-05-11"
#>
#>
#> $ownershipDocument$notSubjectToSection16
#> $ownershipDocument$notSubjectToSection16[[1]]
#> [1] "0"
#>
#>
#> $ownershipDocument$issuer
#> $ownershipDocument$issuer$issuerCik
#> $ownershipDocument$issuer$issuerCik[[1]]
#> [1] "0000821026"
#>
#>
#> $ownershipDocument$issuer$issuerName
#> $ownershipDocument$issuer$issuerName[[1]]
#> [1] "Andersons, Inc."
#>
#>
#> $ownershipDocument$issuer$issuerTradingSymbol
#> $ownershipDocument$issuer$issuerTradingSymbol[[1]]
#> [1] "ANDE"
#>
#>
#>
#> $ownershipDocument$reportingOwner
#> $ownershipDocument$reportingOwner$reportingOwnerId
#> $ownershipDocument$reportingOwner$reportingOwnerId$rptOwnerCik
#> $ownershipDocument$reportingOwner$reportingOwnerId$rptOwnerCik[[1]]
#> [1] "0001200124"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerId$rptOwnerName
#> $ownershipDocument$reportingOwner$reportingOwnerId$rptOwnerName[[1]]
#> [1] "KING ROBERT J JR"
#>
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerStreet1
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerStreet1[[1]]
#> [1] "1947 BRIARFIELD BLVD."
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerStreet2
#> list()
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerCity
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerCity[[1]]
#> [1] "MAUMEE"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerState
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerState[[1]]
#> [1] "OH"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerZipCode
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerZipCode[[1]]
#> [1] "43537"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerAddress$rptOwnerStateDescription
#> list()
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isDirector
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isDirector[[1]]
#> [1] "1"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isOfficer
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isOfficer[[1]]
#> [1] "0"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isTenPercentOwner
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isTenPercentOwner[[1]]
#> [1] "0"
#>
#>
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isOther
#> $ownershipDocument$reportingOwner$reportingOwnerRelationship$isOther[[1]]
#> [1] "0"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value[[1]]
#> [1] "COMMON STOCK"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value[[1]]
#> [1] "2018-05-11"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$deemedExecutionDate
#> list()
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType[[1]]
#> [1] "4"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode[[1]]
#> [1] "J"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved[[1]]
#> [1] "0"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$footnoteId
#> list()
#> attr(,"id")
#> [1] "F1"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness$value
#> list()
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value[[1]]
#> [1] "1.9"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value[[1]]
#> [1] "32.65"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value[[1]]
#> [1] "A"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value[[1]]
#> [1] "9314.9"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value[[1]]
#> [1] "D"
#>
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$securityTitle$value[[1]]
#> [1] "COMMON STOCK"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionDate$value[[1]]
#> [1] "2018-05-11"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$deemedExecutionDate
#> list()
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionFormType[[1]]
#> [1] "4"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$transactionCode[[1]]
#> [1] "A"
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionCoding$equitySwapInvolved[[1]]
#> [1] "0"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionTimeliness$value
#> list()
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionShares$value[[1]]
#> [1] "2286"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionPricePerShare$value[[1]]
#> [1] "0.00"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$transactionAmounts$transactionAcquiredDisposedCode$value[[1]]
#> [1] "A"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$postTransactionAmounts$sharesOwnedFollowingTransaction$value[[1]]
#> [1] "11600.9"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeTransaction$ownershipNature$directOrIndirectOwnership$value[[1]]
#> [1] "D"
#>
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$securityTitle
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$securityTitle$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$securityTitle$value[[1]]
#> [1] "COMMON STOCK"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts$sharesOwnedFollowingTransaction
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts$sharesOwnedFollowingTransaction$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$postTransactionAmounts$sharesOwnedFollowingTransaction$value[[1]]
#> [1] "18970"
#>
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$directOrIndirectOwnership
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$directOrIndirectOwnership$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$directOrIndirectOwnership$value[[1]]
#> [1] "I"
#>
#>
#>
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$natureOfOwnership
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$natureOfOwnership$value
#> $ownershipDocument$nonDerivativeTable$nonDerivativeHolding$ownershipNature$natureOfOwnership$value[[1]]
#> [1] "Robert J. King Jr. Trust u/a dated Dec. 20, 2013"
#>
#>
#>
#>
#>
#>
#> $ownershipDocument$footnotes
#> $ownershipDocument$footnotes$footnote
#> $ownershipDocument$footnotes$footnote[[1]]
#> [1] "Shares in lieu of cash dividend."
#>
#> attr(,"id")
#> [1] "F1"
#>
#>
#> $ownershipDocument$remarks
#> list()
#>
#> $ownershipDocument$ownerSignature
#> $ownershipDocument$ownerSignature$signatureName
#> $ownershipDocument$ownerSignature$signatureName[[1]]
#> [1] "Robert J. King, by Melissa Trippel, Limited Power of Attorney"
#>
#>
#> $ownershipDocument$ownerSignature$signatureDate
#> $ownershipDocument$ownerSignature$signatureDate[[1]]
#> [1] "2018-05-14"
Created on 2018-10-02 by the reprex package (v0.2.1)
PostgreSQL can store XML documents, but the functionality is limited. Perhaps JSON would be a better format. At table with the partial URL from filing_docs
and the JSON equivalent of the XML might be quite workable. One can easily extract values from JSON using functions in PostgreSQL.
library(XML)
library(rjson)
library(RCurl)
#> Loading required package: bitops
xml_url_to_json <- function(url) {
fileURL <- file.path("https://www.sec.gov/Archives", url)
xml_parse <- xmlParse(getURL(fileURL))
xml_root <- xmlRoot(xml_parse)
xml_list <- xmlToList(xml_root,addAttributes = T, simplify = F)
xml_rjson <- toJSON(xml_list)
xml_rjson
}
temp <- xml_url_to_json("edgar/data/821026/000120919118029790/doc4.xml")
cat(temp)
#> {"schemaVersion":"X0306","documentType":"4","periodOfReport":"2018-05-11","notSubjectToSection16":"0","issuer":{"issuerCik":"0000821026","issuerName":"Andersons, Inc.","issuerTradingSymbol":"ANDE"},"reportingOwner":{"reportingOwnerId":{"rptOwnerCik":"0001200124","rptOwnerName":"KING ROBERT J JR"},"reportingOwnerAddress":{"rptOwnerStreet1":"1947 BRIARFIELD BLVD.","rptOwnerStreet2":null,"rptOwnerCity":"MAUMEE","rptOwnerState":"OH","rptOwnerZipCode":"43537","rptOwnerStateDescription":null},"reportingOwnerRelationship":{"isDirector":"1","isOfficer":"0","isTenPercentOwner":"0","isOther":"0"}},"nonDerivativeTable":{"nonDerivativeTransaction":{"securityTitle":{"value":"COMMON STOCK"},"transactionDate":{"value":"2018-05-11"},"deemedExecutionDate":null,"transactionCoding":{"transactionFormType":"4","transactionCode":"J","equitySwapInvolved":"0","footnoteId":{"id":"F1"}},"transactionTimeliness":{"value":null},"transactionAmounts":{"transactionShares":{"value":"1.9"},"transactionPricePerShare":{"value":"32.65"},"transactionAcquiredDisposedCode":{"value":"A"}},"postTransactionAmounts":{"sharesOwnedFollowingTransaction":{"value":"9314.9"}},"ownershipNature":{"directOrIndirectOwnership":{"value":"D"}}},"nonDerivativeTransaction":{"securityTitle":{"value":"COMMON STOCK"},"transactionDate":{"value":"2018-05-11"},"deemedExecutionDate":null,"transactionCoding":{"transactionFormType":"4","transactionCode":"A","equitySwapInvolved":"0"},"transactionTimeliness":{"value":null},"transactionAmounts":{"transactionShares":{"value":"2286"},"transactionPricePerShare":{"value":"0.00"},"transactionAcquiredDisposedCode":{"value":"A"}},"postTransactionAmounts":{"sharesOwnedFollowingTransaction":{"value":"11600.9"}},"ownershipNature":{"directOrIndirectOwnership":{"value":"D"}}},"nonDerivativeHolding":{"securityTitle":{"value":"COMMON STOCK"},"postTransactionAmounts":{"sharesOwnedFollowingTransaction":{"value":"18970"}},"ownershipNature":{"directOrIndirectOwnership":{"value":"I"},"natureOfOwnership":{"value":"Robert J. King Jr. Trust u/a dated Dec. 20, 2013"}}}},"footnotes":{"footnote":{"text":"Shares in lieu of cash dividend.",".attrs":{"id":"F1"}}},"remarks":null,"ownerSignature":{"signatureName":"Robert J. King, by Melissa Trippel, Limited Power of Attorney","signatureDate":"2018-05-14"}}
Created on 2018-10-02 by the reprex package (v0.2.1)
@iangow I just tried this approach
get_info_from_node <- function(node) {
subnode_names <- names(node)
names <- c()
values <- c()
for(name in subnode_names) {
subnames <- names(node[[name]])
if(is.null(subnames)) {
names <- c(names, name)
if(is.null(node[[name]])) {
values <- c(values, NA)
} else {
values <- c(values, node[[name]])
}
} else {
subtree_df <- get_info_from_node(node[[name]])
names <- c(names, paste(name, subtree_df$names, sep = "_"))
values <- c(values, subtree_df$values)
}
}
df <- data.frame(names, values, stringsAsFactors = FALSE)
return(df)
}
xml_to_df <- function(data) {
raw_df <- get_info_from_node(data)
ncols = dim(raw_df)[1]
data_df <- data.frame(matrix(nrow = 0, ncol = ncols))
colnames(data_df) <- raw_df$names
data_df[1, ] = raw_df$values
return(data_df)
}
Admittedly, for now, the column names are too long, but at least this code works in principle. It basically converts the xml data lists to a dataframe.
Using get_info_from_node
on xml_data
I got
> get_info_from_node(xml_data)
names values
1 schemaVersion X0306
2 documentType 4
3 periodOfReport 2018-05-11
4 notSubjectToSection16 0
5 issuer_issuerCik 0000821026
6 issuer_issuerName Andersons, Inc.
7 issuer_issuerTradingSymbol ANDE
8 reportingOwner_reportingOwnerId_rptOwnerCik 0001200124
9 reportingOwner_reportingOwnerId_rptOwnerName KING ROBERT J JR
10 reportingOwner_reportingOwnerAddress_rptOwnerStreet1 1947 BRIARFIELD BLVD.
11 reportingOwner_reportingOwnerAddress_rptOwnerStreet2 <NA>
12 reportingOwner_reportingOwnerAddress_rptOwnerCity MAUMEE
13 reportingOwner_reportingOwnerAddress_rptOwnerState OH
14 reportingOwner_reportingOwnerAddress_rptOwnerZipCode 43537
15 reportingOwner_reportingOwnerAddress_rptOwnerStateDescription <NA>
16 reportingOwner_reportingOwnerRelationship_isDirector 1
17 reportingOwner_reportingOwnerRelationship_isOfficer 0
18 reportingOwner_reportingOwnerRelationship_isTenPercentOwner 0
19 reportingOwner_reportingOwnerRelationship_isOther 0
20 nonDerivativeTable_nonDerivativeTransaction_securityTitle_value COMMON STOCK
21 nonDerivativeTable_nonDerivativeTransaction_transactionDate_value 2018-05-11
22 nonDerivativeTable_nonDerivativeTransaction_deemedExecutionDate <NA>
23 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_transactionFormType 4
24 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_transactionCode J
25 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_equitySwapInvolved 0
26 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_footnoteId_id F1
27 nonDerivativeTable_nonDerivativeTransaction_transactionTimeliness_value <NA>
28 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionShares_value 1.9
29 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionPricePerShare_value 32.65
30 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionAcquiredDisposedCode_value A
31 nonDerivativeTable_nonDerivativeTransaction_postTransactionAmounts_sharesOwnedFollowingTransaction_value 9314.9
32 nonDerivativeTable_nonDerivativeTransaction_ownershipNature_directOrIndirectOwnership_value D
33 nonDerivativeTable_nonDerivativeTransaction_securityTitle_value COMMON STOCK
34 nonDerivativeTable_nonDerivativeTransaction_transactionDate_value 2018-05-11
35 nonDerivativeTable_nonDerivativeTransaction_deemedExecutionDate <NA>
36 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_transactionFormType 4
37 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_transactionCode J
38 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_equitySwapInvolved 0
39 nonDerivativeTable_nonDerivativeTransaction_transactionCoding_footnoteId_id F1
40 nonDerivativeTable_nonDerivativeTransaction_transactionTimeliness_value <NA>
41 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionShares_value 1.9
42 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionPricePerShare_value 32.65
43 nonDerivativeTable_nonDerivativeTransaction_transactionAmounts_transactionAcquiredDisposedCode_value A
44 nonDerivativeTable_nonDerivativeTransaction_postTransactionAmounts_sharesOwnedFollowingTransaction_value 9314.9
45 nonDerivativeTable_nonDerivativeTransaction_ownershipNature_directOrIndirectOwnership_value D
46 nonDerivativeTable_nonDerivativeHolding_securityTitle_value COMMON STOCK
47 nonDerivativeTable_nonDerivativeHolding_postTransactionAmounts_sharesOwnedFollowingTransaction_value 18970
48 nonDerivativeTable_nonDerivativeHolding_ownershipNature_directOrIndirectOwnership_value I
49 nonDerivativeTable_nonDerivativeHolding_ownershipNature_natureOfOwnership_value Robert J. King Jr. Trust u/a dated Dec. 20, 2013
50 footnotes_footnote_text Shares in lieu of cash dividend.
51 footnotes_footnote_.attrs_id F1
52 remarks <NA>
53 ownerSignature_signatureName Robert J. King, by Melissa Trippel, Limited Power of Attorney
54 ownerSignature_signatureDate 2018-05-14
and getting the values of the first few columns of xml_to_df
acting on xml_data
works (showing the full dataframe is rather cumbersome right now),
> eg$remarks
[1] NA
> eg$nonDerivativeTable_nonDerivativeTransaction_securityTitle_value
[1] "COMMON STOCK"
> eg$documentType
[1] "4"
It might be worth understanding how the fields in your data frame above map to those in the TFN data (see #31).
@iangow Just an aside for now, but defining json_to_df
as a very minor modification of `xml_to_df from before,
json_to_df <- function(data_json) {
data_list <- fromJSON(data_json)
raw_df <- get_info_from_node(data_list)
ncols = dim(raw_df)[1]
data_df <- data.frame(matrix(nrow = 0, ncol = ncols))
colnames(data_df) <- raw_df$names
data_df[1, ] = raw_df$values
return(data_df)
}
we can convert a file in JSON format to the type of dataframe I initially made. I just tested this function on temp
, where again
temp <- xml_url_to_json("edgar/data/821026/000120919118029790/doc4.xml")
and it produced the same dataframe that xml_to_df
produces on the original XML file.
@iangow I just been testing my function process_345_filing
for the last couple of hours or so. I've tested it successfully on a few individual cases. I just tried to test it on a sample of 1000 xml documents, I've had a problem with the connections to the database growing out of hand, even though process_345_filing
has a dbDisconnect(pg)
line after doing dbConnect and writing to the database, which lead to me having to shut it down. In that small test, I managed to get data for around 130 filings into the database. If you would like to have a look, the tables are:
edgar.forms345_header
: a table with the heading and signature information from the documentsedgar.forms345_table1
: a table with the non-derivative transactionsedgar.forms345_table2
: a table with the derivative transactionsedgar.forms345_footnotes
: a table with the footnotesedgar.forms345_footnote_indices
: a table containing information on each instance a footnote is indexed in a document.The table forms345_footnotes
should use the name footnote_index
not index
so that the link with edgar.forms345_footnote_indices
is clear (or use index
in both places).
@bdcallen Please investigate these cases below. Perhaps these two tables should be simply one table (the result of the join below).
library(dplyr, warn.conflicts = FALSE)
library(DBI)
Sys.setenv(PGHOST = "10.101.13.99", PGDATABASE = "crsp")
pg <- dbConnect(RPostgres::Postgres())
rs <- dbExecute(pg, "SET search_path TO edgar")
forms345_header <- tbl(pg, "forms345_header")
forms345_table1 <- tbl(pg, "forms345_table1")
forms345_table2 <- tbl(pg, "forms345_table2")
forms345_footnotes <- tbl(pg, "forms345_footnotes")
forms345_footnote_indices <- tbl(pg, "forms345_footnote_indices")
forms345_footnotes %>%
anti_join(
forms345_footnote_indices %>%
rename(index = footnote_index)) %>%
count()
#> Joining, by = c("file_name", "document", "index")
#> # Source: lazy query [?? x 1]
#> # Database: postgres [igow@10.101.13.99:5432/crsp]
#> n
#> <S3: integer64>
#> 1 40
Created on 2018-11-16 by the reprex package (v0.2.1)
Assuming that in #23, all the relevant filings can be found in XML format, we want to write a function that scrapes the data from pages like this