USPTO / PatentPublicData

Utility tools to help download and parse patent data made available to the public
Other
182 stars 80 forks source link

Greenbook returns only even patents #10

Closed patricknee closed 7 years ago

patricknee commented 7 years ago

Opening issue. Haven't found issue yet.

patricknee commented 7 years ago

The control logic in DumpFileAps.read is flawed. Testing a fix.

patricknee commented 7 years ago

In initial testing, the following DumpFileAps seems to work:

package gov.uspto.patent.bulk;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.NoSuchElementException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link DumpFile} implementation that splits a line-oriented dump into records, where each
 * record begins on a line starting with the {@code PATN} tag.
 *
 * <p>Reading is stateful: the start-tag line that terminates one record is consumed while
 * reading that record, so {@link #firstTagSeen} remembers across calls that the reader is
 * already positioned inside the next record.
 */
public class DumpFileAps extends DumpFile {
    private static final Logger LOGGER = LoggerFactory.getLogger(DumpFileAps.class);

    /** Prefix of the line that opens every record. */
    private static final String startTag = "PATN";

    /** Number of records returned by {@link #read()} (plus those counted by {@link #skip(int)}). */
    private int currentRecCount;

    /**
     * Set once the first start-tag line of the file has been consumed. Lines read before that
     * point are discarded; after it, every start-tag line marks the end of the current record.
     */
    boolean firstTagSeen = false;

    public DumpFileAps(File file) {
        super(file);
    }

    public DumpFileAps(String name, BufferedReader reader) {
        super(name, reader);
    }

    /**
     * Reads the next record from the underlying reader.
     *
     * <p>Lines are accumulated until the next start-tag line or the end of the stream. The
     * returned text is the start tag on its own line followed by the accumulated lines, each
     * terminated by {@code '\n'}; any text that followed the tag on the start-tag line itself
     * is not retained.
     *
     * <p>An {@link IOException} is logged rather than propagated; whatever was accumulated
     * before the failure is returned.
     *
     * @return the next record, or {@code null} when no further record content is available
     */
    @Override
    public String read() {
        StringBuilder content = new StringBuilder();

        try {
            String line;
            // readLine() returning null is the only reliable end-of-stream signal; ready() merely
            // reports whether a read would block and must not be used to detect EOF.
            while ((line = super.getReader().readLine()) != null) {
                boolean isStartTag = line.startsWith(startTag);
                if (!firstTagSeen) {
                    // Still before the first record: drop the line, but note the first tag.
                    firstTagSeen = isStartTag;
                } else if (isStartTag) {
                    // The tag of the following record closes the current one.
                    currentRecCount++;
                    return startTag + "\n" + content.toString();
                } else {
                    content.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            LOGGER.error("Error while reading file: {}:{}", super.getFile(), currentRecCount, e);
        }

        if (content.length() == 0) {
            return null;
        }

        // The final record has no following start tag; count it like every other record.
        currentRecCount++;
        return startTag + "\n" + content.toString();
    }

    /**
     * Advances past records by calling {@code super.next()} repeatedly.
     *
     * <p>NOTE(review): the loop runs {@code skipCount - 1} times and increments the record
     * count in addition to whatever {@code super.next()} does; whether that is intended depends
     * on {@code DumpFile.next()}, which is not visible here - confirm against the base class.
     *
     * @param skipCount position to skip to; the loop performs {@code skipCount - 1} iterations
     * @throws IOException declared by the overridden method
     */
    @Override
    public void skip(int skipCount) throws IOException {
        for (int i = 1; i < skipCount; i++) {
            super.next();
            currentRecCount++;
        }
    }

    /**
     * @return the current record count maintained by this reader
     */
    @Override
    public int getCurrentRecCount() {
        return currentRecCount;
    }

}
bgfeldm commented 7 years ago

Verified and checked in your fix.

Thanks again,

Brian