package opennlp.tools.formats.masc;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.SAXParser;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.XmlUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:opennlp/tools/formats/masc/MascDocumentStream.class */
/**
 * An {@link ObjectStream} that yields the {@link MascDocument}s found in a corpus directory.
 *
 * <p>All documents are loaded while the stream is constructed: every file whose absolute path
 * ends with {@code .hdr} is parsed as a header, the files it references are located relative to
 * the header's directory, and the resulting document is kept in memory. {@link #read()} then
 * simply walks over the loaded documents and {@link #reset()} rewinds that walk.
 */
public class MascDocumentStream implements ObjectStream<MascDocument> {
    private static final Logger logger = LoggerFactory.getLogger(MascDocumentStream.class);

    /** Documents loaded by the constructor; set to {@code null} by {@link #close()}. */
    private List<MascDocument> documents;

    /** Cursor over {@link #documents}; set to {@code null} by {@link #close()}. */
    private Iterator<MascDocument> documentIterator;

    /** Parser used for reading the {@code .hdr} header files. */
    private final SAXParser saxParser;

    /**
     * SAX handler that collects, from a header file, the {@code f.id} -> {@code loc} attribute
     * pairs of every {@code annotation} and {@code primaryData} element (names matched
     * case-insensitively).
     */
    public static class HeaderHandler extends DefaultHandler {
        private HashMap<String, String> annotationFiles = null;
        private String file = null;
        private String fType = null;

        private HeaderHandler() {
        }

        /**
         * @return the collected {@code f.id} -> {@code loc} mapping, or {@code null} when no
         *         {@code annotation}/{@code primaryData} element has been seen
         */
        protected HashMap<String, String> getPathList() {
            return this.annotationFiles;
        }

        /** Remembers the {@code loc} and {@code f.id} attributes of a relevant element. */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (isFileReference(qName)) {
                this.file = attributes.getValue("loc");
                this.fType = attributes.getValue("f.id");
                // The map is created lazily, on the first relevant element.
                if (this.annotationFiles == null) {
                    this.annotationFiles = new HashMap<>();
                }
            }
        }

        /** Stores the attribute pair remembered by the most recent {@code startElement}. */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (isFileReference(qName)) {
                this.annotationFiles.put(this.fType, this.file);
            }
        }

        /** Tells whether the qualified element name is one of the two this handler collects. */
        private static boolean isFileReference(String qName) {
            return qName.equalsIgnoreCase("annotation") || qName.equalsIgnoreCase("primaryData");
        }
    }

    /**
     * Loads every document below the given directory, descending into sub-directories and
     * accepting every file.
     *
     * @param mascCorpusDirectory the corpus root; must be a directory
     * @throws IOException if the argument is not a directory, a directory cannot be listed, or a
     *         header file is invalid
     */
    public MascDocumentStream(File mascCorpusDirectory) throws IOException {
        // Delegate with this(...): constructing a second instance with "new" would leave this
        // one without any documents, parser or iterator.
        this(mascCorpusDirectory, true, candidate -> true);
    }

    /**
     * Loads the documents found in the given directory.
     *
     * <p>A document whose files cannot be opened or parsed is logged and skipped; an invalid
     * header or a referenced file that does not exist aborts the construction.
     *
     * @param mascCorpusDirectory the corpus root; must be a directory
     * @param searchRecursive whether sub-directories are searched as well
     * @param fileFilter filter applied to every directory listing (it therefore also decides
     *        which sub-directories are visited); {@code null} accepts everything
     * @throws IOException if the argument is not a directory, a directory cannot be listed, or a
     *         header file is invalid
     */
    public MascDocumentStream(File mascCorpusDirectory, boolean searchRecursive, FileFilter fileFilter)
            throws IOException {
        this.documents = new LinkedList<>();
        this.saxParser = XmlUtil.createSaxParser();
        if (!mascCorpusDirectory.isDirectory()) {
            throw new IOException("Input corpus directory must be a directory according to File.isDirectory()!");
        }

        int failedLoads = 0;
        Deque<File> pendingDirectories = new ArrayDeque<>();
        pendingDirectories.push(mascCorpusDirectory);
        while (!pendingDirectories.isEmpty()) {
            File directory = pendingDirectories.pop();
            File[] entries = directory.listFiles(fileFilter);
            if (entries == null) {
                // listFiles signals an I/O error with null rather than with an exception.
                throw new IOException("Could not list the contents of directory: "
                        + directory.getAbsolutePath());
            }
            for (File entry : entries) {
                if (entry.isFile()) {
                    String path = entry.getAbsolutePath();
                    if (path.endsWith(".hdr") && !loadDocument(path)) {
                        failedLoads++;
                    }
                } else if (searchRecursive && entry.isDirectory()) {
                    pendingDirectories.push(entry);
                }
            }
        }

        logger.info("Documents loaded: {}", this.documents.size());
        if (failedLoads > 0) {
            logger.info("Failed loading {} documents.", failedLoads);
        }
        rewind();
    }

    /**
     * Parses the document described by one header file and appends it to {@link #documents}.
     *
     * @param headerPath absolute path of the {@code .hdr} file
     * @return {@code true} if the document was added, {@code false} if loading failed and the
     *         failure was logged
     * @throws IOException if the header itself is invalid or references a missing file
     */
    private boolean loadDocument(String headerPath) throws IOException {
        Map<String, File> annotations = checkAnnotations(headerPath);
        InputStream text = null;
        InputStream seg = null;
        InputStream penn = null;
        InputStream s = null;
        InputStream ne = null;
        try {
            if (!annotations.containsKey("f.text")) {
                throw new IOException("The header does not reference an f.text file: " + headerPath);
            }
            text = openIfPresent(annotations, "f.text");
            seg = openIfPresent(annotations, "f.seg");
            penn = openIfPresent(annotations, "f.penn");
            s = openIfPresent(annotations, "f.s");
            ne = openIfPresent(annotations, "f.ne");
            this.documents.add(MascDocument.parseDocument(headerPath, text, seg, penn, s, ne));
            return true;
        } catch (IOException e) {
            logger.error("Failed to parse the file: {}", headerPath, e);
            return false;
        } finally {
            // NOTE(review): assumes parseDocument has fully consumed the streams by the time it
            // returns - confirm against MascDocument before relying on this.
            closeQuietly(text, seg, penn, s, ne);
        }
    }

    /** Opens a buffered stream on the file registered under {@code key}, or returns {@code null}. */
    private static InputStream openIfPresent(Map<String, File> annotations, String key)
            throws IOException {
        File source = annotations.get(key);
        return source == null ? null : new BufferedInputStream(new FileInputStream(source));
    }

    /** Closes every non-null stream; a failure to close is only logged. */
    private static void closeQuietly(InputStream... streams) {
        for (InputStream stream : streams) {
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException e) {
                logger.debug("Failed to close an input stream.", e);
            }
        }
    }

    /**
     * Parses a header file and resolves every file it references against the header's directory.
     *
     * @param headerPath path of the {@code .hdr} file
     * @return the {@code f.id} -> file mapping; empty if the header references nothing
     * @throws IOException if the header cannot be parsed, a reference has no {@code loc}, or a
     *         referenced file does not exist
     */
    private HashMap<String, File> checkAnnotations(String headerPath) throws IOException {
        HeaderHandler handler = new HeaderHandler();
        File headerFile = new File(headerPath);
        try {
            this.saxParser.parse(headerFile, handler);
        } catch (SAXException e) {
            throw new IOException("Invalid corpus format. Could not parse the header: " + headerPath, e);
        }

        HashMap<String, File> resolved = new HashMap<>();
        Map<String, String> declared = handler.getPathList();
        if (declared == null) {
            // The header contained no annotation/primaryData element at all.
            return resolved;
        }
        String baseDirectory = headerFile.getParentFile().getAbsolutePath();
        for (Map.Entry<String, String> reference : declared.entrySet()) {
            String location = reference.getValue();
            if (location == null) {
                throw new IOException("Invalid corpus format. The header " + headerPath
                        + " declares " + reference.getKey() + " without a loc attribute.");
            }
            File target = new File(baseDirectory, location);
            if (!target.isFile()) {
                throw new IOException("Corpus integrity violated. Annotation file "
                        + target.getAbsolutePath() + " is missing.");
            }
            resolved.put(reference.getKey(), target);
        }
        return resolved;
    }

    /** Resets every loaded document and rewinds the stream to the first one. */
    @Override
    public void reset() {
        rewind();
    }

    /** Non-overridable implementation of {@link #reset()}, safe to call from the constructor. */
    private void rewind() {
        for (MascDocument document : this.documents) {
            document.reset();
        }
        this.documentIterator = this.documents.iterator();
    }

    /**
     * @return the next loaded document, or {@code null} once all documents have been returned
     */
    @Override
    public MascDocument read() throws IOException {
        return this.documentIterator.hasNext() ? this.documentIterator.next() : null;
    }

    /** Drops the references to the loaded documents and to the cursor over them. */
    @Override
    public void close() {
        this.documents = null;
        this.documentIterator = null;
    }
}
