package org.fda.data;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.fda.data.Enums.AlignmentTool;
import org.fda.data.Enums.FileMode;
import org.fda.inputdataparser.FastaRecord;
import org.fda.inputdataparser.InputReader;
import org.fda.inputdataparser.ReadRecord;
import org.fda.intervaltree.Interval;
import org.fda.intervaltree.IntervalSearchTree;
import org.fda.commands.ComputeModelCommand;

/**
 * The set of reference sequences read from a single input file, together with
 * aggregate statistics (summed sequence length, summed gap length, shortest and
 * longest sequence) and a set of per-sequence hash strings that is used to
 * compare two reference sets.
 *
 * <p>Constructing an instance parses the input file and serializes the populated
 * object into the output directory; {@link #load(File)} reads such an object back.
 *
 * @author Gokhan.Yavas
 */
public class ReferenceSet implements Serializable{
    private static final long serialVersionUID = 8234343511122882189L;
    private final File infile, outdir;
    /** References keyed by their reference ID. */
    private final Map<String, Reference> chrs=new TreeMap<>();
    private final List<ReferenceFile> referenceFiles=new ArrayList<>();
    private int maxContigSize=0;
    private int minContigSize=Integer.MAX_VALUE;
    private final File refFolder;
    /** Hex-encoded hash value of every reference in this set. */
    private final Set<String> hashCodeSet;
    private long genomesize=0;
    private long gapsize=0;

    /** @return the sum of the gap lengths of all references */
    public long getGapsize() {
        return gapsize;
    }

    /** @return the sum of the lengths of all references */
    public long getGenomesize() {
        return genomesize;
    }

    /** @return the live (not copied) set of hex-encoded reference hash values */
    public Set<String> getHashCodeSet() {
        return hashCodeSet;
    }

    /** @return the {@code REF} folder created below the output directory */
    public File getRefFolder() {
        return refFolder;
    }

    /** @return the input file the references were read from */
    public File getInfile() {
        return infile;
    }

    /** @return the length of the longest record read, or 0 if none was read */
    public int getMaxContigSize() {
        return maxContigSize;
    }

    /** @return the length of the shortest record read, or {@code Integer.MAX_VALUE} if none was read */
    public int getMinContigSize() {
        return minContigSize;
    }

    /** @return the live (not copied) list of reference files belonging to this set */
    public List<ReferenceFile> getReferenceFiles(){
        return referenceFiles;
    }

    /** @return a new list holding every reference, sorted by the references' natural ordering */
    public List<Reference> getRefs(){
        List<Reference> ret = new ArrayList<>(chrs.values());
        Collections.sort(ret);
        return ret;
    }

    /**
     * @param name reference ID to look up
     * @return the reference registered under {@code name}, or {@code null} if there is none
     */
    public Reference findReference(String name){
        return chrs.get(name);
    }

    /** @return the number of references in this set */
    public int getNumberOfReferences(){
        return chrs.size();
    }

    /**
     * Picks the existing reference file that would be left with the smallest
     * positive remaining capacity (relative to {@code Utilities.maxReferenceFileSize})
     * after adding the given record.
     *
     * <p>Currently not called: the only call site in
     * {@link #processReferenceFile()} was commented out in the original code.
     *
     * @param r record that is to be placed
     * @return the tightest fitting reference file, or {@code null} if no file has room
     */
    private ReferenceFile findBestFitGreedy(ReadRecord r){
        ReferenceFile best = null;
        long bestFit = 0L;
        for(ReferenceFile candidate : referenceFiles){
            long fit = Utilities.maxReferenceFileSize-(candidate.getSize()+(long)r.getReadRecordLength());
            // Only files with room left qualify; among those keep the tightest fit.
            if(fit>0 && (best==null || fit<bestFit)){
                best = candidate;
                bestFit = fit;
            }
        }
        return best;
    }

    /** Widens the tracked minimum/maximum record length so that it includes {@code len}. */
    private void setMaxMinContigLength(int len){
        if(len< minContigSize)
            minContigSize = len;
        if(len> maxContigSize)
            maxContigSize = len;
    }

    /**
     * Reads every record of {@link #infile} and populates this object.
     *
     * <p>For each record this updates the min/max record length, registers a
     * {@link Reference} under its reference ID, and accumulates the total length,
     * total gap length and hash set. When the alignment tool is nucmer, each record
     * is additionally written to its own {@code refpart*.fa} file inside
     * {@link #refFolder}; otherwise the input file itself is registered as the single
     * read-only reference file. Unless the current command is a
     * {@code ComputeModelCommand}, the gap intervals of every reference are written
     * to the gaps file in the output directory. Finally this object is serialized to
     * the stats file in the output directory.
     *
     * @return {@code true} on success; {@code false} if any exception occurred
     *         (its stack trace is printed)
     */
    private boolean processReferenceFile(){
        final boolean splitPerRecord = Utilities.alignmentTool == AlignmentTool.nucmer;
        final boolean writeGaps = !(Utilities.current_cmd instanceof ComputeModelCommand);
        final InputReader fr = new InputReader();
        // A null resource is legal in try-with-resources and is simply skipped on close.
        try(BufferedWriter bwchrgaps = writeGaps
                ? new BufferedWriter(new FileWriter(new File(outdir.getAbsolutePath()+File.separator+Utilities.chrGapsFileName)))
                : null){
            fr.openFile(infile);
            try{
                ReadRecord f_rec;
                int partIndex=0;
                while((f_rec=fr.readNextRecord())!=null){
                    setMaxMinContigLength(f_rec.getReadRecordLength());

                    if(splitPerRecord){
                        // Every record gets a reference file of its own.
                        ReferenceFile part = new ReferenceFile(refFolder.getAbsolutePath()+File.separator+"refpart"+Utilities.giveName(partIndex)+".fa",FileMode.WRITE);
                        referenceFiles.add(part);
                        partIndex++;
                        try{
                            part.writeTo(f_rec);
                        }
                        finally{
                            // Nothing else is written to this file, so release it right away
                            // instead of keeping one open writer per record until the end.
                            part.closeFastaWriter();
                        }
                    }

                    FastaRecord fasta = (FastaRecord)f_rec;
                    Reference c = new Reference(f_rec.getName(), f_rec.getReadRecordLength(), f_rec.getGCcount(), f_rec.getGCpercent(), fasta.getnIntervalsTree(), f_rec.getNcount(), fasta.getHashValue(), fasta.getSeqHashValue());
                    chrs.put(c.getRefID(), c);
                    genomesize+=c.getLength();
                    gapsize+=c.getGaplength();
                    hashCodeSet.add(Utilities.byteToHex(c.getHashValue()));

                    if(bwchrgaps!=null){
                        // One header line per reference, followed by one tab-indented line per gap interval.
                        IntervalSearchTree ist = c.getGapIntervalTree();
                        bwchrgaps.append(c.getRefID()+Utilities.ls);
                        for(Interval inte : ist.inOrder()){
                            bwchrgaps.append("\t"+inte+Utilities.ls);
                        }
                    }
                }
            }
            finally{
                fr.closeReader();
            }

            if(!splitPerRecord){
                referenceFiles.add(new ReferenceFile(infile, FileMode.READONLY));
            }

            // Record the fully populated reference set object in a file.
            File otu = new File(outdir.getAbsolutePath()+File.separator+Utilities.chrStatFileName);
            try(ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(otu)))){
                out.writeObject(this);
            }
            return true;
        }
        catch (Exception ex) {
            ex.printStackTrace();
            return false;
        }
    }

    /**
     * Builds the reference set from a sequence file and stores the result below
     * {@code outdir}.
     *
     * <p>Creates the {@code REF} sub-folder of {@code outdir} (including missing
     * parents) and then processes the input file.
     *
     * @param infile file holding the reference sequences
     * @param outdir directory that receives the {@code REF} folder and the generated files
     * @throws ExceptionInInitializerError if processing the input file failed
     */
    public ReferenceSet(File infile, File outdir) throws ExceptionInInitializerError{
        this.outdir = outdir;
        this.infile = infile;
        this.hashCodeSet = new HashSet<>();
        refFolder = new File(outdir.getAbsolutePath()+File.separator+"REF");
        refFolder.mkdirs();
        if(!processReferenceFile())
            throw new ExceptionInInitializerError("Failed to process reference file "+infile);
    }

    /**
     * Reads a previously serialized reference set from the stats file in {@code indir}.
     *
     * <p>NOTE(review): this uses Java native deserialization; {@code indir} should
     * only ever point at trusted data.
     *
     * @param indir directory containing the stats file
     * @return the deserialized reference set, or {@code null} if it could not be read.
     *         An {@link EOFException} is ignored silently; other failures print
     *         their stack trace.
     */
    public static ReferenceSet load(File indir){
        ReferenceSet rset=null;
        try(ObjectInputStream instream = new ObjectInputStream(new BufferedInputStream(new FileInputStream(indir.getAbsolutePath()+File.separator+Utilities.chrStatFileName)))){
            rset = (ReferenceSet)instream.readObject();
        }
        catch(ClassNotFoundException e){e.printStackTrace();}
        catch (IOException ex) {
            if(!(ex instanceof EOFException))
                ex.printStackTrace();
        }
        return rset;
    }

    /**
     * Two reference sets are equal when they hold exactly the same reference hash values.
     *
     * @param r reference set to compare with; may be {@code null}
     * @return {@code true} if {@code r} is non-null and both hash sets have the same content
     */
    public boolean equals(ReferenceSet r){
        return r!=null && hashCodeSet.equals(r.getHashCodeSet());
    }

    /**
     * Overrides {@link Object#equals(Object)} so that comparisons made through an
     * {@code Object} reference or by collections agree with
     * {@link #equals(ReferenceSet)}.
     */
    @Override
    public boolean equals(Object obj){
        return obj instanceof ReferenceSet && equals((ReferenceSet)obj);
    }

    /** Hash code consistent with {@link #equals(Object)}: derived from the reference hash values only. */
    @Override
    public int hashCode(){
        return hashCodeSet.hashCode();
    }

    /**
     * @param r reference set to test; must not be {@code null}
     * @return {@code true} if every reference hash value of {@code r} is also present in this set
     */
    public boolean contains(ReferenceSet r){
        return hashCodeSet.containsAll(r.getHashCodeSet());
    }
}
