package org.fda.contiggenerator;


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.fda.data.Enums;
import org.fda.data.Enums.Distribution;
import org.fda.data.Enums.Orientation;
import org.fda.data.Utilities;
import org.fda.exceptions.IllegalIntervalException;
import org.fda.inputdataparser.FastaReader;
import org.fda.inputdataparser.FastaRecord;
import org.fda.inputdataparser.FastaWriter;
import org.fda.intervaltree.Interval;

/**
 * Simulates contigs from the records of a reference FASTA file and writes
 * them to an output FASTA file.
 *
 * <p>Two modes are supported, selected by the {@code isRandom} constructor
 * argument:
 * <ul>
 *   <li><b>tiling</b> ({@code isRandom == false}): each reference record is
 *       walked from left to right and cut into consecutive contigs whose
 *       lengths are drawn through {@code getRandomLength}; the walk is
 *       repeated according to {@code Utilities.depthCoverage};</li>
 *   <li><b>random</b> ({@code isRandom == true}): contigs are placed at
 *       random start positions until roughly
 *       {@code Utilities.depthCoverage * referenceLength} bases have been
 *       produced.</li>
 * </ul>
 * In both modes a contig is only accepted when the fraction of its bases
 * that fall into the record's N intervals does not exceed
 * {@code Utilities.nratio}.
 *
 * @author Gokhan.Yavas
 */
public class ContigGenerator4 extends ContigGeneratorWDistribution implements ContigGeneratorInterface{
    /** Upper bound on retries, both for length draws and for random placements. */
    private static final int MAX_ITERATION = 1000;
    /** {@code true} for random placement, {@code false} for left-to-right tiling. */
    private final boolean isRandom;

    public ContigGenerator4(File infile, File outfile, double mean, double sd, Distribution dist, boolean isRandom){
        super(infile, outfile, mean, sd, dist);
        this.isRandom = isRandom;
    }
    public ContigGenerator4(File infile, File outfile, double mean, double sd, Distribution dist, boolean isRandom, boolean createreport){
        super(infile, outfile, mean, sd, dist, createreport);
        this.isRandom = isRandom;
    }
    public ContigGenerator4(File infile, File outfile, double mean, double sd, Distribution dist, long seed, boolean isRandom){
        super(infile, outfile, mean, sd, dist, seed);
        this.isRandom = isRandom;
    }
    public ContigGenerator4(File infile, File outfile, double mean, double sd, Distribution dist, long seed, boolean isRandom, boolean createreport){
        super(infile, outfile, mean, sd, dist, seed, createreport);
        this.isRandom = isRandom;
    }
    public ContigGenerator4(File infile, File outfile, double mean, double sd, Distribution dist, boolean setseed, boolean isRandom, boolean createreport){
        super(infile, outfile, mean, sd, dist, setseed, createreport);
        this.isRandom = isRandom;
    }

    /**
     * Tiles one reference record with contigs from left to right, writes them
     * to {@code frw} and records their details in the inherited bookkeeping
     * fields.
     *
     * <p>Coordinates are 1-based and inclusive. Stretches covered by the
     * record's N intervals are skipped when a contig would start inside them,
     * and a candidate contig whose N ratio exceeds {@code Utilities.nratio} is
     * shortened so that it ends in front of (or inside) one of the gaps it
     * overlaps. After the walk, contigs shorter than
     * {@code Utilities.simulation_mincontiglength} are merged with their left
     * neighbour when {@link #merge(Interval, Interval)} allows it.
     *
     * @param r        reference record to cut into contigs
     * @param frw      writer receiving the generated contigs
     * @param cov_perc fraction of the reference to cover; exactly {@code 1}
     *                 means the whole record, a value below {@code 1} stops the
     *                 walk once the covered fraction is within
     *                 {@code Utilities.simulation_coverage_ratio_threshold}
     *                 (relative) of the target
     * @return the names of the contigs generated for this record, in
     *         reference order; empty when no contig could be placed
     * @throws IllegalIntervalException if an interval with invalid bounds is
     *         constructed during the walk
     */
    protected List<String> createContigsOnReference(FastaRecord r, FastaWriter frw, double cov_perc) throws IllegalIntervalException{
        final boolean full = (cov_perc==1);
        final int reflen = r.getReadRecordLength();
        final List<Interval> currcontigcoords = new ArrayList<>();
        final List<String> contigids = new ArrayList<>();
        long curr_covered_size=0;
        int st=1;

        while(st<=reflen){
            Interval gap = r.getnIntervalsTree().search(new Interval(st, st, false));
            if(gap!=null){
                // The start position lies inside an N interval.
                if(reflen == gap.gethigh()){
                    // The gap runs to the end of the reference: nothing left to generate.
                    break;
                }
                st = gap.gethigh()+1;
                continue;
            }

            // Draw a contig length; redraw while the contig would run past the
            // end of the reference, and clamp to the end after too many redraws.
            int length = full ? getRandomLength(reflen) : getRandomLength(reflen, cov_perc, curr_covered_size);
            int end = st + length -1;
            int cnt =1;
            while(end > reflen){
                if(cnt > MAX_ITERATION){
                    end = reflen;
                    break;
                }
                length = full ? getRandomLength(reflen) : getRandomLength(reflen, cov_perc, curr_covered_size);
                end = st + length -1;
                cnt++;
            }

            // Measure how much of the candidate window is covered by gaps (Ns).
            Interval inte = new Interval(st, end, false);
            List<Interval> intervals = r.getnIntervalsTree().searchAll(inte);
            Collections.sort(intervals);
            int ntot=0;
            for(Interval it : intervals){
                ntot += inte.overlap(it);
            }
            double nrat = (double)ntot/(double)inte.getlength();

            if(nrat <= Utilities.nratio){
                // The window is acceptable as it is.
                currcontigcoords.add(inte);
                curr_covered_size += inte.getlength();
                st = end+1;
            }
            else{
                // Too many Ns: look for an earlier end position that satisfies
                // the N ratio requirement.
                boolean found=false;

                if(r.getRead().charAt(end-1)=='N'){
                    // The window ends inside the last gap. The search below
                    // measures windows up to each gap's upper bound, so also
                    // count the part of that gap lying beyond the window.
                    ntot = ntot + intervals.get(intervals.size()-1).gethigh() - end;
                }

                // Try the overlapped gaps from right to left: with the window
                // ending at the gap's upper bound, how many trailing N bases
                // must be dropped, and does the new end still touch this gap?
                for(int k=intervals.size()-1; k>=0; k--){
                    Interval curr = intervals.get(k);
                    end = curr.gethigh();

                    int displacement = calculateNewEndPos(end-st+1, ntot);
                    int pos = end - displacement;
                    if(pos >= (curr.getlow()-1) && displacement>0){
                        inte = new Interval(st, pos, false);
                        currcontigcoords.add(inte);
                        curr_covered_size += inte.getlength();
                        // The next contig starts right after this gap.
                        st = curr.gethigh()+1;
                        found=true;
                        break;
                    }
                    // This gap cannot terminate the contig: exclude it and
                    // retry with the gap to its left.
                    ntot -= curr.getlength();
                }
                if(!found){
                    // Fall back to a contig that stops right before the first gap.
                    end = intervals.get(0).getlow()-1;
                    inte = new Interval(st, end, false);
                    currcontigcoords.add(inte);
                    curr_covered_size += inte.getlength();
                    st = intervals.get(0).gethigh()+1;
                }
            }

            if(cov_perc<1){
                // Partial coverage: stop once the covered fraction is inside
                // the tolerance band around the requested fraction.
                double curr_rat = (double)curr_covered_size/reflen;
                if(curr_rat <= cov_perc*(1+Utilities.simulation_coverage_ratio_threshold) && curr_rat >= cov_perc*(1-Utilities.simulation_coverage_ratio_threshold))
                    break;
            }
        }

        // Merge contigs that are too short into their left neighbour where
        // possible. The size-guarded loop also covers the case where no contig
        // was produced at all (e.g. a reference consisting only of Ns).
        int currpos=0;
        while(currpos<currcontigcoords.size()){
            if(currpos>0 && currcontigcoords.get(currpos).getlength()<Utilities.simulation_mincontiglength){
                Interval merged = merge(currcontigcoords.get(currpos-1), currcontigcoords.get(currpos));
                if(merged!=null){
                    // Replace the pair by the merged interval; currpos now
                    // designates the element that followed the pair.
                    currcontigcoords.set(currpos-1, merged);
                    currcontigcoords.remove(currpos);
                    continue;
                }
            }
            currpos++;
        }

        // Emit the contigs, each with a randomly chosen orientation.
        for(Interval i : currcontigcoords){
            totcontnumber++;
            String s = r.getRead().substring(i.getlow()-1, i.gethigh());
            Orientation or = rand.nextDouble()<0.5 ? Orientation.F : Orientation.R;
            String fin = (or==Orientation.R) ? Utilities.reverseComplement(s) : s;

            FastaRecord frec = new FastaRecord(">contig"+totcontnumber, fin);
            contigids.add(frec.getName());
            totalN += frec.getNcount();
            ContigDetails details = new ContigDetails(frec.getName(), r.getName(), i, or, frec.getNcount(), frec.getNratio());
            finalcontigcoords.add(details);
            totsize += frec.getReadRecordLength();

            frw.write(frec);
            add2Lengths(details);
        }
        return contigids;
    }

    /**
     * Joins two contig intervals, {@code a} lying before {@code b}, into one
     * interval spanning both, provided the positions between them make up
     * less than {@code Utilities.nratio} of the joined length.
     *
     * @param a left interval
     * @param b right interval
     * @return the joined interval, or {@code null} when the ratio is too high
     *         or the joined interval could not be constructed
     */
    private Interval merge(Interval a, Interval b){
        double nonnlen = a.getlength()+b.getlength();
        double totlen = b.gethigh() - a.getlow() +1;
        // Everything between the two intervals is counted as gap.
        double nrat = (totlen-nonnlen) / totlen;
        if(nrat>=Utilities.nratio){
            return null;
        }
        try {
            return new Interval(a.getlow(), b.gethigh(), false);
        } catch (IllegalIntervalException ex) {
            // Treated as "cannot merge": the caller keeps both intervals.
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Computes how many trailing N bases have to be removed from a window so
     * that its N ratio becomes {@code Utilities.nratio}: solving
     * {@code (nlength - d) / (intervallength - d) = nratio} for {@code d}.
     *
     * @param intervallength length of the window
     * @param nlength        number of N bases counted for the window
     * @return the displacement {@code d}, truncated towards zero
     */
    private int calculateNewEndPos(int intervallength, int nlength){
        double d = (Utilities.nratio*intervallength - nlength) / (Utilities.nratio -1);
        return (int)d;
    }

    /**
     * Prints the diagnostic emitted when random placement gives up on a
     * reference record.
     *
     * @param refname    name of the reference record
     * @param iterations number of failed attempts counted so far
     * @param generated  number of contigs generated on this record
     */
    private void reportIterationLimit(String refname, int iterations, int generated){
        System.out.println("Error: this many contigs couldn't be generated with the given parameters from this reference chromosome "+refname+" after "+iterations+" iterations");
        if(generated==0)
            System.out.println("No contigs could be generated on "+refname);
        else
            System.out.println(generated+" contigs were generated on "+refname);
    }

    /**
     * Places contigs at random positions of one reference record until at
     * least {@code Utilities.depthCoverage * referenceLength} bases have been
     * generated, or until more than {@link #MAX_ITERATION} placement attempts
     * have failed for this record.
     *
     * <p>Orientation is drawn from the inherited {@code rand} generator, the
     * same one used for the start positions, so that a seeded run stays
     * reproducible.
     *
     * @param r   reference record to sample contigs from
     * @param frw writer receiving the generated contigs
     * @throws IllegalIntervalException declared for interface stability;
     *         invalid candidate intervals are handled inside the method and
     *         counted as failed attempts
     */
    private void createRandomContigs(FastaRecord r, FastaWriter frw) throws IllegalIntervalException{
        final int seqlen = r.getReadRecordLength();
        // Target number of base pairs to generate for this record.
        final long totTargetBp = (long)(Utilities.depthCoverage*seqlen);

        int totalIterationCnt=0;
        long totgeneratedbp=0;
        int totgeneratedcontignumber=0;

        OUTER: while(totgeneratedbp<totTargetBp){
            if(totalIterationCnt > MAX_ITERATION){
                reportIterationLimit(r.getName(), totalIterationCnt, totgeneratedcontignumber);
                break;
            }

            // Draw a length that does not overshoot the target by more than
            // 5%; after 100 redraws the current draw is accepted regardless.
            int contlen;
            int iteration=0;
            while(true){
                contlen = getRandomLength(seqlen);
                if(contlen + totgeneratedbp <= (1.05*totTargetBp) || iteration >=100)
                    break;
                iteration++;
            }

            // Look for a position where a contig of that length fits.
            while(true){
                if(totalIterationCnt > MAX_ITERATION){
                    reportIterationLimit(r.getName(), totalIterationCnt, totgeneratedcontignumber);
                    break OUTER;
                }

                int st = rand.nextInt(seqlen);
                int end = st+contlen;
                if(end>seqlen){
                    totalIterationCnt++;
                    continue;
                }
                try {
                    // st is 0-based, the interval is 1-based and inclusive.
                    Interval inte = new Interval(st+1, end, false);
                    List<Interval> intervals = r.getnIntervalsTree().searchAll(inte);
                    Collections.sort(intervals);
                    int ntot=0;
                    for(Interval it : intervals){
                        ntot += inte.overlap(it);
                    }
                    double nrat = (double)ntot/(double)inte.getlength();
                    if(nrat > Utilities.nratio){
                        totalIterationCnt++;
                        continue;
                    }

                    // The interval passed every check: create the contig.
                    totgeneratedcontignumber++;
                    this.totcontnumber++;

                    String s = r.getRead().substring(inte.getlow()-1, inte.gethigh());
                    Orientation or = rand.nextDouble()<0.5 ? Orientation.F : Orientation.R;
                    String fin = (or==Orientation.R) ? Utilities.reverseComplement(s) : s;

                    FastaRecord frec = new FastaRecord(">contig"+totcontnumber, fin);
                    totalN += frec.getNcount();
                    ContigDetails details = new ContigDetails(frec.getName(), r.getName(), inte, or, frec.getNcount(), frec.getNratio());
                    finalcontigcoords.add(details);

                    totsize += frec.getReadRecordLength();
                    totgeneratedbp += frec.getReadRecordLength();
                    frw.write(frec);
                    add2Lengths(details);
                    break;
                } catch (IllegalIntervalException ex) {
                    ex.printStackTrace();
                    // Count the failure: the length is fixed inside this loop,
                    // so an interval that keeps being rejected (presumably a
                    // non-positive drawn length) would otherwise never let
                    // the loop terminate.
                    totalIterationCnt++;
                }
            }
        }
    }

    /**
     * Reads every record of the input FASTA file and generates contigs for
     * it, either by random placement or by tiling, then calls
     * {@code report()}.
     *
     * <p>In tiling mode the record is tiled completely once per whole unit of
     * {@code Utilities.depthCoverage}, followed by one partial pass for the
     * fractional remainder. When {@code setseed} is set, the generator is
     * re-seeded from the record length before each pass.
     *
     * <p>I/O and interval errors are printed and end the generation early;
     * the reader (once opened) and the writer are closed in every case.
     */
    @Override
    public void generateContigs(){
        FastaReader fr = new FastaReader();
        FastaWriter frw = new FastaWriter(outfile);
        int w = (int)Utilities.depthCoverage;
        double f = Utilities.depthCoverage - w;

        try{
            fr.openFile(infile);
            try{
                FastaRecord r;
                while((r=((FastaRecord)fr.readNextRecord()))!=null){
                    // Accumulate the reference statistics.
                    totalNReference += r.getNcount();
                    totrefnumber++;
                    totrefsize += r.getReadRecordLength();

                    if(isRandom){
                        createRandomContigs(r, frw);
                    }
                    else{
                        // Cover the reference "coverage" times: w full passes...
                        for(int i=0; i<w; i++){
                            if(setseed)
                                rand.setSeed((long)r.getReadRecordLength()+i);
                            createContigsOnReference(r, frw, 1);
                        }
                        // ...plus one partial pass for the fractional part.
                        if(f!=0){
                            if(setseed)
                                rand.setSeed((long)r.getReadRecordLength()-1);
                            createContigsOnReference(r, frw, f);
                        }
                    }
                }
            }
            finally{
                // Close the reader even when generation fails half-way.
                fr.closeReader();
            }
        }
        catch(IOException io){io.printStackTrace();}
        catch (IllegalIntervalException ex) {ex.printStackTrace();}
        finally{
            frw.closeWriter();
        }
        report();
    }

    /**
     * Sets the depth of coverage used by subsequent generation runs.
     * Note that this writes the static {@code Utilities.depthCoverage} value.
     *
     * @param d depth of coverage; the integer part gives the number of full
     *          tiling passes, the fractional part the final partial pass
     */
    public void setCoverage(double d){
        Utilities.depthCoverage=d;
    }

    /** Ad-hoc driver with hard-coded file names and parameters. */
    public static void main(String args[]){
        Utilities.nratio=0.5;
        ContigGenerator4 g = new ContigGenerator4(new File("hg38.fa"), new File("out.fa"), 10000000, 10000000, Distribution.NORMAL, false, false, true);
        g.setCoverage(0.4);
        g.generateContigs();
    }
}
