Commit 64838c1c by Eric Coissac

New version based on sort them merge algorithm

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@180 60f365c0-8329-0410-b2a4-ec073aeeaa1d
parent 6d8bb449
EXEC=ecoPrimer
PRIMER_SRC= ecoprimer.c
PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
SRCS= $(PRIMER_SRC)
LIB= -lecoPCR -lecoprimer -lz -lm
LIBFILE= libecoPCR/libecoPCR.a \
libecoprimer/libecoprimer.a
include global.mk
all: $(EXEC)
########
#
# ecoPrimer compilation
#
########
# executable compilation and link
ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -O5 -m64 -fast -o $@ $< $(LIBPATH) $(LIB)
########
#
# library compilation
#
########
libecoPCR/libecoPCR.a:
$(MAKE) -C libecoPCR
libecoprimer/libecoprimer.a:
$(MAKE) -C libecoprimer
########
#
# project management
#
########
clean:
rm -f *.o
rm -f $(EXEC)
$(MAKE) -C libecoPCR clean
$(MAKE) -C libecoprimer clean
\ No newline at end of file
File added
This diff is collapsed. Click to expand it.
MACHINE=MAC_OS_X
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc
CFLAGS= -W -Wall -O5 -m64 -fast -g
#CFLAGS= -W -Wall -O0 -m64 -g
#CFLAGS= -W -Wall -O5 -fast -g
default: all
%.o: %.c
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
%.P : %.c
$(MAKEDEPEND)
@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
rm -f $*.d; [ -s $@ ] || rm -f $@
include $(SRCS:.c=.P)
\ No newline at end of file
SOURCES = ecodna.c \
ecoError.c \
ecoIOUtils.c \
ecoMalloc.c \
ecorank.c \
ecoseq.c \
ecotax.c \
ecofilter.c \
econame.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libecoPCR.a
RANLIB= ranlib
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@
ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error,
const char* message,
const char * filename,
int linenumber)
{
fprintf(stderr,"Error %d in file %s line %d : %s\n",
error,
filename,
linenumber,
message);
abort();
}
ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
int32_t is_big_endian()
{
int32_t i=1;
return (int32_t)((char*)&i)[0];
}
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
int32_t buffersize=0;
int32_t read;
if (!recordSize)
ECOERROR(ECO_ASSERT_ERROR,
"recordSize cannot be NULL");
read = fread(recordSize,
1,
sizeof(int32_t),
f);
if (feof(f))
return NULL;
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*recordSize=swap_int32_t(*recordSize);
if (buffersize < *recordSize)
{
if (buffer)
buffer = ECOREALLOC(buffer,*recordSize,
"Increase size of record buffer");
else
buffer = ECOMALLOC(*recordSize,
"Allocate record buffer");
}
read = fread(buffer,
1,
*recordSize,
f);
if (read != *recordSize)
ECOERROR(ECO_IO_ERROR,"Reading record data error");
return buffer;
};
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error)
{
FILE *f;
int32_t read;
f = fopen(filename,"rb");
if (!f)
{
if (abort_on_open_error)
ECOERROR(ECO_IO_ERROR,"Cannot open file");
else
{
*sequencecount=0;
return NULL;
}
}
read = fread(sequencecount,
1,
sizeof(int32_t),
f);
if (read != sizeof(int32_t))
ECOERROR(ECO_IO_ERROR,"Reading record size error");
if (is_big_endian())
*sequencecount=swap_int32_t(*sequencecount);
return f;
}
ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h
#include "ecoPCR.h"
#include <stdlib.h>
static int eco_log_malloc = 0;
static size_t eco_amount_malloc=0;
static size_t eco_chunk_malloc=0;
void eco_trace_memory_allocation()
{
eco_log_malloc=1;
}
void eco_untrace_memory_allocation()
{
eco_log_malloc=0;
}
void ecoMallocedMemory()
{
return eco_amount_malloc;
}
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line)
{
void * chunk;
chunk = calloc(1,chunksize);
if (!chunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
eco_chunk_malloc++;
if (eco_log_malloc)
fprintf(stderr,
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
chunk,
chunksize,
filename,
line);
return chunk;
}
void *eco_realloc(void *chunk,
int32_t newsize,
const char *error_message,
const char *filename,
int32_t line)
{
void *newchunk;
newchunk = realloc(chunk,newsize);
if (!newchunk)
ecoError(ECO_MEM_ERROR,error_message,filename,line);
if (!chunk)
eco_chunk_malloc++;
if (eco_log_malloc)
fprintf(stderr,
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
chunk,
newchunk,
newsize,
filename,
line);
return newchunk;
}
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line)
{
free(chunk);
if (eco_log_malloc)
fprintf(stderr,
"Memory segment %p is released => %s (file : %s [%d])",
chunk,
error_message,
filename,
line);
eco_chunk_malloc--;
}
#ifndef ECOPCR_H_
#define ECOPCR_H_
#include <stdio.h>
#include <inttypes.h>
/*****************************************************
*
* Data type declarations
*
*****************************************************/
/*
*
* Sequence types
*
*/
typedef struct {
int32_t taxid;
char AC[20];
int32_t DE_length;
int32_t SQ_length;
int32_t CSQ_length; /*what is this CSQ_length ? */
char data[1];
} ecoseqformat_t;
typedef struct {
int32_t taxid;
int32_t SQ_length;
int32_t isexample;
char *AC;
char *DE;
char *SQ;
int32_t ranktaxonid;/*TR: taxon id to which the sequence belongs*/
} ecoseq_t, *pecoseq_t;
/*
*
* Taxonomy taxon types
*
*/
typedef struct {
int32_t taxid;
int32_t rank;
int32_t parent;
int32_t namelength;
char name[1];
} ecotxformat_t;
typedef struct ecotxnode {
int32_t taxid;
int32_t rank;
struct ecotxnode *parent;
char *name;
} ecotx_t;
typedef struct {
int32_t count;
ecotx_t taxon[1];
} ecotxidx_t;
/*
*
* Taxonomy rank types
*
*/
typedef struct {
int32_t count;
char* label[1];
} ecorankidx_t;
/*
*
* Taxonomy name types
*
*/
typedef struct {
int32_t is_scientificname;
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
int32_t is_scientificname;
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
} econameidx_t;
typedef struct {
ecorankidx_t *ranks;
econameidx_t *names;
ecotxidx_t *taxons;
} ecotaxonomy_t;
/*****************************************************
*
* Function declarations
*
*****************************************************/
/*
*
* Low level system functions
*
*/
int32_t is_big_endian();
int32_t swap_int32_t(int32_t);
void *eco_malloc(int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void *eco_realloc(void *chunk,
int32_t chunksize,
const char *error_message,
const char *filename,
int32_t line);
void eco_free(void *chunk,
const char *error_message,
const char *filename,
int32_t line);
void eco_trace_memory_allocation();
void eco_untrace_memory_allocation();
#define ECOMALLOC(size,error_message) \
eco_malloc((size),(error_message),__FILE__,__LINE__)
#define ECOREALLOC(chunk,size,error_message) \
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
#define ECOFREE(chunk,error_message) \
eco_free((chunk),(error_message),__FILE__,__LINE__)
/*
*
* Error managment
*
*/
void ecoError(int32_t,const char*,const char *,int);
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
#define ECO_IO_ERROR (1)
#define ECO_MEM_ERROR (2)
#define ECO_ASSERT_ERROR (3)
#define ECO_NOTFOUND_ERROR (4)
/*
*
* Low level Disk access functions
*
*/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error);
void *read_ecorecord(FILE *,int32_t *recordSize);
/*
* Read function in internal binary format
*/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount);
ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/**
* Read taxonomy data as formated by the ecoPCRFormat.py script.
*
* This function is normaly uses internaly by the read_taxonomy
* function and should not be called directly.
*
* @arg filename path to the *.tdx file of the reformated db
*
* @return pointer to a taxonomy index structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix);
ecoseq_t *new_ecoseq();
int32_t delete_ecoseq(ecoseq_t *);
ecoseq_t *new_ecoseq_with_data( char *AC,
char *DE,
char *SQ,
int32_t taxid
);
int32_t delete_taxon(ecotx_t *taxon);
int32_t delete_taxonomy(ecotxidx_t *index);
int32_t rank_index(const char* label,ecorankidx_t* ranks);
//int32_t delete_apatseq(SeqPtr pseq);
//PatternPtr buildPattern(const char *pat, int32_t error_max);
//PatternPtr complementPattern(PatternPtr pat);
//
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
char *ecoComplementPattern(char *nucAcSeq);
char *ecoComplementSequence(char *nucAcSeq);
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
#endif /*ECOPCR_H_*/
#include "../libapat/libstki.h"
#include "../libapat/apat.h"
#include "ecoPCR.h"
#include <string.h>
static void EncodeSequence(SeqPtr seq);
static void UpperSequence(char *seq);
/* -------------------------------------------- */
/* uppercase sequence */
/* -------------------------------------------- */
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
#define TO_UPPER(c) ((c) - 'a' + 'A')
void UpperSequence(char *seq)
{
char *cseq;
for (cseq = seq ; *cseq ; cseq++)
if (IS_LOWER(*cseq))
*cseq = TO_UPPER(*cseq);
}
#undef IS_LOWER
#undef TO_UPPER
/* -------------------------------------------- */
/* encode sequence */
/* IS_UPPER is slightly faster than isupper */
/* -------------------------------------------- */
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
void EncodeSequence(SeqPtr seq)
{
int i;