Home > RAVEN > getBlast.m

getBlast

PURPOSE ^

getBlast

SYNOPSIS ^

function blastStructure=getBlast(organismID,fastaFile,modelIDs,refFastaFiles)

DESCRIPTION ^

 getBlast
   Performs a bidirectional BLASTp between the organism of interest and a
   set of template organisms.

   organismID      the id of the organism of interest. This should also
                   match with the id supplied to getModelFromHomology
   fastaFile       a FASTA file with the protein sequences for the
                   organism of interest
   modelIDs        a cell array of model id:s. These must match the
                   "model.id" fields in the "models" structure if the output
                   is to be used with getModelFromHomology
   refFastaFiles   a cell array with the paths to the corresponding FASTA
                   files
   
   blastStructure  structure containing the bidirectional homology
                   measurements which are used by getModelFromHomology

   NOTE: This function calls BLASTp to perform a bidirectional homology
   test between the organism of interest and a set of other organisms
   using standard settings. If you would like to use other homology
   measurements, please see getBlastFromExcel.

   Usage: blastStructure=getBlast(organismID,fastaFile,modelIDs,...
           refFastaFiles)

   Rasmus Agren, 2013-07-29

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function blastStructure=getBlast(organismID,fastaFile,modelIDs,refFastaFiles)
0002 % getBlast
0003 %   Performs a bidirectional BLASTp between the organism of interest and a
0004 %   set of template organisms.
0005 %
0006 %   organismID      the id of the organism of interest. This should also
0007 %                   match with the id supplied to getModelFromHomology
0008 %   fastaFile       a FASTA file with the protein sequences for the
0009 %                   organism of interest
0010 %   modelIDs        a cell array of model id:s. These must match the
0011 %                   "model.id" fields in the "models" structure if the output
0012 %                   is to be used with getModelFromHomology
0013 %   refFastaFiles   a cell array with the paths to the corresponding FASTA
0014 %                   files
0015 %
0016 %   blastStructure  structure containing the bidirectional homology
0017 %                   measurements which are used by getModelFromHomology
0018 %
0019 %   NOTE: This function calls BLASTp to perform a bidirectional homology
0020 %   test between the organism of interest and a set of other organisms
0021 %   using standard settings. If you would like to use other homology
0022 %   measurements, please see getBlastFromExcel.
0023 %
0024 %   Usage: blastStructure=getBlast(organismID,fastaFile,modelIDs,...
0025 %           refFastaFiles)
0026 %
0027 %   Rasmus Agren, 2013-07-29
0028 %
0029 
0030 %Everything should be cell arrays
0031 organismID=cellstr(organismID);
0032 fastaFile=cellstr(fastaFile);
0033 modelIDs=cellstr(modelIDs);
0034 refFastaFiles=cellstr(refFastaFiles);
0035 
0036 blastStructure=[];
0037 
0038 %Get the directory for RAVEN Toolbox. This may not be the easiest or best
0039 %way to do this
0040 [ST I]=dbstack('-completenames');
0041 ravenPath=fileparts(ST(I).file);
0042 
0043 %Construct databases and output file
0044 tmpDB=tempname;
0045 outFile=tempname;
0046 
0047 %Create a database for the new organism and blast each of the
0048 %refFastaFiles against it
0049 [status output]=system([fullfile(ravenPath,'software','blast-2.2.26+','makeblastdb') ' -in "' fastaFile{1} '" -out "' tmpDB '" -dbtype "prot"']);
0050 
0051 for i=1:numel(refFastaFiles)
0052     fprintf(['BLASTing "' modelIDs{i} '" against "' organismID{1} '"..\n']);
0053     [status output]=system([fullfile(ravenPath,'software','blast-2.2.26+','blastp') ' -query "' refFastaFiles{i} '" -out "' outFile '_' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length"']);
0054 end
0055 delete([tmpDB '*']);
0056 
0057 %Then create a database for each of the reference organisms and blast
0058 %the new organism against them
0059 for i=1:numel(refFastaFiles)
0060     fprintf(['BLASTing "' organismID{1} '" against "' modelIDs{i} '"..\n']);
0061     [status output]=system([fullfile(ravenPath,'software','blast-2.2.26+','makeblastdb') ' -in "' refFastaFiles{i} '" -out "' tmpDB '" -dbtype "prot"']);
0062     [status output]=system([fullfile(ravenPath,'software','blast-2.2.26+','blastp') ' -query "' fastaFile{1} '" -out "' outFile '_r' num2str(i) '" -db "' tmpDB '" -evalue 10e-5 -outfmt "10 qseqid sseqid evalue pident length"']);
0063     delete([tmpDB '*']);
0064 end
0065     
0066 %Done with the BLAST, do the parsing of the text files
0067 for i=1:numel(refFastaFiles)*2
0068     tempStruct=[];
0069     if i<=numel(refFastaFiles)
0070         tempStruct.fromId=modelIDs{i};
0071         tempStruct.toId=organismID{1};
0072         A=importdata([outFile '_' num2str(i)]);
0073     else
0074         tempStruct.fromId=organismID{1};
0075         tempStruct.toId=modelIDs{i-numel(refFastaFiles)};
0076         A=importdata([outFile '_r' num2str(i-numel(refFastaFiles))]);
0077     end
0078     tempStruct.fromGenes=A.textdata(:,1);
0079     tempStruct.toGenes=A.textdata(:,2);
0080     tempStruct.evalue=A.data(:,1);
0081     tempStruct.identity=A.data(:,2);
0082     tempStruct.aligLen=A.data(:,3);
0083     blastStructure=[blastStructure tempStruct];
0084 end
0085 
0086 %Remove the old tempfiles
0087 delete([outFile '*']);
0088 end

Generated on Mon 06-Jan-2014 14:58:12 by m2html © 2005