function compSigClust(filename)
% COMPSIGCLUST  Print the categories most significantly enriched in a cluster.
%
%   compSigClust(filename)
%
%   filename - name of file with the gene names for genes in this class.
%              Each row should contain ONE Orf name (the names appear in
%              the first column of the expression file you have downloaded).
%
%   Files read from the current directory:
%     cin.txt       - tab-delimited numeric matrix; row i is multiplied
%                     element-wise with the cluster indicator over the gene
%                     list, so it must have one column per gene in
%                     geneNames.txt (presumably a 0/1 membership matrix -
%                     TODO confirm against the data files).
%     catSize.txt   - tab-delimited numeric data; element i is used as the
%                     size of category i in the hypergeometric test.
%     catNames.txt  - one category name per line.
%     geneNames.txt - gene names (space or newline separated).
%
%   For every row of cin.txt the overlap between the cluster and that
%   category is scored with a hypergeometric tail probability, and the
%   (up to) three categories with the smallest p-values are displayed.
%   Nothing is returned.

% Population size used for the hypergeometric test.
totGenes = 6200;

cin     = dlmread('cin.txt','\t');
catSize = dlmread('catSize.txt','\t');

catNames     = readNameList('catNames.txt','\n');
clusterNames = readNameList(filename,' \n');
geneNames    = readNameList('geneNames.txt',' \n');

numGenes   = size(geneNames,1);
clustGenes = size(clusterNames,1);

% Fail with a clear message instead of an opaque dimension error in the
% element-wise product below.
if ~isempty(cin) && size(cin,2) ~= numGenes
    error('cin.txt has %d columns but geneNames.txt lists %d genes.', ...
          size(cin,2), numGenes);
end

% inC(g) is 1 when gene g of geneNames.txt appears (case-insensitively) in
% the cluster file. The padded upper-case matrix is loop-invariant, so it
% is built once.
geneUpper = char(upper(geneNames));
inC = zeros(1,numGenes);
for i = 1:clustGenes
    hits = strmatch(upper(clusterNames{i}),geneUpper,'exact');
    inC(hits) = 1;   % no-op when the name is not in the gene list
end

% NOTE(review): clustGenes counts every name in the cluster file, including
% names that were not found in geneNames.txt - confirm this is intended.
totalCat = size(cin,1);
pval = zeros(1,totalCat);   % preallocated so zero categories still works
for i = 1:totalCat
    sizeInter = sum(inC .* cin(i,:));
    % Enrichment p-value is P(X >= sizeInter). hygecdf returns P(X <= x),
    % so the CDF must be evaluated at sizeInter-1; evaluating it at
    % sizeInter would give P(X > sizeInter) and understate the p-value.
    pval(i) = 1 - hygecdf(sizeInter-1,totGenes,clustGenes,catSize(i));
end

[Y, I] = sort(pval);

% Show the best three categories, or fewer when fewer are available.
numShown = min(3,totalCat);
for j = 1:numShown
    % Semicolon intentionally omitted so the formatted line is displayed.
    sprintf('%s %1.0d',char(catNames(I(j),:)),Y(j))
end


function names = readNameList(fname, delim)
% READNAMELIST  Read strings from fname split on delim, dropping blank rows.
%   Returns a column cell array of strings. A row counts as blank when its
%   space-padded character form starts with a space.
names = textread(fname,'%s','delimiter',delim);
blankRows = strmatch(' ',char(names));
keep = setdiff(1:size(names,1),blankRows);
names = names(keep,:);