% File: ef230-2023-08/www/examples/dictionary_read.m
%! Example of reading a ~500,000 word dictionary into Matlab
%! This example loads all words into a cell array, keeps track of
%! the count of each word length, and displays all words over 20 characters long
% University of Tennessee : EF 230 Fall, 2009 : Will Schleter
clear; % start from an empty workspace (no need to also flush cached functions)

% Location of the dictionary text file (one word per line).
% NOTE(review): the URL is blank in this copy of the script - fill it in before running.
dict_url = '';

big_len = 20; % words longer than this are reported as "big words"

% get the file, txt will be a huge text array
% line feeds (ascii 10) delimit each word
h=waitbar(0,'Getting file from web site','name','Dictionary Load');
try
    txt = urlread(dict_url);
catch err
    close(h); % do not leave the progress window behind if the download fails
    rethrow(err);
end
% make sure the last word is terminated so it is not silently dropped
if ~isempty(txt) && txt(end)~=10
    txt = [txt char(10)];
end

waitbar(0,h,'Parsing text');
jj=1; % index for start of current word
lf = find(txt==10); % an array with the index of all linefeeds
ss= length(lf); % the number of words
list=cell(ss,1); % pre-allocate storage for the word list
cnt=zeros(50,1); % init counter storage, cnt(n) = number of words of length n
bwc=0; % big word count
for ii=1:ss
    kk = lf(ii); % index of end of ii^th word
    list{ii}=char(txt(jj:kk-1)); % get the word
    lw=kk-jj; % length of word
    jj=kk+1; % next word starts right after this linefeed
    if lw>0 % a blank line has length 0, which is not a valid index
        if lw>length(cnt)
            cnt(lw)=0; % grow the counter storage for unusually long words
        end
        cnt(lw)=cnt(lw)+1; % update count
    end
    if lw>big_len % display big words in command window
        bwc=bwc+1;
        fprintf('Big word: %s\n',list{ii});
    end
    % update the progress bar every 500 words
    if mod(ii,500)==0
        waitbar(ii/ss,h,sprintf('%u words read, %u big words found',ii,bwc));
    end
end
close(h); % parsing finished, remove the progress window
clear txt lf;

% display the word length counts
for len=1:length(cnt)
    fprintf('%u letter words: %u\n',len,cnt(len));
end
fprintf('Total words: %u\n',length(list));
fprintf('Big words: %u\n',bwc);