I've just pushed an updated wordlist, which is filtered for similar characters taken from this matrix. BIP39 now considers the following character pairs as similar: similar = ( ('a', 'c'), ('a', 'e'), ('a', 'o'), ('b', 'd'), ('b', 'h'), ('b', 'p'), ('b', 'q'), ('b', 'r'), ('c', 'e'), ('c', 'g'), ('c', 'n'), ('c', 'o'), ('c', 'q'), ('c', 'u'), ('d', 'g'), ('d', 'h'), ('d', 'o'), ('d', 'p'), ('d', 'q'), ('e', 'f'), ('e', 'o'), ('f', 'i'), ('f', 'j'), ('f', 'l'), ('f', 'p'), ('f', 't'), ('g', 'j'), ('g', 'o'), ('g', 'p'), ('g', 'q'), ('g', 'y'), ('h', 'k'), ('h', 'l'), ('h', 'm'), ('h', 'n'), ('h', 'r'), ('i', 'j'), ('i', 'l'), ('i', 't'), ('i', 'y'), ('j', 'l'), ('j', 'p'), ('j', 'q'), ('j', 'y'), ('k', 'x'), ('l', 't'), ('m', 'n'), ('m', 'w'), ('n', 'u'), ('n', 'z'), ('o', 'p'), ('o', 'q'), ('o', 'u'), ('o', 'v'), ('p', 'q'), ('p', 'r'), ('q', 'y'), ('s', 'z'), ('u', 'v'), ('u', 'w'), ('u', 'y'), ('v', 'w'), ('v', 'y') ) Feel free to review and comment on the current wordlist, but I think we're slowly moving toward the final list. slush On Sat, Oct 19, 2013 at 1:58 AM, Gregory Maxwell wrote: > some fairly old wordlist solver code of mine: > > https://people.xiph.org/~greg/wordlist.visual.py > > it has a 52x52 letter visual similarity matrix in it (along with a > citation) > > On Fri, Oct 18, 2013 at 4:52 PM, jan wrote: > > > > The words 'public', 'private' and 'secret' could be confusing when > > encoding public and private keys. e.g. a private key that begins with > > the word 'public'. > > > > I think avoiding words that could look similar when written down would > > be a good idea as well. I searched for words that only differ by the > > letters c & e, g & y, u & v and found the following: > > > > car ear > > cat eat > > gear year > > value valve > > > > Other combinations could potentially be problematic depending on the > > handwriting style: ft, ao, ij, vy, possibly even lt and il? > > > > I've included the search utility I used below. 
> > > > > > #include <stdio.h> > > #include <string.h> > > #include <stdbool.h> > > > > char *similar_char_pairs[] = { "ce", "gy", "uv", NULL }; > > > > bool is_similar_char(char c1, char c2) > > { > > char **pairs = similar_char_pairs; > > do { > > char *p = *pairs; > > if ((c1 == p[0] && c2 == p[1]) || > > (c1 == p[1] && c2 == p[0])) > > return true; > > } while (*++pairs); > > > > return false; > > } > > > > bool print_words_if_similar(char *word1, char *word2) > > { > > /* reject words of different lengths */ > > if (strlen(word1) != strlen(word2)) > > return false; > > > > size_t i, similarcount = 0; > > > > for (i = 0; i < strlen(word1); i++) { > > /* skip identical letters */ > > if (word1[i] == word2[i]) > > continue; > > > > /* reject words that don't match */ > > if (is_similar_char(word1[i], word2[i]) == false) > > return false; > > > > similarcount++; > > } > > > > /* reject words with more than 1 different letter */ > > //if (similarcount > 1) > > // return false; > > > > printf("%s %s\n", word1, word2); > > > > return true; > > } > > > > int main(void) > > { > > /* english.txt is assumed to exist in the working directory > > download from: > > > https://github.com/trezor/python-mnemonic/blob/master/mnemonic/wordlist/english.txt*/ > > FILE* f = fopen("english.txt", "r"); > > if (!f) { > > fprintf(stderr, "failed to open english.txt\n"); > > return 1; > > } > > > > /* read in word list, assumes one word per line */ > > #define MAXWORD 16 > > char wordlist[2048][MAXWORD]; > > int word = 0; > > while (fgets(wordlist[word], MAXWORD, f)) { > > /* strip trailing whitespace, assumes no leading whitespace */ > > char *ch = strpbrk(wordlist[word], " \n\t"); > > if (ch) > > *ch = '\0'; > > word++; > > } > > > > if (word != 2048) { > > fprintf(stderr, "word list incorrect length\n"); > > return 1; > > } > > > > /* check each word for similarity against every other word */ > > int i, j, count = 0; > > for (i = 0; i < 2048; i++) { > > for (j = i+1; j < 2048; j++) { > > if 
(print_words_if_similar(wordlist[i], wordlist[j])) > > count++; > > } > > } > > > > printf("%d matches\n", count); > > > > return 0; > > } > > > > > ------------------------------------------------------------------------------ > > October Webinars: Code for Performance > > Free Intel webinars can help you accelerate application performance. > > Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most > from > > the latest Intel processors and coprocessors. See abstracts and register > > > > > http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk > > _______________________________________________ > > Bitcoin-development mailing list > > Bitcoin-development@lists.sourceforge.net > > https://lists.sourceforge.net/lists/listinfo/bitcoin-development > > > ------------------------------------------------------------------------------ > October Webinars: Code for Performance > Free Intel webinars can help you accelerate application performance. > Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most > from > the latest Intel processors and coprocessors. See abstracts and register > > http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk > _______________________________________________ > Bitcoin-development mailing list > Bitcoin-development@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/bitcoin-development >