I've just pushed an updated wordlist, which is filtered for similar characters taken from this matrix.

BIP39 now considers the following character pairs to be similar:

        similar = (
            ('a', 'c'), ('a', 'e'), ('a', 'o'),
            ('b', 'd'), ('b', 'h'), ('b', 'p'), ('b', 'q'), ('b', 'r'),
            ('c', 'e'), ('c', 'g'), ('c', 'n'), ('c', 'o'), ('c', 'q'), ('c', 'u'),
            ('d', 'g'), ('d', 'h'), ('d', 'o'), ('d', 'p'), ('d', 'q'),
            ('e', 'f'), ('e', 'o'),
            ('f', 'i'), ('f', 'j'), ('f', 'l'), ('f', 'p'), ('f', 't'),
            ('g', 'j'), ('g', 'o'), ('g', 'p'), ('g', 'q'), ('g', 'y'),
            ('h', 'k'), ('h', 'l'), ('h', 'm'), ('h', 'n'), ('h', 'r'),
            ('i', 'j'), ('i', 'l'), ('i', 't'), ('i', 'y'),
            ('j', 'l'), ('j', 'p'), ('j', 'q'), ('j', 'y'),
            ('k', 'x'),
            ('l', 't'),
            ('m', 'n'), ('m', 'w'),
            ('n', 'u'), ('n', 'z'),
            ('o', 'p'), ('o', 'q'), ('o', 'u'), ('o', 'v'),
            ('p', 'q'), ('p', 'r'),
            ('q', 'y'),
            ('s', 'z'),
            ('u', 'v'), ('u', 'w'), ('u', 'y'),
            ('v', 'w'), ('v', 'y')
        )

Feel free to review and comment on the current wordlist, but I think we're slowly moving toward the final list.

slush


On Sat, Oct 19, 2013 at 1:58 AM, Gregory Maxwell <gmaxwell@gmail.com> wrote:
some fairly old wordlist solver code of mine:

https://people.xiph.org/~greg/wordlist.visual.py

it has a 52x52 letter visual similarity matrix in it (along with a citation)

On Fri, Oct 18, 2013 at 4:52 PM, jan <jan.marecek@gmail.com> wrote:
>
> The words 'public', 'private' and 'secret' could be confusing when
> encoding public and private keys, e.g. a private key that begins with
> the word 'public'.
>
> I think avoiding words that could look similar when written down would
> be a good idea as well. I searched for words that only differ by the
> letters c & e, g & y, u & v and found the following:
>
> car ear
> cat eat
> gear year
> value valve
>
> Other combinations could potentially be problematic depending on the
> handwriting style: ft, ao, ij, vy, possibly even lt and il?
>
> I've included the search utility I used below.
>
>
> #include <stdbool.h>
> #include <string.h>
> #include <stdio.h>
>
> char *similar_char_pairs[] = { "ce", "gy", "uv", NULL };
>
> bool is_similar_char(char c1, char c2)
> {
>   char **pairs = similar_char_pairs;
>   do {
>     char *p = *pairs;
>     if ((c1 == p[0] && c2 == p[1]) ||
>         (c1 == p[1] && c2 == p[0]))
>       return true;
>   } while (*++pairs);
>
>   return false;
> }
>
> bool print_words_if_similar(char *word1, char *word2)
> {
>   /* reject words of different lengths */
>   if (strlen(word1) != strlen(word2))
>     return false;
>
>   size_t i, similarcount = 0;
>
>   for (i = 0; i < strlen(word1); i++) {
>     /* skip identical letters */
>     if (word1[i] == word2[i])
>       continue;
>
>     /* reject words that don't match */
>     if (is_similar_char(word1[i], word2[i]) == false)
>       return false;
>
>     similarcount++;
>   }
>
>   /* reject words with more than 1 different letter */
>   //if (similarcount > 1)
>   //  return false;
>
>   printf("%s %s\n", word1, word2);
>
>   return true;
> }
>
> int main(void)
> {
>   /* english.txt is assumed to exist in the working directory
>      download from:
>      https://github.com/trezor/python-mnemonic/blob/master/mnemonic/wordlist/english.txt */
>   FILE* f = fopen("english.txt", "r");
>   if (!f) {
>     fprintf(stderr, "failed to open english.txt\n");
>     return 1;
>   }
>
>   /* read in word list, assumes one word per line */
>   #define MAXWORD 16
>   char wordlist[2048][MAXWORD];
>   int word = 0;
>   while (fgets(wordlist[word], MAXWORD, f)) {
>     /* strip trailing whitespace, assumes no leading whitespace */
>     char *ch = strpbrk(wordlist[word], " \n\t");
>     if (ch)
>       *ch = '\0';
>     word++;
>   }
>
>   if (word != 2048) {
>     fprintf(stderr, "word list incorrect length\n");
>     return 1;
>   }
>
>   /* check each word for similarity against every other word */
>   int i, j, count = 0;
>   for (i = 0; i < 2048; i++) {
>     for (j = i+1; j < 2048; j++) {
>       if (print_words_if_similar(wordlist[i], wordlist[j]))
>         count++;
>     }
>   }
>
>   printf("%d matches\n", count);
>
>   return 0;
> }
>
> ------------------------------------------------------------------------------
> October Webinars: Code for Performance
> Free Intel webinars can help you accelerate application performance.
> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from
> the latest Intel processors and coprocessors. See abstracts and register >
> http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk
> _______________________________________________
> Bitcoin-development mailing list
> Bitcoin-development@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/bitcoin-development

------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk
_______________________________________________
Bitcoin-development mailing list
Bitcoin-development@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bitcoin-development