http://qs321.pair.com?node_id=392446


in reply to Re^2: NodeReaper out of control
in thread NodeReaper out of control

WADR, the ambiguity argument is garbage. AFAIK, all useful languages have ambiguities; as long as they are resolved in a clear way there's no problem. Perl certainly has more than its fair share, and not all of those are at all clearly resolved (e.g. /$x[foo]/: is it a scalar and a character class, or an array element? Depends on what foo is, and the guessing code is complex beyond description.)

Update: just for fun, here is the code that disambiguates $x[foo] in a regex. FALSE means a character class; TRUE means a subscript. s points to foo.

/*
 * Heuristic that decides whether "[foo]" following a scalar inside a regex
 * is a subscript or a character class.  s points at the first character
 * after the '['.
 *
 * Returns FALSE for "character class" and TRUE for "subscript".
 *
 * The decision is made by accumulating a score in `weight`: evidence for a
 * subscript subtracts from it, evidence for a character class adds to it.
 * A final weight >= 0 means character class; anything negative falls
 * through to the trailing `return TRUE`.
 */
if (*s == ']' || *s == '^')
    return FALSE;               /* "[]" or "[^": treated as a character class */
else {
    /* this is terrifying, and it works */
    int weight = 2;             /* let's weigh the evidence */
    char seen[256];             /* per-character occurrence counts so far */
    /* 255 serves as the "no previous character yet" marker for last_un_char */
    unsigned char un_char = 255, last_un_char;
    char *send = strchr(s,']');
    char tmpbuf[sizeof PL_tokenbuf * 4];

    if (!send)                  /* has to be an expression */
        return TRUE;

    Zero(seen,256,char);

    /* First-character evidence. */
    if (*s == '$')
        weight -= 3;
    else if (isDIGIT(*s)) {
        if (s[1] != ']') {
            /* exactly two digits then ']' */
            if (isDIGIT(s[1]) && s[2] == ']')
                weight -= 10;
        }
        else
            weight -= 100;      /* a single digit then ']' */
    }

    /* Walk every character up to (not including) the closing ']'. */
    for (; s < send; s++) {
        last_un_char = un_char;
        un_char = (unsigned char)*s;
        switch (*s) {
        case '@':
        case '&':
        case '$':
            /* a repeated sigil counts 10 per earlier occurrence */
            weight -= seen[un_char] * 10;
            if (isALNUM_lazy_if(s+1,UTF)) {
                /* sigil followed by a word character: scan the name into tmpbuf */
                scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
                /* presumably gv_fetchpv succeeds only for an already-known
                 * variable -- confirm against the Perl API docs */
                if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
                    weight -= 100;
                else
                    weight -= 10;
            }
            else if (*s == '$' && s[1] &&
                     strchr("[#!%*<>()-=",s[1])) {
                /* '$' followed by one of the listed punctuation characters */
                if (/*{*/ strchr("])} =",s[2]))
                    weight -= 10;
                else
                    weight -= 1;
            }
            break;
        case '\\':
            /* backslashes are tallied under the pseudo-character 254 */
            un_char = 254;
            if (s[1]) {
                if (strchr("wds]",s[1]))
                    weight += 100;
                else if (seen['\''] || seen['"'])
                    weight += 1;
                else if (strchr("rnftbxcav",s[1]))
                    weight += 40;
                else if (isDIGIT(s[1])) {
                    weight += 40;
                    /* swallow the whole run of digits after the backslash */
                    while (s[1] && isDIGIT(s[1]))
                        s++;
                }
            }
            else
                weight += 100;  /* backslash is the last character of the string */
            break;
        case '-':
            if (s[1] == '\\')
                weight += 50;
            /* typical range start (a, A, 0, 1, !, space) before the '-' */
            if (strchr("aA01! ",last_un_char))
                weight += 30;
            /* typical range end (z, Z, 7, 9, ~) after the '-' */
            if (strchr("zZ79~",s[1]))
                weight += 30;
            /* a leading '-' followed by a digit or '$' */
            if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
                weight -= 5;    /* cope with negative subscript */
            break;
        default:
            /* start of a run of letters not preceded by a word character
             * or a sigil: copy the letters out and test for a keyword */
            if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
                isALPHA(*s) && s[1] && isALPHA(s[1])) {
                char *d = tmpbuf;
                /* NOTE(review): this copy is limited only by the length of
                 * the letter run, not by sizeof tmpbuf.  It also advances s
                 * past the word, so the loop's own s++ then skips the
                 * character that follows it. */
                while (isALPHA(*s))
                    *d++ = *s++;
                *d = '\0';
                if (keyword(tmpbuf, d - tmpbuf))
                    weight -= 150;
            }
            /* this character is exactly one above the previous one */
            if (un_char == last_un_char + 1)
                weight += 5;
            /* repeated characters count 1 per earlier occurrence */
            weight -= seen[un_char];
            break;
        }
        seen[un_char]++;
    }
    if (weight >= 0)            /* probably a character class */
        return FALSE;
}
return TRUE;